In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
class MLP(nn.Module):
    def __init__(self, hidden_size, num_layers, input_shape = 310, output_shape = 20):
        super().__init__()
        self.input = nn.Linear(input_shape, hidden_size)
        self.layers = nn.ModuleList([
            nn.Linear(hidden_size, hidden_size) for _ in range(num_layers)
        ])
        self.output = nn.Linear(hidden_size, output_shape)

    def forward(self, x):
        x = nn.functional.leaky_relu(self.input(x))
        for layer in self.layers:
            x = nn.functional.leaky_relu(layer(x))
        return self.output(x)

In [4]:
class CSVDataset(Dataset):
    def __init__(self, file_path):
        self.data = pd.read_csv(file_path)
        self.X = self.data.iloc[:, :-1].values
        self.y = self.data.iloc[:, -1].values

        # Standardize features
        self.scaler = StandardScaler()
        self.X = self.scaler.fit_transform(self.X)

        # Encode target labels
        self.label_encoder = LabelEncoder()
        self.y = self.label_encoder.fit_transform(self.y)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return torch.tensor(self.X[idx], dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)


In [5]:
# Load dataset
dataset = CSVDataset('../data/raw/test.csv')

# Split dataset into training and testing
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=4096, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=4096, shuffle=False)


In [6]:
# Initialize the model
input_size = 310
hidden_size = 1024
output_size = 20  # Number of classes
model = MLP(hidden_size=hidden_size, num_layers=10).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        # Forward pass
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/10], Loss: 1.9007
Epoch [2/10], Loss: 1.4269
Epoch [3/10], Loss: 0.5983


In [None]:
# Evaluate the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy of the model on the test data: {accuracy:.2f}%')


Accuracy of the model on the test data: 92.49%


In [2]:
from zenml.client import Client


In [9]:
client = Client()
l = client.list_artifact_versions(name = "features_target", sort_by="version", tag='6').items
l.reverse()

In [10]:
l

[ArtifactVersionResponse(body=ArtifactVersionResponseBody(created=datetime.datetime(2024, 7, 24, 13, 9, 17, 400686), updated=datetime.datetime(2024, 7, 24, 13, 9, 17, 400688), user=UserResponse(body=UserResponseBody(created=datetime.datetime(2024, 7, 19, 12, 49, 52, 360837), updated=datetime.datetime(2024, 7, 19, 13, 2, 55, 636870), active=True, activation_token=None, full_name='Aleksandr', email_opted_in=False, is_service_account=False, is_admin=True), metadata=None, resources=None, id=UUID('fac9fd98-1514-487b-be06-d322a0ba32b6'), permission_denied=False, name='default'), artifact=ArtifactResponse(body=ArtifactResponseBody(created=datetime.datetime(2024, 7, 24, 8, 56, 49, 97771), updated=datetime.datetime(2024, 7, 24, 8, 56, 49, 97772), tags=[TagResponse(body=TagResponseBody(created=datetime.datetime(2024, 7, 24, 8, 56, 49, 98960), updated=datetime.datetime(2024, 7, 24, 8, 56, 49, 98961), color=<ColorVariants.YELLOW: 'yellow'>, tagged_count=6), metadata=None, resources=None, id=UUID('