This notebook trains the model and then compares the actual output with the model output on the validation data.

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import numpy as np
import csv
import copy

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Device = {device}")

class RandomOptimiser(nn.Module):
    """Small fully connected classifier mapping a feature vector to class logits.

    The layer widths are ``input_size -> 32 -> 64 -> 32 -> num_classes`` with a
    ReLU after each of the three hidden layers. The last layer returns raw
    logits; no softmax is applied inside the network.

    Args:
        input_size: Number of features in each input sample.
        num_classes: Number of logits produced per sample. Defaults to 3.
    """

    def __init__(self, input_size, num_classes=3):
        super().__init__()
        # Attribute names fc1..fc4 determine the state_dict keys, so they are
        # kept stable for any saved checkpoints.
        self.fc1 = nn.Linear(input_size, 32)
        self.fc2 = nn.Linear(32, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, num_classes)

    def forward(self, x):
        """Return logits of shape ``(..., num_classes)`` for ``x`` of shape ``(..., input_size)``."""
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = torch.relu(hidden(x))
        # No activation on the output layer: callers receive unnormalised scores.
        return self.fc4(x)

def train_model(model, train_loader, valid_features, valid_labels, criterion, optimizer, epochs, log_every=50):
    """Train ``model`` and record per-epoch loss and accuracy for both splits.

    Each epoch performs one optimisation pass over ``train_loader``. The model
    is then switched to eval mode and, with gradients disabled, scored on the
    complete training set and on the validation tensors. The model is left in
    eval mode when the function returns (if at least one epoch ran).

    Args:
        model: Network to train; its parameters are updated in place.
        train_loader: Iterable of ``(features, labels)`` batches that supports
            ``len()``. Its ``dataset`` must expose ``tensors`` (as
            ``TensorDataset`` does), because the full training set is read from
            ``train_loader.dataset.tensors`` to compute training accuracy.
        valid_features: Validation inputs, passed to the model in a single call.
        valid_labels: Class indices for ``valid_features``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer used for the updates; it must already hold the
            model's parameters.
        epochs: Number of passes over ``train_loader``.
        log_every: Print a progress report every ``log_every`` epochs. ``0`` or
            ``None`` disables printing. Defaults to 50.

    Returns:
        ``(train_losses, train_accuracies, valid_losses, valid_accuracies)``,
        four lists holding one float per epoch. The training loss is the mean
        of the mini-batch losses observed while the weights were being
        updated; the other three values are measured after the epoch's updates.
    """
    train_losses = []
    train_accuracies = []
    valid_losses = []
    valid_accuracies = []

    for epoch in range(epochs):
        # --- optimisation pass ---
        model.train()
        running_loss = 0.0
        for batch_features, batch_labels in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_features), batch_labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # --- evaluation pass (no gradients, eval-mode layers) ---
        model.eval()
        with torch.no_grad():
            all_train_features = train_loader.dataset.tensors[0]
            all_train_labels = train_loader.dataset.tensors[1]
            train_predicted = torch.argmax(model(all_train_features), dim=1)
            train_accuracy = (train_predicted == all_train_labels).float().mean()

            valid_outputs = model(valid_features)
            valid_loss = criterion(valid_outputs, valid_labels)
            valid_predicted = torch.argmax(valid_outputs, dim=1)
            valid_accuracy = (valid_predicted == valid_labels).float().mean()

        # Average over the number of batches, not the number of samples.
        train_losses.append(running_loss / len(train_loader))
        train_accuracies.append(train_accuracy.item())
        valid_losses.append(valid_loss.item())
        valid_accuracies.append(valid_accuracy.item())

        if log_every and (epoch + 1) % log_every == 0:
            print(f'Epoch [{epoch + 1}/{epochs}], Train Loss: {train_losses[-1]:.4f}, Valid Loss: {valid_losses[-1]:.4f}')
            print(f'Train Accuracy: {train_accuracies[-1]:.4f}, Valid Accuracy: {valid_accuracies[-1]:.4f}')

    return train_losses, train_accuracies, valid_losses, valid_accuracies

def process_data(data):
    """Parse comma-separated text rows into a feature tensor and a label tensor.

    Whitespace-only rows are skipped. In every other row each field is parsed
    as a float; the last field, converted with ``int()``, is the label and the
    fields before it are the features.

    Args:
        data: Iterable of strings, one comma-separated row each. Every
            non-blank row must contain only numeric fields. Inside this
            function the name shadows the ``torch.utils.data`` alias.

    Returns:
        ``(features, labels)``: a ``float32`` tensor and a ``long`` tensor, both
        created on the module-level ``device``.
    """
    feature_rows = []
    targets = []
    for row in data:
        if not row.strip():
            continue
        # All fields are converted to float first; the trailing one is the label.
        *inputs, target = (float(field) for field in row.split(','))
        feature_rows.append(inputs)
        targets.append(int(target))

    features = torch.tensor(feature_rows, dtype=torch.float32, device=device)
    labels = torch.tensor(targets, dtype=torch.long, device=device)
    return features, labels

def save_validation_outputs_to_csv(valid_features, valid_labels, model, output_path='validation_results.csv'):
    """Write every validation sample with its actual and predicted class to a CSV file.

    The model is switched to eval mode (and left in it) and run once on
    ``valid_features`` with gradients disabled. The predicted class is the
    argmax over dimension 1 of the model output. The file has a header row
    followed by one row per sample: the features rendered as
    ``"[f0, f1, ...]"``, the actual label, and the predicted label.

    Args:
        valid_features: Tensor of validation inputs, one sample per row.
        valid_labels: Tensor of actual labels, indexed in step with
            ``valid_features``.
        model: Trained network used to produce the predictions.
        output_path: Destination file; overwritten if it already exists.
            Defaults to ``'validation_results.csv'`` in the working directory.
    """
    model.eval()
    with torch.no_grad():
        predictions = torch.argmax(model(valid_features), dim=1)

    # Transfer each tensor to the CPU once, rather than once per element
    # inside the row loop.
    feature_rows = valid_features.cpu().tolist()
    actual_outputs = valid_labels.cpu().tolist()
    model_outputs = predictions.cpu().tolist()

    # newline='' lets the csv module control line endings itself.
    with open(output_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Feature Input', 'Actual Output', 'Model Output'])
        for i, feature_input in enumerate(feature_rows):
            writer.writerow([
                f"[{', '.join(map(str, feature_input))}]",
                actual_outputs[i],
                model_outputs[i],
            ])

if __name__ == "__main__":
    # Hyperparameters for this run.
    SEED = 42
    LEARNING_RATE = 0.001
    BATCH_SIZE = 64
    EPOCHS = 200

    # Seed PyTorch's global RNG so weight initialisation and batch shuffling
    # are repeatable from one run to the next.
    torch.manual_seed(SEED)

    def _read_data_rows(path):
        """Return the lines of ``path`` after the header row, closing the file afterwards."""
        with open(path, 'r') as handle:
            return handle.read().split("\n")[1:]

    # --- Load the three encoded splits (first line of each file is a header) ---
    TestingData = _read_data_rows('TestingEncoded.csv')
    TrainingData = _read_data_rows('TrainingEncoded.csv')
    ValidationData = _read_data_rows('ValidationEncoded.csv')
    train_features, train_labels = process_data(TrainingData)
    valid_features, valid_labels = process_data(ValidationData)
    # The test split is parsed here but not used further in this cell.
    test_features, test_labels = process_data(TestingData)

    # --- Build the model, loss, optimizer and training loader ---
    input_size = train_features.shape[1]
    model = RandomOptimiser(input_size).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
    train_dataset = data.TensorDataset(train_features, train_labels)
    train_loader = data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

    # --- Train, then dump actual vs. predicted labels for the validation split ---
    train_losses, train_accuracies, valid_losses, valid_accuracies = train_model(
        model, train_loader, valid_features, valid_labels, criterion, optimizer, EPOCHS
    )
    save_validation_outputs_to_csv(valid_features, valid_labels, model)


Device = cpu
Epoch [50/200], Train Loss: 0.6297, Valid Loss: 0.9822
Train Accuracy: 0.7699, Valid Accuracy: 0.6153
Epoch [100/200], Train Loss: 0.4589, Valid Loss: 0.5947
Train Accuracy: 0.8703, Valid Accuracy: 0.7773
Epoch [150/200], Train Loss: 0.4193, Valid Loss: 0.5202
Train Accuracy: 0.8892, Valid Accuracy: 0.8163
Epoch [200/200], Train Loss: 0.4376, Valid Loss: 0.6904
Train Accuracy: 0.8600, Valid Accuracy: 0.7180
