In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import torch.nn.functional as F
import torch.utils.data as data
import matplotlib.pyplot as plt


In [None]:
# Baseline Neural Network
class BaselineNN(nn.Module):
  """Fully connected baseline classifier with two ReLU hidden layers.

  Args:
    input_size: Number of input features per sample.
    hidden_size: Width of both hidden layers. Defaults to 64.
    num_classes: Number of output nodes, one for each level of engagement.
      Defaults to 3.
  """

  def __init__(self, input_size, hidden_size=64, num_classes=3):
    super().__init__()
    # The attribute names fc1/fc2/fc3 determine the state_dict keys, so they
    # are kept stable for previously saved checkpoints.
    self.fc1 = nn.Linear(input_size, hidden_size)
    self.fc2 = nn.Linear(hidden_size, hidden_size)
    self.fc3 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    """Return the raw output scores for ``x``.

    Args:
      x: Float tensor whose last dimension has ``input_size`` entries.

    Returns:
      Tensor whose last dimension has ``num_classes`` entries. No activation
      is applied to the final layer.
    """
    x = torch.relu(self.fc1(x))
    x = torch.relu(self.fc2(x))
    return self.fc3(x)

In [None]:
# Hyperparameters
learning_rate = 0.001
batch_size = 32
epochs = 100

# Per-epoch metric histories; the training loop appends to these and the
# plotting cells read them.
TrainingLoss = []
TrainingAccuracy = []
ValidationLoss = []
ValidationAccuracy = []


def _read_data_rows(path):
    """Return the lines of the text file at ``path`` with the first line dropped.

    The first line is presumably the CSV header row. The file is opened in a
    ``with`` block so the handle is closed as soon as the content is read.
    """
    with open(path, 'r') as handle:
        return handle.read().split("\n")[1:]


# Load the three already-split datasets as lists of raw CSV lines
TestingData = _read_data_rows('TestingEncoded.csv')
TrainingData = _read_data_rows('TrainingEncoded.csv')
ValidationData = _read_data_rows('ValidationEncoded.csv')

# Every column except the last one is an input feature (process_data treats
# the last column as the label).
input_size  = len(TestingData[0].split(",")) - 1

model = BaselineNN(input_size)

# Loss and optimizer
# MSELoss expects targets with the same shape as the model outputs, so integer
# labels have to be one-hot encoded before they are passed to the criterion.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Number of engagement levels; used as the width of the one-hot target vectors.
num_classes = 3

def process_data(data):
    """Convert raw comma-separated lines into feature and label tensors.

    Lines that are empty or contain only whitespace are skipped. Every field
    of a remaining line is parsed as a float; all fields but the last become
    that row's features and the last field, truncated to ``int``, becomes its
    label.

    Args:
        data: Iterable of strings, one comma-separated record per item.

    Returns:
        A ``(features, labels)`` tuple: a ``torch.float32`` tensor of features
        and a ``torch.long`` tensor of labels.
    """
    parsed_rows = []

    for raw_line in data:
        if not raw_line.strip():
            continue  # blank line, e.g. the trailing newline of the file
        values = [float(field) for field in raw_line.split(',')]
        parsed_rows.append((values[:-1], int(values[-1])))

    feature_rows = [row_features for row_features, _ in parsed_rows]
    label_values = [row_label for _, row_label in parsed_rows]

    feature_tensor = torch.tensor(feature_rows, dtype=torch.float32)
    label_tensor = torch.tensor(label_values, dtype=torch.long)
    return feature_tensor, label_tensor

# Turn each raw split into a (features, labels) tensor pair.
train_features, train_labels = process_data(data=TrainingData)
valid_features, valid_labels = process_data(data=ValidationData)
test_features, test_labels = process_data(data=TestingData)

# Only the training split goes through a DataLoader; it is served in shuffled
# mini-batches of `batch_size` samples.
train_dataset = data.TensorDataset(train_features, train_labels)
train_loader = data.DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=batch_size,
)

def one_hot_encode(labels, num_classes):
    """Map integer class indices to one-hot float rows.

    Each index in ``labels`` selects the matching row of a
    ``num_classes`` x ``num_classes`` identity matrix.

    Args:
        labels: Integer index tensor of class ids.
        num_classes: Size of the identity matrix, i.e. the one-hot width.

    Returns:
        Float tensor with one one-hot row per entry of ``labels``.
    """
    identity = torch.eye(num_classes)
    return identity[labels]

# Training Loop
# Each epoch: one optimisation pass over the shuffled training batches, then a
# gradient-free evaluation on the full validation and training tensors. The
# per-epoch metrics are appended to the history lists used for plotting.
for epoch in range(epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0

    for batch_features, batch_labels in train_loader:
        optimizer.zero_grad()
        # Forward pass
        outputs = model(batch_features)

        # One-hot encode the labels so they match the shape of the outputs
        one_hot_labels = one_hot_encode(batch_labels, num_classes)

        # Compute MSE loss; the criterion needs both the prediction and the
        # target (calling it with the prediction alone raises a TypeError).
        loss = criterion(outputs, one_hot_labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean of the per-batch losses for this epoch
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {epoch_loss:.4f}')
    TrainingLoss.append(epoch_loss)

    # Validation
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        # The whole validation split is evaluated in a single forward pass
        valid_outputs = model(valid_features)
        one_hot_valid_labels = one_hot_encode(valid_labels, num_classes)
        valid_loss = criterion(valid_outputs, one_hot_valid_labels)
        print(f'Validation Loss after epoch {epoch + 1}: {valid_loss.item():.4f}')
        ValidationLoss.append(valid_loss.item())

        # Accuracy: the predicted class is the index of the largest output
        predicted_classes = torch.argmax(valid_outputs, dim=1)
        accuracy = (predicted_classes == valid_labels).float().mean()
        ValidationAccuracy.append(accuracy.item())
        print(f'Validation Accuracy after epoch {epoch + 1}: {accuracy.item() * 100:.2f}%')

        # Training accuracy, measured on the full training split in eval mode
        train_outputs = model(train_features)
        predicted_classes = torch.argmax(train_outputs, dim=1)
        accuracy = (predicted_classes == train_labels).float().mean()
        TrainingAccuracy.append(accuracy.item())

# Save the model (learned parameters only, via the state_dict)
torch.save(model.state_dict(), 'baseline_model.pth')

In [None]:
# Final evaluation against the testing split
model.eval()  # evaluation mode for the forward pass below
with torch.no_grad():  # gradients are not needed when only measuring accuracy
    test_outputs = model(test_features)
    # Predicted class = index of the largest output score in each row
    predicted_classes = test_outputs.argmax(dim=1)
    # Fraction of rows whose predicted class equals the true label
    accuracy = predicted_classes.eq(test_labels).float().mean()
    print(f'Final Accuracy using Testing dataset: {accuracy.item() * 100:.2f}%')

## Plotting data 

In [None]:
# Loss history per epoch: training (solid) against validation (dashed)
loss_ax = plt.gca()
loss_ax.plot(TrainingLoss, label='Training Data', color='C0')
loss_ax.plot(ValidationLoss, label='Validation Data', color='C1', linestyle='--')
loss_ax.set_xlabel("Epochs")
loss_ax.set_ylabel("Loss")
loss_ax.set_title("Baseline model Loss")
loss_ax.legend()
plt.show()

In [None]:
# Accuracy history per epoch: training (solid) against validation (dashed)
accuracy_ax = plt.gca()
accuracy_ax.plot(TrainingAccuracy, label='Training Data', color='C0')
accuracy_ax.plot(ValidationAccuracy, label='Validation Data', color='C1', linestyle='--')
accuracy_ax.set_xlabel("Epochs")
accuracy_ax.set_ylabel("Accuracy")
accuracy_ax.set_title("Baseline model Accuracy")
accuracy_ax.legend()
plt.show()