In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(MLP, self).__init__()
        # First hidden layer
        self.fc1 = nn.Linear(input_size, hidden_size1)
        # Second hidden layer
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        # Output layer
        self.fc3 = nn.Linear(hidden_size2, output_size)

    def forward(self, x):
        # Apply ReLU activation function after each hidden layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # Output layer does not have an activation function here
        # This can be changed depending on the problem
        x = self.fc3(x)
        return x

In [4]:
import pandas as pd
data = '../../data/Processed_Data/leakage_PCA_results.csv'
df = pd.read_csv(data)
df

Unnamed: 0.1,Unnamed: 0,component 1,component 2,component 3,Status
0,0,-7.421883,-0.819801,2.028855,Leakage
1,1,-15.323043,-0.497512,0.746582,Leakage
2,2,-6.114105,-1.917797,1.015368,Leakage
3,3,-9.479358,-0.598465,1.138944,Leakage
4,4,-0.860349,2.826234,-1.64534,Leakage
5,5,22.702054,-6.145934,14.438932,Leakage
6,6,-11.670814,-0.705471,0.988405,Leakage
7,7,21.102197,13.969196,-3.822477,Leakage
8,8,15.629545,-13.089129,-8.578873,Leakage
9,9,-6.229331,-2.089063,-4.332429,Leakage


In [5]:
input_size = 3    # 3 components
hidden_size1 = 50  # number of neurons in the first hidden layer
hidden_size2 = 50  # number of neurons in the second hidden layer
output_size = 2    # number of output classes: leakage/non leakage

# Create an instance of the MLP
mlp = MLP(input_size, hidden_size1, hidden_size2, output_size)

In [11]:
input_df = df.iloc[:, 1:4]
input_df

Unnamed: 0,component 1,component 2,component 3
0,-7.421883,-0.819801,2.028855
1,-15.323043,-0.497512,0.746582
2,-6.114105,-1.917797,1.015368
3,-9.479358,-0.598465,1.138944
4,-0.860349,2.826234,-1.64534
5,22.702054,-6.145934,14.438932
6,-11.670814,-0.705471,0.988405
7,21.102197,13.969196,-3.822477
8,15.629545,-13.089129,-8.578873
9,-6.229331,-2.089063,-4.332429


In [32]:
#converting input data to tensor
forward_pass_example = input_df.iloc[0]
forward_pass_example = torch.tensor(forward_pass_example.values, dtype=torch.float32)
type(forward_pass_example)

torch.Tensor

In [33]:
#forward pass example
#lowkey what do these output values mean because I didn't give it Y info yet ??
output = mlp(forward_pass_example)
print(output)

tensor([-0.3298,  0.2536], grad_fn=<ViewBackward0>)


In [34]:
# Back propagation
import torch.optim as optim

In [42]:
import numpy as np
from sklearn.model_selection import train_test_split

X = df.iloc[:, 1:4]
y = np.where(df["Status"] == "Non leakage", 0, 1)

# Splitting the data into training and test sets by a 80:20 ratio
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


In [54]:
# Putting in the dataset from PCA results
# Assuming inputs are of size 10 and there are 3 classes
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long) #labels

  X_train = torch.tensor(X_train, dtype=torch.float32)
  y_train = torch.tensor(y_train, dtype=torch.long) #labels


In [52]:
# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer (e.g., Stochastic Gradient Descent)
optimizer = optim.SGD(mlp.parameters(), lr=0.01)

In [55]:
# Training loop
for epoch in range(10):  # number of epochs
    for i in range(len(X_train)):
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = mlp(X_train[i])

        # Compute the loss
        loss = criterion(outputs.unsqueeze(0), y_train[i].unsqueeze(0))

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

Epoch 1, Loss: 0.7973233461380005
Epoch 2, Loss: 0.7243119478225708
Epoch 3, Loss: 0.7203292846679688
Epoch 4, Loss: 0.6884123086929321
Epoch 5, Loss: 0.6719635725021362
Epoch 6, Loss: 0.6667874455451965
Epoch 7, Loss: 0.6394017934799194
Epoch 8, Loss: 0.6297168731689453
Epoch 9, Loss: 0.6148592829704285
Epoch 10, Loss: 0.618548572063446


In [62]:
X_test = torch.tensor(X_test, dtype=torch.float32)
#returns logit values
mlp(X_test)

  X_test = torch.tensor(X_test, dtype=torch.float32)


tensor([[ 1.3434, -0.9447],
        [-0.3006,  0.0516],
        [-0.1783, -0.0817],
        [ 0.4221, -0.3359],
        [ 0.0114, -0.1277],
        [ 0.8383, -0.6857],
        [ 0.8242, -0.7799]], grad_fn=<AddmmBackward0>)

In [63]:
mlp.eval()  # Set the model to evaluation mode
with torch.no_grad():
    # Apply softmax to get probabilities
    probabilities = F.softmax(mlp(X_test), dim=1)
print(probabilities)

tensor([[0.9079, 0.0921],
        [0.4128, 0.5872],
        [0.4759, 0.5241],
        [0.6809, 0.3191],
        [0.5347, 0.4653],
        [0.8211, 0.1789],
        [0.8326, 0.1674]])


In [64]:
predicted_labels = torch.argmax(probabilities, dim=1)
print(predicted_labels)

tensor([0, 1, 1, 0, 0, 0, 0])


In [70]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

# Calculate accuracy
accuracy = accuracy_score(y_test, predicted_labels)
print(accuracy)

0.2857142857142857
