# Develop an NN Solution (**Classification Model**) 🧠

**Part 1: Loading Custom Data**

1. Load in your train/test battery datasets
2. Create DataLoaders for those datasets

**Part 2: Training and Evaluating a Simple NN**

3. Define and train a simple NN
4. Evaluate your NN on some test data, recording the resultant RMSE (for this classification variant, record a classification metric such as ROC AUC instead)

You may be wondering: *How do I submit this in parts?* 

> You will be expected to submit this notebook **twice** for grading: once when you've completed part one, and once when you've completed the entire baseline solution (part two).

**Hint**: It may be helpful to reference the codebase for Household Power Prediction, which you saw a while ago. That codebase contains several helper functions for training and testing a model, including one that converts a typical MSE loss into an RMSE value.

In [1]:
# Import Necessary Libraries
import torch
import torch.nn as nn
import torch.optim as optim
import sklearn
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from torch.nn import MSELoss
from torch.utils.data import Dataset, DataLoader, random_split
import pandas as pd

In [34]:
# Add Nessecary Column
df = pd.read_csv("data/scaled_filtered_dataset.csv")
df['charge_duration_mins'] *= 100 # Re-Scale DataFrame to Generate New Column

# Create New Column
df['more_3hrs'] = (df['charge_duration_mins'] / 60 >= 3).astype(int)

# Create new DataFrame
df_new = df
df_new.head()

Unnamed: 0,value,charging_frequency_category,plug_in_hour,time_since_last_charge,is_night_charge,week,average_duration,charging_frequency_per_day,charge_duration_mins,more_3hrs
0,1.0,2,0,0.0,1,10,70.832335,10,59.0,0
1,1.0,1,1,1.0,1,10,70.832335,10,0.0,0
2,1.0,2,1,0.0,1,10,70.832335,10,34.0,0
3,1.0,2,3,1.466667,1,10,70.832335,10,6.0,0
4,1.0,1,3,0.2,1,10,70.832335,10,341.0,1


In [35]:
class OBC_Dataset(Dataset):
    """Tensor-backed dataset built from a DataFrame whose last column is the target.

    Every column except the last is used as an input feature and the last
    column is used as the target. Both are stored as ``float32`` tensors:
    ``x`` with one row per sample and ``y`` with shape ``(n_samples, 1)``.

    Args:
        frame: DataFrame to read. When omitted, the notebook-level ``df_new``
            is used, which keeps the original zero-argument call working.
        drop_columns: Name (or iterable of names) of feature columns to leave
            out of ``x``. Defaults to none, so every non-target column is
            kept. Unknown names raise ``KeyError`` (from ``DataFrame.drop``).

    NOTE(review): in this notebook the target ``more_3hrs`` is computed from
    ``charge_duration_mins``, and that column is also among the features, so
    the label can be read straight off the inputs. Passing
    ``drop_columns=["charge_duration_mins"]`` removes it; the model's
    ``input_dim`` must then match the reduced feature count.
    """

    def __init__(self, frame=None, drop_columns=()):
        # Fall back to the notebook-level DataFrame for backward compatibility
        if frame is None:
            frame = df_new

        # A bare string would otherwise be split into single characters
        if isinstance(drop_columns, str):
            drop_columns = [drop_columns]
        drop_columns = list(drop_columns)

        # Separate features and target (target = last column)
        feature_frame = frame.iloc[:, :-1]
        if drop_columns:
            feature_frame = feature_frame.drop(columns=drop_columns)
        input_features = feature_frame.values
        target = frame.iloc[:, -1:].values

        # Convert features and target into tensors
        self.x = torch.tensor(input_features, dtype=torch.float32)
        self.y = torch.tensor(target, dtype=torch.float32)

    def __len__(self):
        # Number of samples (rows) in the dataset
        return len(self.y)

    def __getitem__(self, index):
        # Return the (features, target) pair stored at `index`
        return self.x[index], self.y[index]

    def split_data(self, n_test, seed=None):
        """Randomly split the dataset into train and test subsets.

        Args:
            n_test: Fraction of samples (between 0 and 1) assigned to the
                test subset; the count is rounded to the nearest integer.
            seed: Optional integer seed. When given, the split is
                reproducible; when omitted, the default random generator
                is used as before.

        Returns:
            The ``[train, test]`` subsets produced by ``random_split``.

        Raises:
            ValueError: If ``n_test`` is not within ``[0, 1]``.
        """
        # Reject fractions that would produce a negative subset size
        if not 0 <= n_test <= 1:
            raise ValueError(
                f"n_test must be a fraction between 0 and 1, got {n_test!r}"
            )

        total = len(self)
        test_size = round(n_test * total)
        train_size = total - test_size
        lengths = [train_size, test_size]

        if seed is None:
            return random_split(self, lengths)
        generator = torch.Generator().manual_seed(seed)
        return random_split(self, lengths, generator=generator)

In [36]:
# Build the Dataset from the notebook-level `df_new` DataFrame
# NOTE: this rebinds `df` (previously the DataFrame) to an OBC_Dataset
# instance; `df_new` still refers to the DataFrame
df = OBC_Dataset()

In [37]:
# Randomly split the dataset into train/test subsets, with 20% of the
# samples held out for testing
train, test = df.split_data(n_test = 0.2)

In [38]:
# Sanity check: number of samples in the training subset
# (the rows remaining after the 20% test hold-out)
len(train)

1022650

In [39]:
# Sanity check: number of samples in the test subset
# (20% of the dataset, rounded to the nearest whole sample)
len(test)

255662

In [40]:
# Inspect one training sample: indexing the subset returns the
# (features, target) pair produced by OBC_Dataset.__getitem__
index = 2
features, target = train[index]
print("Features at position: ", index, ":", features)
print("\n\nTarget at position: ", index, ":", target)

Features at position:  2 : tensor([ 1.0000,  0.0000, 23.0000, 10.3167,  1.0000, 12.0000, 64.6494,  3.0000,
         0.0000])


Target at position:  2 : tensor([0.])


In [41]:
# Wrap both subsets in DataLoaders that yield mini-batches of 64 samples
batch_size = 64

# Only the training loader shuffles; the test loader keeps its default order
train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size=batch_size)
# ------- END OF ASSIGNMENT ONE ------- #

# Develop A Neural Network Solution: Part Two 🛠️

In [42]:
# Define Model Structure
class Construct_Model(nn.Sequential):
    """Feed-forward binary classifier: input_dim -> 64 -> 32 -> 1.

    The two hidden layers use ReLU and the single output unit is passed
    through a sigmoid, so the network emits one value in [0, 1] per sample.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        # Layers are created in forward order and handed to nn.Sequential,
        # which registers them under the indices 0..5
        layers = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),   # one output unit for the binary label
            nn.Sigmoid(),       # squashes the output into a probability
        ]
        super().__init__(*layers)

# Device setup for compatibility with GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Read the feature width from the first training sample instead of
# hard-coding it, so the model stays in sync with the dataset's columns
input_dim = train_loader.dataset[0][0].shape[-1]

# Define Loss Function, Construct Model, Epochs, & Define Optimizer
# BCELoss expects probabilities, which the model's final Sigmoid provides
criterion = nn.BCELoss()
model = Construct_Model(input_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.00001)
num_epochs = 10

# Train The Model
model.train()
for epoch in range(1, num_epochs + 1):
    train_loss = 0.0
    for data, target in train_loader:
        # Move data and target to the same device as model
        data, target = data.to(device), target.to(device)

        # Clear gradients left over from the previous step, then forward pass
        optimizer.zero_grad()
        output = model(data)

        # Compute loss and back-propagate
        loss = criterion(output, target)
        loss.backward()

        # Update weights
        optimizer.step()

        # Accumulate the per-batch mean loss
        train_loss += loss.item()

    # Print the epoch loss, averaged over the number of batches
    print(f"Epoch: {epoch}, Loss: {train_loss / len(train_loader)}")

Epoch: 1, Loss: 0.08602028023437579
Epoch: 2, Loss: 0.028014063850880767
Epoch: 3, Loss: 0.019610424666989194
Epoch: 4, Loss: 0.016751255395721514
Epoch: 5, Loss: 0.015391061388550443
Epoch: 6, Loss: 0.01465655174548681
Epoch: 7, Loss: 0.014149965464245661
Epoch: 8, Loss: 0.013825017023077724
Epoch: 9, Loss: 0.013554842553019402
Epoch: 10, Loss: 0.013339526167858195


In [49]:
from sklearn.metrics import roc_auc_score
import numpy as np

# Ensure the model is in evaluation mode
model.eval()

all_labels = []
all_predictions = []

# Disable gradient calculation
with torch.no_grad():
    for data, target in test_loader:
        # The model lives on `device`, so its inputs must be moved there too;
        # without this the forward pass fails whenever CUDA is in use
        data = data.to(device)

        # Get model predictions (probabilities)
        output = model(data)

        # Bring results back to the CPU before converting to NumPy, and
        # flatten the (batch, 1) tensors so each list holds plain scalars
        probs = output.cpu().numpy().ravel()
        labels = target.cpu().numpy().ravel()

        # Collect predictions and true labels
        all_predictions.extend(probs.tolist())
        all_labels.extend(labels.tolist())

# Calculate AUC using sklearn
auc = roc_auc_score(all_labels, all_predictions)
print("AUC Score:", auc)

AUC Score: 0.9994088708958818
