In [3]:
import matplotlib.pylab as plt
import numpy as np
import sklearn as sk
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch import Tensor
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score


# Read the EV dataset; the relative path is resolved against the working directory
data = pd.read_csv(filepath_or_buffer='Data_EV.csv')


def Create_data(one_hot, df=None):
    """Split the EV dataset into a feature frame and the target series.

    Parameters
    ----------
    one_hot : bool
        If falsy, the features are returned with the raw
        'Political Affiliation' column left in place. If truthy, that column
        is replaced by one indicator column per category.
    df : pandas.DataFrame, optional
        Frame to read from. Defaults to the module-level ``data`` frame, so
        the original one-argument call keeps working.

    Returns
    -------
    (X, y)
        ``X`` is ``df`` without the 'Number of Evs 2022' and 'State' columns
        (plus the encoding described above); ``y`` is the
        'Number of Evs 2022' column.
    """
    if df is None:
        df = data

    # Prepare the data
    y = df['Number of Evs 2022']  # Dependent variable
    X = df.drop(columns=['Number of Evs 2022', 'State'])

    if not one_hot:
        return X, y

    # The encoder's default output is a SciPy sparse matrix; densify it here
    # instead of passing `sparse=False`, a keyword that newer scikit-learn
    # releases renamed to `sparse_output` and then removed.
    encoder = OneHotEncoder()
    one_hot_encoded = encoder.fit_transform(df[['Political Affiliation']]).toarray()
    feature_names = encoder.categories_[0]

    # Reuse the source index so the column-wise concat pairs rows correctly
    # even when the frame does not carry a default RangeIndex.
    one_hot_encoded_df = pd.DataFrame(one_hot_encoded, columns=feature_names, index=df.index)
    X = pd.concat([X.drop(columns=['Political Affiliation']), one_hot_encoded_df], axis=1)

    return X, y


X, y = Create_data(True)

# NOTE(review): both scalers below are fit on the full dataset, while the
# train/validation split is only made afterwards from X_scaled / y_scaled, so
# validation rows contribute to the scaling statistics. Confirm this is intended.

# Standardize the features
x_scaler = StandardScaler()
X_scaled = x_scaler.fit_transform(X)

# Standardize the target; StandardScaler expects a 2-D array, hence the reshape
y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(y.values.reshape(-1, 1))

# Keep the original name bound to the target scaler (as it was before), while
# the feature scaler now stays reachable through `x_scaler`.
scaler = y_scaler

# Define function to calculate regression metrics
def calculate_regression_metrics(model, loader):
    """Run ``model`` over every batch of ``loader`` and score its predictions.

    Predictions are made in eval mode with gradient tracking disabled. The
    metrics are computed on whatever scale the loader's targets are in.

    Parameters
    ----------
    model : torch.nn.Module
        Model to evaluate.
    loader : iterable
        Yields ``(features, targets)`` batches.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, r2)``.
    """
    # Remember the caller's mode so evaluation does not flip a model that was
    # deliberately left in eval mode back into training mode.
    was_training = model.training
    model.eval()
    y_true = []
    y_pred = []
    try:
        with torch.no_grad():
            for batch_features, batch_targets in loader:
                pred = model(batch_features)
                y_true.extend(batch_targets.tolist())
                y_pred.extend(pred.tolist())
    finally:
        model.train(was_training)

    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    return mae, mse, rmse, r2




In [7]:
class NeuralNetwork(nn.Module):
    """Fully connected network with a configurable stack of hidden layers.

    The stack is: an input layer (``in_features`` -> ``num_nodes``) followed
    by the activation, then ``num_layers - 1`` further hidden layers
    (``num_nodes`` -> ``num_nodes``) each followed by the activation, and a
    final linear layer producing a single output.

    Parameters
    ----------
    num_layers : int
        Number of hidden layers. Values below 1 still yield one hidden layer,
        because the input layer is always created.
    num_nodes : int
        Width of every hidden layer.
    activation : str
        One of 'sigmoid', 'relu' or 'tanh'.
    in_features : int, optional
        Number of input features. Defaults to 39, the width that was
        previously hard-coded.

    Raises
    ------
    ValueError
        If ``activation`` is not one of the supported names.
    """

    # Maps each accepted activation name to the module class to instantiate.
    _ACTIVATIONS = {'sigmoid': nn.Sigmoid, 'relu': nn.ReLU, 'tanh': nn.Tanh}

    def __init__(self, num_layers, num_nodes, activation, in_features=39):
        super().__init__()
        if activation not in self._ACTIVATIONS:
            raise ValueError("Unsupported activation function")
        make_activation = self._ACTIVATIONS[activation]

        layers = [
            nn.Linear(in_features=in_features, out_features=num_nodes),
            make_activation(),
        ]
        for _ in range(num_layers - 1):
            layers.append(nn.Linear(in_features=num_nodes, out_features=num_nodes))
            layers.append(make_activation())
        layers.append(nn.Linear(in_features=num_nodes, out_features=1))

        # Attribute name kept as-is so existing state_dict keys stay valid.
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return its output."""
        return self.linear_relu_stack(x)

# Define function to train the model
def train_model(model, optimizer, criterion, train_loader, val_loader, epochs):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Parameters
    ----------
    model : torch.nn.Module
        Model to optimize in place.
    optimizer : torch.optim.Optimizer
        Optimizer bound to the model's parameters.
    criterion : callable
        Loss called as ``criterion(pred, target)``.
    train_loader, val_loader : torch.utils.data.DataLoader
        Yield ``(features, targets)`` batches; their ``.dataset`` length is
        used to turn summed batch losses into per-sample averages.
    epochs : int
        Number of passes over ``train_loader``.

    Returns
    -------
    tuple of list
        ``(train_loss_history, val_loss_history)``, one entry per epoch.
    """
    # The caller's mode is restored on exit, including when training is
    # interrupted part-way through an epoch.
    was_training = model.training
    train_loss_history = []
    val_loss_history = []
    try:
        for epoch in range(epochs):
            # Explicit train mode: mode-dependent layers (dropout, batch norm)
            # must not inherit eval mode from the previous validation pass.
            model.train()
            train_loss = 0.0
            for batch_features, batch_targets in train_loader:
                optimizer.zero_grad()
                pred = model(batch_features)
                loss = criterion(pred, batch_targets)
                loss.backward()
                optimizer.step()
                # Weight by batch size so a smaller last batch is not over-counted.
                train_loss += loss.item() * len(batch_features)
            train_loss /= len(train_loader.dataset)
            train_loss_history.append(train_loss)

            # Validation must run in eval mode and without gradient tracking.
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for batch_features, batch_targets in val_loader:
                    pred = model(batch_features)
                    loss = criterion(pred, batch_targets)
                    val_loss += loss.item() * len(batch_features)
            val_loss /= len(val_loader.dataset)
            val_loss_history.append(val_loss)

            if (epoch + 1) % 5 == 0:
                print(f"Epoch {epoch+1} Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}")
    finally:
        model.train(was_training)
    return train_loss_history, val_loss_history

# Define function to calculate accuracy
def calculate_accuracy(model, loader):
    """Return the accuracy of ``model``'s rounded predictions over ``loader``.

    Each prediction is rounded to the nearest integer and compared with the
    target via ``accuracy_score``, so this is only meaningful when the
    loader's targets are integer-valued labels.

    Parameters
    ----------
    model : torch.nn.Module
        Model to evaluate.
    loader : iterable
        Yields ``(features, targets)`` batches.
    """
    # Remember the caller's mode so evaluation does not flip a model that was
    # deliberately left in eval mode back into training mode.
    was_training = model.training
    model.eval()
    y_true = []
    y_pred = []
    try:
        with torch.no_grad():
            for batch_features, batch_targets in loader:
                # round() yields the nearest integer, which is 0 or 1 only if
                # the raw outputs already lie in that range.
                pred = model(batch_features).round()
                y_true.extend(batch_targets.tolist())
                y_pred.extend(pred.tolist())
    finally:
        model.train(was_training)
    return accuracy_score(y_true, y_pred)


# Hold out 20% of the standardized rows for validation (fixed split seed)
X_train, X_val, y_train, y_val = train_test_split(
    X_scaled,
    y_scaled,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Wrap each split as float32 tensors paired (features, targets)
train_dataset = TensorDataset(
    torch.tensor(X_train, dtype=torch.float32),
    torch.tensor(y_train, dtype=torch.float32),
)
val_dataset = TensorDataset(
    torch.tensor(X_val, dtype=torch.float32),
    torch.tensor(y_val, dtype=torch.float32),
)

# Only the training batches are reshuffled; validation order stays fixed
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Define hyperparameters
learning_rate = 0.01
epochs = 30

# Seed applied before every configuration so a single row of the results can
# be reproduced on its own and all configurations start from the same RNG state.
sweep_seed = 42

# Define different configurations of hyperparameters to sweep over
# NOTE(review): the sweep covers 13 * 9 * 3 = 351 models, and the largest ones
# (e.g. 100 layers of 4096 nodes, ~16.8M weights per hidden layer) reach on the
# order of a billion parameters. Confirm this fits the available memory/time.
num_layers_list = [1, 3, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
num_nodes_list = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
activation_functions = ['relu', 'tanh', 'sigmoid']

# Store results: one tuple per configuration, in sweep order
results = []

# Iterate over different hyperparameter configurations
for num_layers in num_layers_list:
    for num_nodes in num_nodes_list:
        for activation in activation_functions:
            # Weight initialization draws from torch's global RNG (and so
            # should batch shuffling, as train_loader has no generator of its
            # own); without a seed every run of this cell gives different metrics.
            torch.manual_seed(sweep_seed)

            # Construct neural network
            model = NeuralNetwork(num_layers, num_nodes, activation)
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)
            criterion = nn.MSELoss()

            # Train the model; the per-epoch loss histories are not used here
            train_model(model, optimizer, criterion, train_loader, val_loader, epochs)

            # Calculate regression metrics on both splits
            train_mae, train_mse, train_rmse, train_r2 = calculate_regression_metrics(model, train_loader)
            val_mae, val_mse, val_rmse, val_r2 = calculate_regression_metrics(model, val_loader)

            # Store results in a tuple
            results.append((num_layers, num_nodes, activation, train_mae, val_mae, train_mse, val_mse, train_rmse, val_rmse, train_r2, val_r2))


# Turn each result tuple into a labelled table row
# Metric column names, in the same order as the values stored after the three
# configuration fields of every result tuple.
metric_columns = [
    "Train MAE",
    "Validation MAE",
    "Train MSE",
    "Validation MSE",
    "Train RMSE",
    "Validation RMSE",
    "Train R^2",
    "Validation R^2",
]

results_table = []
for result in results:
    result_dict = {
        "Configuration": f"{result[0]} layers, {result[1]} nodes, {result[2]} activation",
    }
    result_dict.update(zip(metric_columns, result[3:]))
    results_table.append(result_dict)

# Create a DataFrame from the list of dictionaries; the explicit column list
# pins the column order and keeps the columns present if the sweep is empty.
results_df = pd.DataFrame(results_table, columns=["Configuration", *metric_columns])

styled_df = (
    results_df.style
    .format("{:.4f}", subset=metric_columns)
    .background_gradient(cmap='viridis', subset=["Train R^2", "Validation R^2"])
    # set_properties applies a constant style to every cell of the subset and
    # replaces the deprecated Styler.applymap call.
    .set_properties(subset=["Configuration"], **{'font-weight': 'bold'})
)

# Display the styled DataFrame
styled_df

Epoch 5 Train Loss: 0.0318, Validation Loss: 0.0589
Epoch 10 Train Loss: 0.0849, Validation Loss: 0.0622
Epoch 15 Train Loss: 0.0231, Validation Loss: 0.0461
Epoch 20 Train Loss: 0.0033, Validation Loss: 0.0551
Epoch 25 Train Loss: 0.0016, Validation Loss: 0.0480
Epoch 30 Train Loss: 0.0031, Validation Loss: 0.0476
Epoch 5 Train Loss: 0.5342, Validation Loss: 0.0945
Epoch 10 Train Loss: 0.3360, Validation Loss: 0.1161
Epoch 15 Train Loss: 0.2057, Validation Loss: 0.0738
Epoch 20 Train Loss: 0.1110, Validation Loss: 0.1118
Epoch 25 Train Loss: 0.0581, Validation Loss: 0.0987
Epoch 30 Train Loss: 0.0276, Validation Loss: 0.1232
Epoch 5 Train Loss: 0.9390, Validation Loss: 0.0808
Epoch 10 Train Loss: 0.7727, Validation Loss: 0.0820
Epoch 15 Train Loss: 0.6434, Validation Loss: 0.0928
Epoch 20 Train Loss: 0.5347, Validation Loss: 0.1240
Epoch 25 Train Loss: 0.4273, Validation Loss: 0.1762
Epoch 30 Train Loss: 0.3410, Validation Loss: 0.1560
Epoch 5 Train Loss: 0.1249, Validation Loss: 0.05