# ECON 425 Homework 6 - Neural Networks
## Submission by Pranov Suresh

## Q 1 - Part B

In [1]:
import torch
import pandas as pd
import numpy as np
from torch import nn
from scipy.optimize import minimize
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler

In [2]:
sample_size = 1000000
np.random.seed(11)
x = np.random.normal(0,1, sample_size)
def sigmoid(z):
   return 1/(1 + np.exp(-z))
y = sigmoid(x)

In [3]:
def neural_network_2(weights, x, y):
    w0, w1, w2, = weights
    h1 = w0*x
    z1 = sigmoid(h1)
    h2 = np.dot(w1,z1)
    z2 = sigmoid(h2)
    y_hat = np.dot(z2, w2)
    loss = np.mean((y_hat - y) ** 2)
    return loss

In [4]:
def neural_network_1(weights, x, y):
    w0, w1 = weights
    h1 = x*w0
    z1 = sigmoid(h1)
    y_hat = w1 * z1
    loss = np.mean((y_hat-y)**2)
    return loss

In [5]:
sample_weights_one = np.array([0.5, 0.6])
sample_weights_two = np.array([0.5,0.75, 0.35])

In [6]:
test_results_one = neural_network_1(sample_weights_one, x, y)

In [7]:
test_results_two = neural_network_2(sample_weights_two, x, y)

In [8]:
optimised_weights_one = minimize(neural_network_1, sample_weights_one, args = (x,y), method = "BFGS")

In [9]:
optimised_weights_two = minimize(neural_network_2, sample_weights_two, args=(x, y), method = "BFGS")

In [10]:
final_error_one = optimised_weights_one.fun
final_error_two = optimised_weights_two.fun

print("Neural Network 1:")
print(f"Initial Error: {test_results_one}")
print(f"Final Error: {final_error_one}")
print(f"Optimized Weights: {optimised_weights_one.x}")

print("\nNeural Network 2:")
print(f"Initial Error: {test_results_two}")
print(f"Final Error: {final_error_two}")
print(f"Optimized Weights: {optimised_weights_two.x}")

Neural Network 1:
Initial Error: 0.05895706909518932
Final Error: 1.92841113648809e-10
Optimized Weights: [0.99990374 1.00001395]

Neural Network 2:
Initial Error: 0.12591543999922
Final Error: 0.010062587343962363
Optimized Weights: [5.59552035 3.45803255 0.67314698]


## Having built the neural network with one and two layers respectively, we can see that the training error of the models, using the optimised weights noticeably increases. This increase is also noticed when the model is built on randomly guessed weights initially. 

### The initial error for the one layer neural network yields 0.058595, while the inital error for the two layer neural network yields 0.12591 already revealing the pattern which we were looking for. The final error using optimised weights rises from 0.00000000019284 to 0.0100625. This is a larger increase than even in the initial random error. 

Given the relatively simple structure of the first model, we might be possibly looking at a case of model overfitting in the first case, as training error is not reflective of testing accuracy. Even if we were to assume that the first model is quite robust, the training error increase is explainable by the increased complexity of the model. One with more hidden layers and neutrons might possibly lead to more variance, as part of the standard complexity-variance tradeoff. 

## Q2

In [11]:
credit_card_data = pd.read_csv("E:/UCLA/Winter 2024/ECON 425/card_transdata.csv")
X = pd.DataFrame(credit_card_data.drop("fraud",axis = 1))
y = pd.DataFrame(credit_card_data["fraud"])
X_train = X.iloc[:500000]
y_train = y.iloc[:500000]
X_test = X.iloc[500000:]
y_test = y.iloc[500000:]

In [12]:
X_train_tensor = torch.tensor(X_train.values)
X_test_tensor = torch.tensor(X_test.values)
y_train_tensor = torch.tensor(y_train.values).squeeze().long()
y_test_tensor = torch.tensor(y_test.values)

In [13]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size1,output_size):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size1, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x

In [14]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
input_size = X_train_tensor.shape[1]
hidden_size1 = 150
output_size = 2
model = NeuralNetwork(input_size, hidden_size1, output_size)

In [15]:
loss_criterion = nn.CrossEntropyLoss()
learning_rate = 0.01  
optimiser = optim.SGD(model.parameters(), lr=learning_rate)

In [16]:
num_epochs =15  
batch_size = 128
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [17]:
def train_loop(dataloader, model, loss_criterion, optimiser):
    size = len(dataloader.dataset)
    model.train()
    for batch, (x,y) in enumerate(dataloader):
        optimiser.zero_grad()
        x = x.float()
        outputs = model(x)
        loss = loss_criterion(outputs, y)  
        loss.backward()  
        optimiser.step()
        if batch % 100 == 0: 
            loss, current = loss.item(), batch*batch_size + len(x)

In [18]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}\n----------")
    train_loop(train_dataloader, model, loss_criterion, optimiser)

Epoch 1
----------
Epoch 2
----------
Epoch 3
----------
Epoch 4
----------
Epoch 5
----------
Epoch 6
----------
Epoch 7
----------
Epoch 8
----------
Epoch 9
----------
Epoch 10
----------
Epoch 11
----------
Epoch 12
----------
Epoch 13
----------
Epoch 14
----------
Epoch 15
----------


In [19]:
from sklearn.metrics import accuracy_score, f1_score

model.eval()
predicted_values = []
with torch.no_grad():
    for inputs, labels in test_dataloader: 
        outputs = model(inputs.float())
        _, predictions = torch.max(outputs, 1)
        predicted_values.extend(predictions.tolist())

accuracy = accuracy_score(y_test_tensor.numpy(), predicted_values)
f1 = f1_score(y_test_tensor.numpy(), predicted_values)

print(f'Test Accuracy: {accuracy:.4f}')
print(f'Test F1 Score: {f1:.4f}')

Test Accuracy: 0.8672
Test F1 Score: 0.0693


## Based on the selected parameters of 150 neurons, 20 epochs, and a batch size of 128, we find that the testing F1-score is 0.0706. This is comparable to a F1-score of nearly 0.9999 for the decision tree models built on the same dataset. 

### Purely based on the testing F1-score, it is clear that the decision tree model fares much better than the neural network. 

The reasons behind this could be down to the standard model complexity-variance tradeoff, where the decision tree model performs better due to its simpler model structure. A model with 150 neurons, one hidden layer, and multiple epochs could lead to a poor testing F1-score. 
Thus, it can be concluded that the decision tree model is better and should be selected. 