In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, roc_curve, f1_score, confusion_matrix
import seaborn as sns
from google.colab import drive


drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Define the neural network model
class CHDModel(nn.Module):
    def __init__(self):
        super(CHDModel, self).__init__()
        self.fc1 = nn.Linear(14, 32)
        self.fc2 = nn.Linear(32, 32)
        self.fc3 = nn.Linear(32, 16)
        self.fc4 = nn.Linear(16, 1)

        # self.fc1 = nn.Linear(12, 15)
        # self.fc2 = nn.Linear(15, 15)
        # self.fc3 = nn.Linear(15, 1)
        # self.dropout = nn.Dropout(0.5)
        
    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        # x = self.dropout(x)
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        # x = nn.functional.relu(self.fc4(x))
        # x = nn.functional.relu(self.fc5(x))
        # x = self.dropout(x)
        # x = nn.functional.relu(self.fc3(x))
        x = torch.sigmoid(self.fc4(x))
        return x

In [3]:
%cd drive/MyDrive/Project271B

/content/drive/.shortcut-targets-by-id/1Nr0kdWFr2lwIr68GZsixdoEXZU3DUzVg/Project271B


In [4]:
# Load the data and split into train and test sets

# X_train = pd.read_csv("X_train.csv")
# X_test = pd.read_csv("X_test.csv")
# y_train = pd.read_csv("y_train.csv")
# y_test = pd.read_csv("y_test.csv")

X_train = pd.read_csv("xtrain_dropped.csv")
X_test = pd.read_csv("xest_dropped.csv")
y_train = pd.read_csv("ytrain_dropped.csv")
y_test = pd.read_csv("ytest_dropped.csv")

len(X_train), len(X_test), len(y_train), len(y_test)

(3388, 599, 3388, 599)

In [5]:
X_train = X_train.loc[:, ~X_train.columns.str.contains('^Unnamed')]
X_test = X_test.loc[:, ~X_test.columns.str.contains('^Unnamed')]
y_train = y_train.loc[:, ~y_train.columns.str.contains('^Unnamed')]
y_test = y_test.loc[:, ~y_test.columns.str.contains('^Unnamed')]


In [6]:
# Scale the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert the data to PyTorch tensors
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train.values)
X_test = torch.FloatTensor(X_test)
y_test = torch.FloatTensor(y_test.values)

In [7]:
# Initialize model

model = CHDModel()
learning_rate = 0.01
epochs = 200
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [8]:
# Class imbalanced 

class_count = [0, 0]
total_count = len(y_train)
for target in y_train:
  class_count[int(target)] += 1
class_weights = [total_count / (2 * class_count[i]) for i in range(len(class_count))]

# define loss function with class weighting
criterion = nn.BCELoss(weight=torch.tensor(class_weights))
# criterion = nn.BCELoss()

In [9]:
class_weights

[0.5886031966643502, 3.3215686274509806]

In [10]:
def plot_cm(labels, predictions, p=0.5):
  cm = confusion_matrix(labels, predictions > p)
  plt.figure(figsize=(5,5))
  sns.heatmap(cm, annot=True, fmt="d")
  plt.title('Confusion matrix @{:.2f}'.format(p))
  plt.ylabel('Actual label')
  plt.xlabel('Predicted label')

  print('Legitimate Transactions Detected (True Negatives): ', cm[0][0])
  print('Legitimate Transactions Incorrectly Detected (False Positives): ', cm[0][1])
  print('Fraudulent Transactions Missed (False Negatives): ', cm[1][0])
  print('Fraudulent Transactions Detected (True Positives): ', cm[1][1])
  print('Total Fraudulent Transactions: ', np.sum(cm[1]))

In [11]:
class_weights

[0.5886031966643502, 3.3215686274509806]

In [12]:
for epoch in range(epochs+1):
    optimizer.zero_grad()
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    loss.backward()
    optimizer.step()
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


ValueError: ignored

In [None]:
# Evaluate the train model
with torch.no_grad():
    y_pred = model(X_train)
    y_pred = torch.round(y_pred)

    accuracy = (y_pred == y_train.unsqueeze(1)).float().mean()
    auc = roc_auc_score(y_train, y_pred)
    f1 = f1_score(y_train, y_pred)

    print("AUC score: {:.2f}".format(auc))
    print("F1 score: {:.2f}".format(f1))
    print(f"Accuracy: {accuracy.item():.4f}")


In [None]:
)
plot_cm(y_train, y_pred)

In [None]:
# Evaluate the test model
with torch.no_grad():
    y_pred = model(X_test)
    y_pred = torch.round(y_pred)
    accuracy = (y_pred == y_test.unsqueeze(1)).float().mean()

    auc = roc_auc_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print("AUC score: {:.2f}".format(auc))
    print("F1 score: {:.2f}".format(f1))
    print(f"Accuracy: {accuracy.item():.4f}")


In [None]:
plot_cm(y_test, y_pred)