<a href="https://colab.research.google.com/github/Prernatripathi7/24095082-CSOC-IG/blob/main/Week%201/Vanilla_Neural_Network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
import torch.nn as nn
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, precision_recall_curve, auc, confusion_matrix
from sklearn.metrics import classification_report
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the appointment records and discard rows with missing values.
df = pd.read_csv("KaggleV2-May-2016.csv").dropna()

# One-hot encode the categorical columns; the raw columns are removed from
# the feature frame further down.
df = pd.concat([df, pd.get_dummies(df['Gender'], prefix='Gender')], axis=1)
df = pd.concat([df, pd.get_dummies(df['Neighbourhood'], prefix='Neighbourhood')], axis=1)

# Binary target: 'Yes' in the No-show column becomes class 1.
df['No-show'] = df['No-show'].map({'No': 0, 'Yes': 1})

df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])
df['ScheduledDate'] = df['ScheduledDay'].dt.date
df['AppointmentDate'] = df['AppointmentDay'].dt.date
# Whole calendar days between booking and appointment. Truncating both
# timestamps to midnight and subtracting is the vectorised equivalent of
# subtracting the date objects row by row.
df['WaitingDays'] = (
    df['AppointmentDay'].dt.normalize() - df['ScheduledDay'].dt.normalize()
).dt.days

# Feature frame: drop the target, identifiers, raw/derived date columns and
# the categorical columns that were one-hot encoded above.
X_df = df.drop(columns=[
    'No-show', 'ScheduledDay', 'AppointmentDay', 'ScheduledDate', 'AppointmentDate',
    'PatientId', 'AppointmentID', 'Neighbourhood', 'Gender'
])
# Z-score every column with more than two distinct values; binary / one-hot
# columns are left untouched, and constant columns are skipped to avoid
# dividing by zero.
# NOTE(review): the mean/std are computed on the full dataset before the
# train/validation split, so validation rows influence the scaling. Consider
# fitting the statistics on the training split only.
for col in X_df.columns:
    if X_df[col].nunique() > 2:
        std = X_df[col].std()
        if std != 0:
            X_df[col] = (X_df[col] - X_df[col].mean()) / std
X = X_df.values.astype(np.float32)
y = df['No-show'].values.astype(np.int64)

# 80/20 split with a fixed seed.
trainx, valx, trainy, valy = train_test_split(X, y, test_size=0.2, random_state=42)

# 'balanced' class weights computed from the training labels only; they are
# handed to the loss function as a tensor on the training device.
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(trainy), y=trainy)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)

batch_size = 128
train_dataset = TensorDataset(torch.tensor(trainx), torch.tensor(trainy))
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only run just produces a warning.
    pin_memory=(device.type == "cuda"),
    # Keep the worker processes alive between epochs instead of re-creating
    # them every time the loader is iterated (the loop runs thousands of epochs).
    persistent_workers=True,
)
class Net(nn.Module):
    """Fully connected classifier: input -> 80 -> 40 -> 2 with ReLU in between.

    ``forward`` returns the raw output of the last linear layer (no softmax
    is applied inside the module).
    """

    def __init__(self, input_size):
        """Build the layers; ``input_size`` is the number of input features."""
        super().__init__()
        # Layers are created in network order so parameter names and the
        # order of random initialisation stay fixed.
        self.fc1 = nn.Linear(in_features=input_size, out_features=80)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(in_features=80, out_features=40)
        self.relu2 = nn.ReLU()
        self.out = nn.Linear(in_features=40, out_features=2)

    def forward(self, x):
        """Return the two-class logits for the batch ``x``."""
        hidden = self.fc1(x)
        hidden = self.relu1(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu2(hidden)
        logits = self.out(hidden)
        return logits

# Model, class-weighted cross-entropy loss and plain SGD optimiser.
model = Net(trainx.shape[1]).to(device)
criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
optimizer = optim.SGD(model.parameters(), lr=0.05)

epochs = 2700
losses = []  # one averaged training-loss value per epoch

# perf_counter() is a monotonic clock intended for measuring intervals;
# time.time() can jump if the system clock is adjusted during a long run.
start_time = time.perf_counter()
for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # The criterion yields one averaged value per batch; scaling by the
        # batch size keeps a smaller final batch from being over-weighted.
        running_loss += loss.item() * inputs.size(0)

    epoch_loss = running_loss / len(train_loader.dataset)
    losses.append(epoch_loss)

    # Progress line every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {epoch_loss:.4f}")
end_time = time.perf_counter()

# Peak CUDA memory (in MB) is only meaningful when training ran on a GPU;
# on a CPU device report 0.0 without touching the CUDA counters.
if device.type == "cuda":
    max_memory_allocated = torch.cuda.max_memory_allocated(device) / (1024 ** 2)
    max_memory_reserved = torch.cuda.max_memory_reserved(device) / (1024 ** 2)
else:
    max_memory_allocated = 0.0
    max_memory_reserved = 0.0

# Validation pass: a single forward over the whole validation set with
# gradients disabled, then softmax to get the probability of class 1.
model.eval()
with torch.no_grad():
    val_inputs = torch.tensor(valx).to(device)
    logits = model(val_inputs)
    probs = torch.softmax(logits, dim=1).cpu().numpy()
    preds_class1 = probs[:, 1]

# Predict class 1 when its probability exceeds this fixed cut-off.
best_threshold = 0.35
val_preds_thresh = (preds_class1 > best_threshold).astype(int)
convergence_time = end_time - start_time  # wall-clock seconds spent in the training loop
# Area under the precision-recall curve, built from the raw probabilities.
pr, rc, _ = precision_recall_curve(valy, preds_class1)
pr_auc = auc(rc, pr)
cm = confusion_matrix(valy, val_preds_thresh)
print(f"Convergence Time: {convergence_time:.4f}")
print(f"Precision-Recall AUC: {pr_auc:.4f}")
print("Confusion Matrix:")
print(cm)
report = classification_report(valy, val_preds_thresh, target_names=['Show', 'No-show'])
print("Classification Report:")
print(report)
print(f"Max CUDA Memory Allocated: {max_memory_allocated:.2f} MB")
print(f"Max CUDA Memory Reserved: {max_memory_reserved:.2f} MB")

Epoch 0, Loss: 0.6620
Epoch 100, Loss: 0.5724
Epoch 200, Loss: 0.5561
Epoch 300, Loss: 0.5423
Epoch 400, Loss: 0.5310
Epoch 500, Loss: 0.5222
Epoch 600, Loss: 0.5162
Epoch 700, Loss: 0.5104
Epoch 800, Loss: 0.5054
Epoch 900, Loss: 0.5025
Epoch 1000, Loss: 0.4985
Epoch 1100, Loss: 0.4949
Epoch 1200, Loss: 0.4922
Epoch 1300, Loss: 0.4900
Epoch 1400, Loss: 0.4887
Epoch 1500, Loss: 0.4865
Epoch 1600, Loss: 0.4847
Epoch 1700, Loss: 0.4842
Epoch 1800, Loss: 0.4814
Epoch 1900, Loss: 0.4822
Epoch 2000, Loss: 0.4798
Epoch 2100, Loss: 0.4792
Epoch 2200, Loss: 0.4788
Epoch 2300, Loss: 0.4772
Epoch 2400, Loss: 0.4763
Epoch 2500, Loss: 0.4751
Epoch 2600, Loss: 0.4748
Convergence Time: 6145.7082
Precision-Recall AUC: 0.3295
Confusion Matrix:
[[9177 8492]
 [1003 3434]]
Classification Report:
              precision    recall  f1-score   support

        Show       0.90      0.52      0.66     17669
     No-show       0.29      0.77      0.42      4437

    accuracy                           0.57     

In [4]:
import pandas as pd
import numpy as np
import time
import psutil
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, f1_score, precision_recall_curve, auc, confusion_matrix
from sklearn.metrics import classification_report
# Load the appointment records and discard rows with missing values.
df = pd.read_csv("KaggleV2-May-2016.csv")
df = df.dropna(axis=0)

# One-hot encode the categorical columns and append them to the frame.
gender_encoded = pd.get_dummies(df['Gender'], prefix='Gender')
neighbourhood_encoded = pd.get_dummies(df['Neighbourhood'], prefix='Neighbourhood')
df = pd.concat([df, gender_encoded,neighbourhood_encoded], axis=1)
df = df.drop(['Gender'], axis=1)
# Binary target, then a two-column one-hot version for the NumPy network.
df['No-show'] = df['No-show'].map({'No': 0, 'Yes': 1})
y = pd.get_dummies(df['No-show']).values.astype(np.float32)
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])
df['ScheduledDate'] = df['ScheduledDay'].dt.date
df['AppointmentDate'] = df['AppointmentDay'].dt.date
# Whole calendar days between booking and appointment. Truncating both
# timestamps to midnight and subtracting is the vectorised equivalent of
# subtracting the date objects row by row.
df['WaitingDays'] = (
    df['AppointmentDay'].dt.normalize() - df['ScheduledDay'].dt.normalize()
).dt.days
# Feature frame: drop the target, identifiers, raw/derived date columns and
# the raw Neighbourhood column (Gender was already dropped above).
X_df = df.drop(columns=[
    'No-show', 'ScheduledDay', 'AppointmentDay', 'ScheduledDate', 'AppointmentDate',
    'PatientId', 'AppointmentID','Neighbourhood'
])

# Z-score every column with more than two distinct values; binary / one-hot
# columns are left untouched and constant columns are skipped.
# NOTE(review): the statistics come from the full dataset, i.e. before the
# train/validation split below.
for col in X_df.columns:
    if X_df[col].nunique() > 2:
        std = X_df[col].std()
        if std == 0:
            continue
        X_df[col] = (X_df[col] - X_df[col].mean()) / std

X = X_df.values.astype(np.float32)
# NOTE(review): the PyTorch cell above splits with random_state=42, so the
# two models are evaluated on different validation rows.
trainx, valx, trainy, valy = train_test_split(X, y, test_size=0.2, random_state=0)
def relu(x):
    """Element-wise rectified linear unit: negative entries become 0."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Gradient of ReLU with respect to its input: 1.0 where x > 0, else 0.0."""
    is_active = x > 0
    return np.where(is_active, 1.0, 0.0)
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating so that large
    scores do not overflow; the result is unchanged by that shift.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    row_total = np.sum(exps, axis=1, keepdims=True)
    return exps / row_total
# Seed NumPy's global RNG so the initial weights (and the later batch
# shuffling, which draws from the same generator) are repeatable.
np.random.seed(2)

# Layer widths: two hidden layers followed by a two-unit output.
layer1_nodes, layer2_nodes, output_nodes = 64, 32, 2

# Standard-normal weights scaled by sqrt(1 / fan_in). The three matrices are
# drawn in network order, which fixes how the random stream is consumed.
n_inputs = trainx.shape[1]
weights1 = np.random.randn(n_inputs, layer1_nodes) * np.sqrt(1 / n_inputs)
weights2 = np.random.randn(layer1_nodes, layer2_nodes) * np.sqrt(1 / layer1_nodes)
weights3 = np.random.randn(layer2_nodes, output_nodes) * np.sqrt(1 / layer2_nodes)

# Biases start at zero and are kept as row vectors so they broadcast over a batch.
biases1 = np.zeros(shape=(1, layer1_nodes))
biases2 = np.zeros(shape=(1, layer2_nodes))
biases3 = np.zeros(shape=(1, output_nodes))
def feed_forward(X, W1, b1, W2, b2, W3, b3):
    """Run a forward pass through the three-layer network.

    Returns the tuple ``(Z1, A1, Z2, A2, Z3, A3)``: the pre-activation and
    activation of each layer, in order. ``A3`` is the softmax output.
    """
    # Hidden layer 1: affine transform followed by ReLU.
    pre1 = np.dot(X, W1) + b1
    act1 = relu(pre1)

    # Hidden layer 2: affine transform followed by ReLU.
    pre2 = np.dot(act1, W2) + b2
    act2 = relu(pre2)

    # Output layer: affine transform followed by a row-wise softmax.
    pre3 = np.dot(act2, W3) + b3
    probs = softmax(pre3)

    return pre1, act1, pre2, act2, pre3, probs
def backpropagation(X, y, Z1, A1, Z2, A2, Z3, A3, W2, W3, class_weights):
    """Compute gradients of the class-weighted cross-entropy loss.

    Parameters
    ----------
    X : batch of inputs, one row per sample.
    y : one-hot targets, one column per class.
    Z1, A1, Z2, A2, Z3, A3 : values returned by ``feed_forward`` for ``X``
        (``Z3`` is accepted for signature symmetry but not used).
    W2, W3 : weights of the second hidden layer and the output layer.
    class_weights : mapping or sequence indexable by class index
        (``class_weights[k]`` is the weight of class ``k``).

    Returns
    -------
    ``(dW1, db1, dW2, db2, dW3, db3)``, each averaged over the batch.
    """
    m = X.shape[0]
    # One weight per one-hot column, so the lookup is not tied to two classes.
    n_classes = y.shape[1]
    weights_array = np.array([class_weights[k] for k in range(n_classes)])
    # Each sample picks up the weight of its true class (y is one-hot).
    sample_weights = np.sum(y * weights_array, axis=1)[:, np.newaxis]

    # Output layer: softmax + cross-entropy gives (A3 - y), scaled per sample.
    dZ3 = (A3 - y) * sample_weights
    dW3 = np.dot(A2.T, dZ3) / m
    db3 = np.sum(dZ3, axis=0, keepdims=True) / m

    # Second hidden layer: propagate through W3, then gate by the ReLU slope.
    dA2 = np.dot(dZ3, W3.T)
    dZ2 = dA2 * relu_derivative(Z2)
    dW2 = np.dot(A1.T, dZ2) / m
    db2 = np.sum(dZ2, axis=0, keepdims=True) / m

    # First hidden layer: propagate through W2, then gate by the ReLU slope.
    dA1 = np.dot(dZ2, W2.T)
    dZ1 = dA1 * relu_derivative(Z1)
    dW1 = np.dot(X.T, dZ1) / m
    db1 = np.sum(dZ1, axis=0, keepdims=True) / m

    return dW1, db1, dW2, db2, dW3, db3
def balanced_cross_entropy(y_true, y_pred, class_weights):
    """Mean class-weighted cross-entropy over a batch.

    Parameters
    ----------
    y_true : one-hot targets, one column per class.
    y_pred : predicted class probabilities, same shape as ``y_true``.
    class_weights : mapping or sequence indexable by class index
        (``class_weights[k]`` is the weight of class ``k``).

    Returns
    -------
    The average over samples of ``weight(true class) * cross-entropy``.
    """
    epsilon = 1e-8  # keeps log() finite when a predicted probability is 0
    # One weight per one-hot column, so the lookup is not tied to two classes.
    n_classes = y_true.shape[1]
    weights_array = np.array([class_weights[k] for k in range(n_classes)])
    # Each sample picks up the weight of its true class (y_true is one-hot).
    sample_weights = np.sum(y_true * weights_array, axis=1)
    loss = -np.sum(y_true * np.log(y_pred + epsilon), axis=1)
    weighted_loss = sample_weights * loss

    return np.mean(weighted_loss)

def get_batches(X, y, batch_size):
    """Yield ``(X_batch, y_batch)`` pairs covering the data once in random order.

    The row order is reshuffled with NumPy's global RNG each time the
    generator is started; the final batch may be smaller than ``batch_size``.
    """
    n_samples = X.shape[0]
    order = np.arange(n_samples)
    np.random.shuffle(order)
    for lo in range(0, n_samples, batch_size):
        picked = order[lo:lo + batch_size]
        yield X[picked], y[picked]

# compute_class_weight needs integer labels, so undo the one-hot encoding.
trainy_labels = np.argmax(trainy, axis=1)
class_weights_array = compute_class_weight(class_weight='balanced', classes=np.array([0, 1]), y=trainy_labels)
class_weights = {0: class_weights_array[0], 1: class_weights_array[1]}
learning_rate = 0.05
epochs = 2700
batch_size = 128
loss_history = []  # one averaged training-loss value per epoch

# perf_counter() is a monotonic clock intended for measuring intervals;
# time.time() can jump if the system clock is adjusted during a long run.
start_time = time.perf_counter()
for epoch in range(epochs):
    epoch_loss = 0
    for X_batch, y_batch in get_batches(trainx, trainy, batch_size):
        Z1, A1, Z2, A2, Z3, A3 = feed_forward(X_batch, weights1, biases1, weights2, biases2, weights3, biases3)
        loss = balanced_cross_entropy(y_batch, A3, class_weights)
        # The loss is a batch mean; scaling by the batch size keeps a smaller
        # final batch from being over-weighted in the epoch average.
        epoch_loss += loss * X_batch.shape[0]
        dW1, db1, dW2, db2, dW3, db3 = backpropagation(X_batch, y_batch, Z1, A1, Z2, A2, Z3, A3, weights2, weights3, class_weights)

        # Plain mini-batch gradient-descent step, applied in place.
        weights1 -= learning_rate * dW1
        biases1 -= learning_rate * db1
        weights2 -= learning_rate * dW2
        biases2 -= learning_rate * db2
        weights3 -= learning_rate * dW3
        biases3 -= learning_rate * db3

    epoch_loss /= trainx.shape[0]
    loss_history.append(epoch_loss)

    # Progress line every 100 epochs.
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {epoch_loss:.4f}")
end_time = time.perf_counter()

# Memory footprint of the current process after training, in MB.
process = psutil.Process(os.getpid())
mem_info = process.memory_info()
rss_memory = mem_info.rss / (1024 ** 2)
vms_memory = mem_info.vms / (1024 ** 2)
def predict(X, W1, b1, W2, b2, W3, b3):
    """Return ``(predicted_class_indices, class_probabilities)`` for ``X``.

    The probabilities are the softmax output of ``feed_forward``; the
    predicted class is the column with the highest probability in each row.
    """
    forward_values = feed_forward(X, W1, b1, W2, b2, W3, b3)
    probabilities = forward_values[-1]  # last element is the softmax output
    labels = np.argmax(probabilities, axis=1)
    return labels, probabilities
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    precision_recall_curve,
    auc,
    confusion_matrix
)
# Forward pass over the validation set with the trained parameters.
val_preds_binary, val_probs = predict(
    valx, weights1, biases1, weights2, biases2, weights3, biases3
)

# Integer labels from the one-hot targets, and the probability of class 1.
valy_flat = np.argmax(valy, axis=1)
val_probs_class1 = val_probs[:, 1]

# Predict class 1 when its probability exceeds this fixed cut-off.
best_threshold = 0.35
above_threshold = val_probs_class1 > best_threshold
val_preds_thresholded = above_threshold.astype(int)

# Seconds spent in the training loop.
convergence_time = end_time - start_time

# Area under the precision-recall curve, built from the raw probabilities.
precision, recall, _ = precision_recall_curve(valy_flat, val_probs_class1)
pr_auc = auc(recall, precision)
cm = confusion_matrix(valy_flat, val_preds_thresholded)

print(f"Convergence Time: {convergence_time:.4f}")
print(f"Precision-Recall AUC: {pr_auc:.4f}")
print("Confusion Matrix:")
print(cm)
print("Classification Report:")
print(
    classification_report(
        valy_flat,
        val_preds_thresholded,
        target_names=["No-show=0", "No-show=1"],
    )
)
print(f"RSS Memory Usage: {rss_memory:.2f} MB")
print(f"VMS Memory Usage: {vms_memory:.2f} MB")


Epoch 0, Loss: 0.6550
Epoch 100, Loss: 0.5683
Epoch 200, Loss: 0.5520
Epoch 300, Loss: 0.5413
Epoch 400, Loss: 0.5325
Epoch 500, Loss: 0.5260
Epoch 600, Loss: 0.5226
Epoch 700, Loss: 0.5194
Epoch 800, Loss: 0.5159
Epoch 900, Loss: 0.5139
Epoch 1000, Loss: 0.5115
Epoch 1100, Loss: 0.5097
Epoch 1200, Loss: 0.5084
Epoch 1300, Loss: 0.5065
Epoch 1400, Loss: 0.5063
Epoch 1500, Loss: 0.5051
Epoch 1600, Loss: 0.5037
Epoch 1700, Loss: 0.5018
Epoch 1800, Loss: 0.5022
Epoch 1900, Loss: 0.5016
Epoch 2000, Loss: 0.5005
Epoch 2100, Loss: 0.4993
Epoch 2200, Loss: 0.4995
Epoch 2300, Loss: 0.4989
Epoch 2400, Loss: 0.4988
Epoch 2500, Loss: 0.4984
Epoch 2600, Loss: 0.4976
Convergence Time: 1041.6400
Precision-Recall AUC: 0.3332
Confusion Matrix:
[[8253 9472]
 [ 731 3650]]
Classification Report:
              precision    recall  f1-score   support

   No-show=0       0.92      0.47      0.62     17725
   No-show=1       0.28      0.83      0.42      4381

    accuracy                           0.54     