In [1]:
#Impporting libraries and loading data
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_recall_curve, average_precision_score, confusion_matrix
df = pd.read_csv('/kaggle/input/noshowappointments/KaggleV2-May-2016.csv')
df['No-show'] = (df['No-show']=='Yes').astype(int)
df = df.drop(['PatientId', 'AppointmentID'], axis=1)

In [2]:
#Data Handling 
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])
df['DaysWaiting'] = (df['AppointmentDay'] - df['ScheduledDay']).dt.days
df['DaysWaiting'] = df['DaysWaiting'].apply(lambda x: x if x >= 0 else 0)
df = df.drop(['ScheduledDay', 'AppointmentDay'], axis=1)
df['Age'] = df['Age'].clip(0, 100)
df['Gender'] = df['Gender'].map({'F': 0, 'M': 1})
df = pd.get_dummies(df, columns=['Neighbourhood'])

In [3]:
numerical_cols = ['Age', 'DaysWaiting', 'Scholarship', 'Hipertension', 
                  'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received']
X = df.drop('No-show', axis=1)
y = df['No-show'].values

In [4]:
#Preprocessing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
train_mean = X_train[numerical_cols].mean()
train_std = X_train[numerical_cols].std()
X_train[numerical_cols] = (X_train[numerical_cols] - train_mean) / train_std
X_test[numerical_cols] = (X_test[numerical_cols] - train_mean) / train_std

In [5]:
X_train = X_train.astype(np.float32).to_numpy()
X_test = X_test.astype(np.float32).to_numpy()
X_train_tensor = torch.tensor(X_train, dtype= torch.float32)
y_train_tensor = torch.tensor(y_train, dtype= torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype= torch.float32)
y_test_tensor = torch.tensor(y_test, dtype= torch.float32).unsqueeze(1)

In [6]:
#pytorch model
class NoShowPredictor(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(NoShowPredictor, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x= self.layer1(x)
        x=self.relu(x)
        x=self.layer2(x)
        x=self.sigmoid(x)
        return x

In [7]:
#Hyperparameters
input_size = X_train.shape[1]
hidden_size = 10
model = NoShowPredictor(input_size, hidden_size)
criteria = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [8]:
#Training loop
epochs = 1000
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criteria(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

# evaluation 
# Evaluation
model.eval()
with torch.no_grad():
    train_probs = model(X_train_tensor).numpy().flatten()
    test_probs = model(X_test_tensor).numpy().flatten()

train_pred = (train_probs >= 0.5).astype(int)
test_pred = (test_probs >= 0.5).astype(int)

# Performance metrics
print("\nTraining Metrics:")
print(f"Accuracy: {accuracy_score(y_train, train_pred):.4f}")
print(f"F1-score: {f1_score(y_train, train_pred):.4f}")
print(f"PR-AUC: {average_precision_score(y_train, train_probs):.4f}")
print("Confusion Matrix:\n", confusion_matrix(y_train, train_pred))

print("\nTest Metrics:")
print(f"Accuracy: {accuracy_score(y_test, test_pred):.4f}")
print(f"F1-score: {f1_score(y_test, test_pred):.4f}")
print(f"PR-AUC: {average_precision_score(y_test, test_probs):.4f}")
print("Confusion Matrix:\n", confusion_matrix(y_test, test_pred))

Epoch 0, Loss: 0.7318
Epoch 100, Loss: 0.4645
Epoch 200, Loss: 0.4540
Epoch 300, Loss: 0.4512
Epoch 400, Loss: 0.4492
Epoch 500, Loss: 0.4477
Epoch 600, Loss: 0.4464
Epoch 700, Loss: 0.4451
Epoch 800, Loss: 0.4444
Epoch 900, Loss: 0.4439

Training Metrics:
Accuracy: 0.7992
F1-score: 0.0566
PR-AUC: 0.3863
Confusion Matrix:
 [[70134   405]
 [17349   533]]

Test Metrics:
Accuracy: 0.7988
F1-score: 0.0467
PR-AUC: 0.3547
Confusion Matrix:
 [[17549   120]
 [ 4328   109]]
