In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score, confusion_matrix, precision_recall_curve, auc
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import seaborn as sns


In [None]:
# Load the appointment records and turn them into a purely numeric feature table
# whose first column is the 'No-show' label.
df = pd.read_csv("dataset.csv")

# Encode the two string-valued binary columns as 0/1.
df['Gender'] = df['Gender'].map({'F': 0, 'M': 1})
df['No-show'] = df['No-show'].map({'No': 0, 'Yes': 1})

df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])

# Waiting time in whole calendar days. Both timestamps are truncated to midnight
# before subtracting: `.dt.days` floors a timedelta, so any time-of-day component
# on either column would otherwise shave up to one day off the result (e.g. a
# booking made at 18:00 for the next day would count as 0 days).
# NOTE(review): presumably ScheduledDay carries a time of day — if neither column
# does, normalize() is a no-op and the result is unchanged.
df['wait_days'] = (
    df['AppointmentDay'].dt.normalize() - df['ScheduledDay'].dt.normalize()
).dt.days
# Appointments dated before their scheduling date are treated as zero wait.
df['wait_days'] = df['wait_days'].clip(lower=0)

# Drop rows with a negative age. The explicit copy makes `df` an independent
# frame so the column assignments below do not write into a slice of the
# original (which triggers SettingWithCopyWarning).
df = df[df['Age'] >= 0].copy()

# Min-max scale Age and wait_days to [0, 1].
# NOTE(review): the min/max are taken over every row, i.e. before the train/test
# split performed later in this cell.
min_age = df['Age'].min()
max_age = df['Age'].max()
df['Age'] = (df['Age'] - min_age) / (max_age - min_age)
min_wait = df['wait_days'].min()
max_wait = df['wait_days'].max()
df['wait_days'] = (df['wait_days'] - min_wait) / (max_wait - min_wait)

# One-hot encode the neighbourhood into integer 'location_*' indicator columns.
df = pd.get_dummies(df, columns=['Neighbourhood'], prefix='location', dtype=int)

# Identifiers and raw timestamps are not used as model inputs.
delete_columns = ['PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay']
df = df.drop(columns=delete_columns)

# Move the label to the front so downstream code can slice it off as column 0.
column = 'No-show'
df = df[[column] + [col for col in df.columns if col != column]]

# Shuffle the rows and split them into train and test partitions.
SEED = 42            # fixed so the partition is identical on every run
TRAIN_SIZE = 90000   # number of shuffled rows used for training; the rest are test

data = df.to_numpy()
# A seeded Generator shuffles the rows (axis 0) in place, like np.random.shuffle,
# but yields the same permutation after every kernel restart.
np.random.default_rng(SEED).shuffle(data)
data_train = data[:TRAIN_SIZE]
data_test = data[TRAIN_SIZE:]

# Column 0 holds the label; every remaining column is a feature.
x_train = data_train[:, 1:]
y_train = data_train[:, 0]
x_test = data_test[:, 1:]
y_test = data_test[:, 0]

# Convert each NumPy split to a float32 tensor in one pass.
x_train_tensor, y_train_tensor, x_test_tensor, y_test_tensor = (
    torch.from_numpy(split).float()
    for split in (x_train, y_train, x_test, y_test)
)

# Pair features with labels so a loader yields (inputs, targets) tuples.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

# Mini-batches of 16; only the training loader reshuffles its rows.
train_loader = DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=16,
    shuffle=False,
)


In [None]:

class NeuralNet(nn.Module):
    """Fully connected binary classifier that emits one raw logit per sample.

    Layer widths are ``input_dim -> 128 -> 64 -> 32 -> 1`` with a ReLU after
    each of the three hidden layers. No sigmoid is applied to the output, so
    the result is meant to be paired with a logit-based loss or passed through
    ``torch.sigmoid`` by the caller.

    Args:
        input_dim: Number of features per input sample. Defaults to 90, the
            width that was previously hard-coded, so ``NeuralNet()`` builds
            the same network as before.
    """

    def __init__(self, input_dim: int = 90):
        super().__init__()
        # Attribute names are kept as fc1..fc4 so state_dict keys are unchanged.
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of shape ``(..., input_dim)`` to logits of shape ``(..., 1)``."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        # The final layer is left linear: the output is a logit, not a probability.
        return self.fc4(x)

In [None]:
# Train the classifier with Adam on binary cross-entropy over logits.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed torch's global RNG before the model is built so weight initialisation and
# the DataLoader's shuffle order repeat from run to run (GPU kernels may still
# introduce small nondeterminism).
torch.manual_seed(42)

model = NeuralNet().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

epochs = 50
model.train()  # explicit, in case the model was left in eval mode
for epoch in range(epochs):
    # Sample-weighted sum of batch losses, kept on `device` to avoid a
    # host sync on every step.
    loss_sum = torch.zeros((), device=device)
    samples_seen = 0

    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(device)
        # The model outputs shape (batch, 1); reshape the targets to match.
        batch_y = batch_y.to(device).view(-1, 1)

        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = batch_X.size(0)
        loss_sum += loss.detach() * batch_size
        samples_seen += batch_size

    if (epoch + 1) % 10 == 0:
        # Report the mean loss over the whole epoch rather than the loss of
        # whichever mini-batch happened to come last.
        epoch_loss = (loss_sum / max(samples_seen, 1)).item()
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}')


In [None]:
# Put the network in evaluation mode before scoring the test tensors.
model.eval()

# One gradient-free forward pass over the full test tensor; the sigmoid turns
# the raw logits into probabilities.
with torch.no_grad():
    outputs = model(x_test_tensor.to(device))
    probs = outputs.sigmoid()

# Flatten labels and probabilities into 1-D NumPy vectors on the CPU.
y_true = y_test_tensor.view(-1).cpu().numpy()
y_probs = probs.view(-1).cpu().numpy()

# Sweep 101 evenly spaced decision thresholds over [0, 1] and keep the one with
# the highest F1 score.
# NOTE(review): the threshold is selected on y_true / y_probs, which come from
# the test tensors, so the reported best F1 is an optimistic estimate; a
# separate validation split would be needed for an unbiased number.
thresholds = np.linspace(0, 1, 101)

# zero_division=0 pins the score to 0.0 (without an UndefinedMetricWarning) at
# thresholds where F1 is undefined, e.g. when no sample is predicted positive.
f1_scores = [
    f1_score(y_true, (y_probs >= thresh).astype(int), zero_division=0)
    for thresh in thresholds
]

# argmax returns the first maximum, i.e. the lowest threshold among ties.
best_idx = np.argmax(f1_scores)
best_thresh = thresholds[best_idx]
best_f1 = f1_scores[best_idx]

print(f"\nBest Threshold: {best_thresh:.2f}")
print(f"Best F1 Score: {best_f1:.4f}")

# Hard 0/1 predictions at the chosen threshold, tabulated against the labels.
y_pred_final = (y_probs >= best_thresh).astype(int)
cm = confusion_matrix(y_true, y_pred_final)

print("\nConfusion Matrix:", cm, sep="\n")

# heatmap returns the Axes it drew on; annotate that Axes directly.
cm_ax = sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
cm_ax.set_title("Confusion Matrix")
cm_ax.set_xlabel("Predicted")
cm_ax.set_ylabel("Actual")
plt.show()

# Precision-recall curve over all score thresholds, summarised by the
# trapezoidal area under it.
precision, recall, _ = precision_recall_curve(y_true, y_probs)
pr_auc = auc(recall, precision)

print(f"PR AUC: {pr_auc:.4f}")

# Draw on the current Axes (created on demand) and label it via the Axes API.
pr_ax = plt.gca()
pr_ax.plot(recall, precision, label=f"PR AUC = {pr_auc:.2f}")
pr_ax.set_xlabel("Recall")
pr_ax.set_ylabel("Precision")
pr_ax.set_title("Precision-Recall Curve")
pr_ax.legend()
pr_ax.grid(True)
plt.show()
