In [3]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/noshowappointments/KaggleV2-May-2016.csv


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, f1_score, precision_recall_curve, average_precision_score, confusion_matrix

In [5]:
df = pd.read_csv('/kaggle/input/noshowappointments/KaggleV2-May-2016.csv')

In [6]:
df['No-show'] = (df['No-show'] == 'Yes').astype(int)

In [7]:
df = df.drop(['PatientId', 'AppointmentID'], axis =1)

In [8]:
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])


In [9]:
df['DaysWaiting'] = (df['AppointmentDay'] - df['ScheduledDay']).dt.days

In [10]:
df = df.drop(['ScheduledDay', 'AppointmentDay'], axis = 1)

In [11]:
df['DaysWaiting'] = df['DaysWaiting'].apply(lambda x: x if x>=0 else 0)
df['Age'] = df['Age'].clip(lower=0, upper=100)
df['Gender'] = df['Gender'].map({'F':0, 'M':1})

df = pd.get_dummies(df, columns=['Neighbourhood'])

In [12]:
#Normalizing numerical data
numerical_cols = ['Age', 'DaysWaiting', 'Scholarship', 'Hipertension', 'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received']
for col in numerical_cols:
    df[col] = (df[col] - df[col].mean())/df[col].std()

In [13]:
X = df.drop('No-show', axis =1).values
y= df['No-show'].values.reshape(-1,1)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

In [15]:
#Neural-Network Class
class NeuralNetwork:
    def __init__(self, input_size, hidden_size,output_size):
        #He initialization
        self.W1 = np.random.randn(input_size, hidden_size).astype(np.float64) * np.sqrt(2 / input_size)
        self.b1 = np.zeros((1, hidden_size), dtype=np.float64)
        self.W2 = np.random.randn(hidden_size, output_size).astype(np.float64) * np.sqrt(2 / hidden_size)
        self.b2 = np.zeros((1, output_size), dtype=np.float64)

    def relu(self, Z):
        return np.maximum(0, Z)

    def sigmoid(Z):
        return 1 / (1 +np.exp(-Z))

    def forward(self, X):
        self.Z1 = np.dot(X, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.relu(self.Z2)
        return self.A2

    def compute_loss(self, y, y_pred):
        epsilon = 1e-8  # Avoid log(0)
        y = np.array(y, dtype=float).reshape(-1)
        y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
        y_pred = np.array(y_pred, dtype=float).reshape(-1)
        loss = -np.mean(y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon))
        return loss



    def backward(self, X, y, y_pred, learning_rate):
        m = y.shape[0] # Numbers of sample
        dZ2 = y_pred -y
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis = 0, keepdims = True)/m

        dZ1 = np.dot(dZ2, self.W2.T) * (self.Z1>0)
        dW1 = np.dot(X.T, dZ1)/ m
        db1 = np.sum(dZ1, axis=0, keepdims =True)/m

        #update parameters
        self.W2 = self.W2 - learning_rate * dW2
        self.b2 = self.b2 - learning_rate * db2
        self.W1 = self.W1 - learning_rate * dW1
        self.b1 = self.b1 - learning_rate * db1

    def train(self, X, y, epochs, learning_rate):
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = self.compute_loss(y, y_pred)
            self.backward(X, y, y_pred, learning_rate)
            if epoch%100 ==0:
                print(f'Epoch{epoch} Loss: {loss:.4f}')


    def predict(self, X):
        return (self.forward(X) >= 0.5).astype(int)




In [18]:
input_size = X_train.shape[1]
hidden_size =10
output_size =1 

model = NeuralNetwork(input_size, hidden_size, output_size)
model.train(X_train[:1000], y_train[:1000], epochs=100, learning_rate=0.01)

train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

print("\nTraining Metrics:")
print(f"Training Accuracy: {np.mean(train_pred == y_train):.4f}")
print(f"Training F1: {f1_score(y_train, train_pred):.4f}")
print(f"PR-AUC: {average_precision_score(y_train, train_pred):.4f}")
print("Confusion Matrix:\n", confusion_matrix(y_train, train_pred))

print("\nTest Metrics:")
print(f"Test Accuracy: {np.mean(test_pred == y_test):.4f}")
print(f"Test F1: {f1_score(y_test, test_pred):.4f}")
print(f"PR-AUC: {average_precision_score(y_test, test_pred):.4f}")
print("Confusion Matrix:\n", confusion_matrix(y_test, test_pred))

Epoch0 Loss: 2.2427

Training Metrics:
Training Accuracy: 0.7764
Training F1: 0.0868
PR-AUC: 0.2047
Confusion Matrix:
 [[67706  2833]
 [16942   940]]

Test Metrics:
Test Accuracy: 0.7773
Test F1: 0.0800
PR-AUC: 0.2023
Confusion Matrix:
 [[16970   699]
 [ 4223   214]]
