In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_recall_curve, auc, confusion_matrix
import psutil


In [3]:
from google.colab import drive
try:
        drive.mount('/content/drive')
except Exception as e:
        print(f"An error occurred during mounting: {e}")

import zipfile
zip_path = '/content/drive/MyDrive/KaggleV2-May-2016.csv.zip'
extract_path = '/content/drive/MyDrive/'
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)

Mounted at /content/drive


In [4]:
def relu(z):
    return np.maximum(0, z)

def relu_prime(z):
    return (z > 0).astype(float)

def softmax(z):
    new_z = np.exp(z - np.max(z))
    return new_z / np.sum(new_z, axis=0, keepdims=True)


class NeuralNetwork(object):

    def __init__(self, architecture):

        self.L = architecture.size - 1
        self.n = architecture

        self.parameters = {}

        for i in range (1, self.L + 1):
            self.parameters['W' + str(i)] = np.random.randn(self.n[i], self.n[i - 1]) * np.sqrt(2. /self.n[i-1])

            self.parameters['b' + str(i)] = np.ones((self.n[i], 1))
            self.parameters['z' + str(i)] = np.ones((self.n[i], 1))
            self.parameters['a' + str(i)] = np.ones((self.n[i], 1))

        self.parameters['a0'] = np.ones((self.n[0], 1))

        self.parameters['C'] = 1

        self.derivatives = {}

    def forward_prop(self, X):
        self.parameters['a0'] = X

        for l in range(1, self.L + 1):
            self.parameters['z' + str(l)] = np.dot(self.parameters['W' + str(l)], self.parameters['a' + str(l - 1)]) + self.parameters['b' + str(l)]

            if l == self.L:
                self.parameters['a' + str(l)] = softmax(self.parameters['z' + str(l)])
            else:
                self.parameters['a' + str(l)] = relu(self.parameters['z' + str(l)])

    def cost(self, y):
        aL = self.parameters['a' + str(self.L)]
        self.parameters['C'] = -np.sum(y * np.log(aL + 1e-15))

    def back_prop(self, y):
        self.derivatives['dz' + str(self.L)] = self.parameters['a' + str(self.L)] - y
        self.derivatives['dW' + str(self.L)] = np.dot(self.derivatives['dz' + str(self.L)],self.parameters['a' + str(self.L - 1)].T)

        self.derivatives['db' + str(self.L)] = self.derivatives['dz' + str(self.L)]

        for l in range(self.L - 1, 0, -1):
            self.derivatives['dz' + str(l)] = (np.dot(self.parameters['W' + str(l + 1)].T, self.derivatives['dz' + str(l + 1)]) * relu_prime(self.parameters['z' + str(l)]))
            self.derivatives['dW' + str(l)] = np.dot(self.derivatives['dz' + str(l)],self.parameters['a' + str(l - 1)].T)
            self.derivatives['db' + str(l)] = self.derivatives['dz' + str(l)]

    def update_parameters(self, alpha):
        for l in range(1, self.L+1):
            self.parameters['W' + str(l)] -= alpha*self.derivatives['dW' + str(l)]
            self.parameters['b' + str(l)] -= alpha*self.derivatives['db' + str(l)]

    def predict(self, x):
        self.forward_prop(x)
        return self.parameters['a' + str(self.L)]

    def fit(self, X, Y, epochs, alpha):
        for j in range(0, epochs):
            c = 0
            n_c = 0

            for i in range(0, X.shape[0]):
              x = X[i].reshape((X[i].size, 1))
              y = Y[i].reshape((-1,1))

              self.forward_prop(x)
              self.cost(y)
              self.back_prop(y)
              self.update_parameters(alpha)

              c += self.parameters['C']

              y_pred = self.predict(x)
              if np.argmax(y_pred) == np.argmax(y):
                n_c += 1


            c = c/X.shape[0]
            print('Iteration: ', j)
            print("Cost :",c)
            print("Accuracy : ",(n_c/X.shape[0])*100)

In [5]:
df = pd.read_csv('/content/drive/MyDrive/KaggleV2-May-2016.csv')

df=df.drop(columns=['PatientId','AppointmentID','ScheduledDay','AppointmentDay',])
df.rename(columns={'No-show':'given'},inplace = True)
df['given'] = df['given'].map({'No':0,'Yes':1})
df['Gender'] = df['Gender'].map({'F':0,'M':1})

df = pd.get_dummies(df, columns=["Neighbourhood"])

features = ['Gender', 'Age', 'Scholarship', 'Hipertension',
            'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received'] + [col for col in df.columns if col.startswith("Neighbourhood_")]

X = df[features].values
y = df['given'].values

SS_X = StandardScaler()
X = SS_X.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.3)
y_train_encoded = np.zeros((y_train.size, 2))
y_train_encoded[np.arange(y_train.size), y_train] = 1

input_size = X_train.shape[1]  # actual number of input features
architecture = np.array([input_size, 64, 32, 2])
classifier = NeuralNetwork(architecture)

classifier.fit(X_train,y_train_encoded,200,1e-4)



Iteration:  0
Cost : 0.5267908727478853
Accuracy :  79.12832178678524
Iteration:  1
Cost : 0.49803903721949144
Accuracy :  79.68281460035158
Iteration:  2
Cost : 0.49618013285774765
Accuracy :  79.68539964843346
Iteration:  3
Cost : 0.4952571775574647
Accuracy :  79.68927722055629
Iteration:  4
Cost : 0.4947035176145459
Accuracy :  79.68669217247441
Iteration:  5
Cost : 0.49429998767130784
Accuracy :  79.68669217247441
Iteration:  6
Cost : 0.4939839862622645
Accuracy :  79.68669217247441
Iteration:  7
Cost : 0.49371342140637897
Accuracy :  79.68669217247441
Iteration:  8
Cost : 0.49347303818234955
Accuracy :  79.68669217247441
Iteration:  9
Cost : 0.49325885462866825
Accuracy :  79.68669217247441
Iteration:  10
Cost : 0.49306174302116984
Accuracy :  79.68798469651536
Iteration:  11
Cost : 0.4928883958968034
Accuracy :  79.68798469651536
Iteration:  12
Cost : 0.4927300055888875
Accuracy :  79.68798469651536
Iteration:  13
Cost : 0.492577166204943
Accuracy :  79.68798469651536
Iteration:

In [9]:
y_preds = []
y_probs = []

for i in range(X_test.shape[0]):
    x = X_test[i].reshape((-1, 1))
    prob = classifier.predict(x)
    y_probs.append(prob.ravel())
    y_preds.append(np.argmax(prob))

y_probs = np.array(y_probs)
y_preds = np.array(y_preds)

# Accuracy
acc = accuracy_score(y_test, y_preds)
print(f"Accuracy: {acc:.4f}")

# F1 Score
f1 = f1_score(y_test, y_preds)
print(f"F1 Score: {f1:.4f}")

# PR AUC
precision, recall, _ = precision_recall_curve(y_test, y_probs[:, 1])
pr_auc = auc(recall, precision)
print(f"PR AUC: {pr_auc:.4f}")

# Confusion Matrix
cm = confusion_matrix(y_test, y_preds)
print("Confusion Matrix:")
print(cm)

def get_memory_usage():
    process = psutil.Process()
    mem_info = process.memory_info()
    memory_used_mb = mem_info.rss / 1024 ** 2
    print("Memory Usage (MB):")
    print(memory_used_mb)
get_memory_usage()

Accuracy: 0.8010
F1 Score: 0.0140
PR AUC: 0.2759
Confusion Matrix:
[[26512    50]
 [ 6550    47]]
Memory Usage (MB):
451.25
