In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# Absolute location of the dataset CSV on the local machine.
# NOTE(review): this path is machine-specific and will not resolve elsewhere.
file_path = r"C:/Users/VICTUS/vs items/Codes/Python Codes/archive/KaggleV2-May-2016.csv"

# Parse the CSV into the working DataFrame used by the cells below.
df = pd.read_csv(filepath_or_buffer=file_path)

# Trailing expression: display the column labels that were read.
df.columns

Index(['PatientId', 'AppointmentID', 'Gender', 'ScheduledDay',
       'AppointmentDay', 'Age', 'Neighbourhood', 'Scholarship', 'Hipertension',
       'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received', 'No-show'],
      dtype='object')

In [4]:
def _recode_in_place(frame, column, replacements):
    """Overwrite each matching label in ``frame[column]`` with its integer code.

    ``replacements`` is an ordered sequence of ``(label, code)`` pairs; the
    pairs are applied one after another, mutating ``frame`` directly.
    """
    for label, code in replacements:
        frame.loc[frame[column] == label, column] = code


# Turn the two text-valued columns into integer codes (df itself is modified).
_recode_in_place(df, 'No-show', (('Yes', 1), ('No', 0)))
_recode_in_place(df, 'Gender', (('M', 1), ('F', 2)))

# Columns fed to the model as inputs.
features = ['Gender', 'Age', 'Scholarship', 'Hipertension', 'Diabetes', 'Alcoholism', 'Handcap', 'SMS_received']

# Feature matrix as a float64 numpy array; any NaN entries are replaced by 0.
X = np.nan_to_num(df[features].astype(np.float64).values)

# Target vector of integer class ids taken from the recoded 'No-show' column.
y = df['No-show'].astype(int).values

# Trailing expression: display the number of labels.
y.shape

(110527,)

In [5]:
# DataFrame.dropna() returns a new frame and leaves `df` untouched, so the
# result must be bound to a name for the drop to have any effect. A separate
# name is used so the loaded frame stays available unchanged.
# Note: X and y were already built from `df` in the earlier cell, so they are
# not affected by the rows removed here.
df_clean = df.dropna()

# Trailing expression: shape after removing rows that contain missing values.
df_clean.shape

(110527, 14)

In [None]:
class Layer_Dense:
    """Fully connected layer that computes ``inputs . weights + biases``.

    Attributes set by this class:
        weights: array of shape (n_inputs, n_neurons) drawn from a standard
            normal distribution and scaled by 0.10.
        biases:  array of shape (1, n_neurons), initialised to zeros.
        output:  result of the most recent ``forward`` call.
    """

    def __init__(self, n_inputs, n_neurons):
        # Keeping weights as (n_inputs, n_neurons) -- inputs first -- means
        # forward() can multiply without transposing the weight matrix.
        weight_shape = (n_inputs, n_neurons)
        self.weights = np.random.randn(*weight_shape) * 0.10
        # The shape is passed as a tuple: one row, one bias per neuron.
        self.biases = np.zeros(shape=(1, n_neurons))

    def forward(self, inputs):
        """Store the affine transform of ``inputs`` in ``self.output``."""
        projected = np.dot(inputs, self.weights)
        self.output = projected + self.biases

class Activation_ReLU:
    """Rectified linear activation: entries below zero are replaced by zero."""

    def forward(self, inputs):
        """Store the element-wise maximum of 0 and ``inputs`` in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified

class Activation_Softmax:
    """Softmax taken along axis 1, so every row becomes a probability vector."""

    def forward(self, inputs):
        """Store the row-wise softmax of ``inputs`` in ``self.output``."""
        # Shifting each row by its largest entry leaves the softmax unchanged
        # while keeping every exponent <= 0, which prevents overflow in exp.
        row_peaks = np.max(inputs, axis=1, keepdims=True)
        unnormalised = np.exp(inputs - row_peaks)
        # Divide by the per-row total so each row sums to 1.
        row_totals = np.sum(unnormalised, axis=1, keepdims=True)
        self.output = unnormalised / row_totals

class Loss:
    """Base class for losses.

    Subclasses implement ``forward(output, y)`` returning one loss value per
    sample; ``calculate`` reduces those values to a single batch-level loss.
    """

    def calculate(self, output, y):
        """Return the mean loss over the batch.

        Args:
            output: model predictions, passed through to ``self.forward``.
            y: target values, passed through to ``self.forward``.

        Returns:
            The mean of the per-sample losses produced by ``self.forward``.
        """
        sample_losses = self.forward(output, y)
        # Return the batch mean, not the per-sample array.
        return np.mean(sample_losses)

class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy: negative log of the probability given to the true class."""

    def forward(self, y_pred, y_true):
        """Return one negative log-likelihood per sample.

        ``y_true`` may be a 1-D array of class indices or a 2-D one-hot
        array; ``y_pred`` holds one row of class probabilities per sample.
        """
        n_rows = len(y_pred)
        # Keep probabilities strictly inside (0, 1) so log() never sees 0.
        clipped = np.clip(y_pred, 1e-7, 1-1e-7)

        label_rank = len(y_true.shape)
        if label_rank == 1:
            # Class indices: pick, for every row, the column named by its label.
            picked = clipped[range(n_rows), y_true]
        elif label_rank == 2:
            # One-hot rows: multiplying masks out everything but the true class.
            picked = np.sum(clipped * y_true, axis=1)

        return -np.log(picked)

# Mean squared error of a one-weight linear model: 1/N * sum((w*x - y)**2)

def gradient(x, y, y_pred):
    """Gradient of the mean squared error with respect to the weight.

    For predictions ``y_pred = w * x`` and loss ``1/N * sum((y_pred - y)**2)``
    the derivative with respect to ``w`` is ``2/N * sum(x * (y_pred - y))``.

    Args:
        x: input values, one per sample.
        y: target values, one per sample.
        y_pred: model predictions, one per sample.

    Returns:
        The gradient as a scalar.
    """
    residual = np.asarray(y_pred) - np.asarray(y)
    # Number of samples the loss is averaged over (1 for scalar inputs; the
    # lower bound also keeps empty inputs from dividing by zero).
    n_samples = max(residual.shape[0], 1) if residual.ndim else 1
    # np.dot already sums over the samples, so the 1/N factor has to be applied
    # explicitly: calling .mean() on the resulting scalar would not divide.
    summed = np.dot(2 * np.asarray(x), residual)
    return (summed / n_samples).mean()

# ---- MODEL ----
# Two dense layers: 8 inputs -> 16 ReLU units -> 2 softmax outputs.
layer1 = Layer_Dense(8, 16)
activation1 = Activation_ReLU()

layer2 = Layer_Dense(16, 2)
activation2 = Activation_Softmax()

loss_function = Loss_CategoricalCrossEntropy()

# ---- TRAINING SETUP ----

learning_rate = 0.01
epochs = 500

# Neither the batch size nor the one-hot targets change between epochs, so
# both are computed once here instead of on every iteration.
n_samples = X.shape[0]
if len(y.shape) == 1:
    y_true = np.eye(2)[y]  # Convert scalar class ids to one-hot rows
else:
    y_true = y  # Labels are already one-hot

for epoch in range(epochs):
    # ---- FORWARD PASS (whole dataset at once) ----
    layer1.forward(X)
    activation1.forward(layer1.output)

    layer2.forward(activation1.output)
    activation2.forward(layer2.output)

    # ---- LOSS / METRICS (measured before this epoch's weight update) ----
    loss = loss_function.forward(activation2.output, y)
    avg_loss = np.mean(loss)
    predictions = np.argmax(activation2.output, axis=1)
    accuracy = np.mean(predictions == y)

    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

    # ---- BACKWARD PASS (manual gradient descent) ----

    # Gradient of the mean loss w.r.t. the layer2 pre-activations: for softmax
    # followed by cross-entropy this is (probabilities - one-hot targets),
    # divided by the batch size so the update does not scale with it.
    dvalues = (activation2.output - y_true) / n_samples

    # Gradients for layer2
    dweights2 = np.dot(activation1.output.T, dvalues)
    dbiases2 = np.sum(dvalues, axis=0, keepdims=True)

    # Gradient flowing into layer1; it uses layer2's weights from before the
    # update below.
    dactivation1 = np.dot(dvalues, layer2.weights.T)
    dactivation1[activation1.output <= 0] = 0  # ReLU derivative: no gradient where the unit was inactive

    dweights1 = np.dot(X.T, dactivation1)
    dbiases1 = np.sum(dactivation1, axis=0, keepdims=True)

    # ---- WEIGHT UPDATE ----
    layer2.weights -= learning_rate * dweights2
    layer2.biases -= learning_rate * dbiases2

    layer1.weights -= learning_rate * dweights1
    layer1.biases -= learning_rate * dbiases1

print("Training over!")

Epoch 0, Loss: 0.5493, Accuracy: 0.7776
Epoch 10, Loss: 0.5341, Accuracy: 0.7860
Epoch 20, Loss: 0.5332, Accuracy: 0.7959
Epoch 30, Loss: 0.5323, Accuracy: 0.7979
Epoch 40, Loss: 0.5315, Accuracy: 0.7980
Epoch 50, Loss: 0.5307, Accuracy: 0.7980
Epoch 60, Loss: 0.5300, Accuracy: 0.7981
Epoch 70, Loss: 0.5292, Accuracy: 0.7981
Epoch 80, Loss: 0.5285, Accuracy: 0.7981
Epoch 90, Loss: 0.5278, Accuracy: 0.7981
Epoch 100, Loss: 0.5271, Accuracy: 0.7981
Epoch 110, Loss: 0.5264, Accuracy: 0.7981
Epoch 120, Loss: 0.5257, Accuracy: 0.7981
Epoch 130, Loss: 0.5251, Accuracy: 0.7981
Epoch 140, Loss: 0.5245, Accuracy: 0.7981
Epoch 150, Loss: 0.5239, Accuracy: 0.7981
Epoch 160, Loss: 0.5233, Accuracy: 0.7981
Epoch 170, Loss: 0.5227, Accuracy: 0.7981
Epoch 180, Loss: 0.5222, Accuracy: 0.7981
Epoch 190, Loss: 0.5216, Accuracy: 0.7981
Epoch 200, Loss: 0.5211, Accuracy: 0.7981
Epoch 210, Loss: 0.5206, Accuracy: 0.7981
Epoch 220, Loss: 0.5201, Accuracy: 0.7981
Epoch 230, Loss: 0.5196, Accuracy: 0.7981
Epo

In [9]:
# Evaluate the loss on the outputs cached by the most recent forward pass.
# If the training cell was the last thing to run, that forward pass happened
# before the final weight update, so this value does not reflect the very
# last update step.
loss = loss_function.calculate(activation2.output, y)

0.798067440534892