<a href="https://colab.research.google.com/github/Carine-69/SchoolDropOut-RW/blob/main/SchoolDropOut_NN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [298]:
import numpy as np


In [299]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [300]:
import numpy as np
def load_clean_data(filename, test_size=0.2, random_state=42):
  # Load data using numpy as it handles mixed types and headers better
  df = np.genfromtxt('/content/drive/MyDrive/ML-datasets/SchoolDropOut.csv', delimiter = ',', dtype=float, filling_values = np.nan)

  # Impute missing values with the mean of the respective column.
  for i in range(df.shape[1]):
        col = df[:, i]
        # Find non-NaN values to calculate the mean
        non_nan_indices = ~np.isnan(col)
        # Check if there are any non-NaN values in the column
        if np.any(non_nan_indices):
            mean_val = np.mean(col[non_nan_indices])
            # Replace NaN values with the calculated mean
            col[np.isnan(col)] = mean_val
        else:
            # If the entire column is NaN, fill with 0 or a placeholder
            col[np.isnan(col)] = 0

  return df # Added missing return statement

In [301]:
def preprocess_data(df, test_size=0.2, random_state=42):
    #separate features and labels
    X = df[:, :-1]
    y = df[:, -1].astype(int)

    # Define the sentinel value for invalid integers
    invalid_int_sentinel = -9223372036854775808

    # Remove rows where y is NaN or the invalid integer sentinel
    nan_mask = np.isnan(y)
    invalid_int_mask = (y == invalid_int_sentinel)
    combined_mask = nan_mask | invalid_int_mask

    X = X[~combined_mask]
    y = y[~combined_mask]

    # Check if y is empty after cleaning
    if y.size == 0:
        print("Warning: Target variable 'y' is empty after cleaning. No valid data remains.")
        # Return empty arrays or None values
        return np.array([]), np.array([]), np.array([]), np.array([]), 0


    #convert integers into one hot encoded format
    num_classes = len(np.unique(y))
    y_one_hot = np.eye(num_classes)[y].T

    #split data into training and testing
    np.random.seed(random_state)
    indices = np.random.permutation(X.shape[0])
    test_size = int(X.shape[0] * test_size)
    test_indices = indices[:test_size]
    train_indices = indices[test_size:]

    X_train = X[train_indices]
    y_train = y_one_hot[:, train_indices]
    X_test = X[test_indices]
    y_test = y_one_hot[:, test_indices]

    return X_train, y_train, X_test, y_test, num_classes # Added the missing return statement
    # define activation functions
def relu(z):
  return np.maximum(0, z)

def relu_derivative(a):
  return (a>0).astype(np.float64)

def softmax(z):
  output = np.exp(z - np.max(z, axis=0, keepdims=True))
  return output / np.sum(output, axis=0, keepdims=True)

  # loss function
  def cross_entropy_loss(y_true, y_pred):
    return -np.mean(y_true * np.log(y_pred + 1e-7))

    # define layers
class ThreeLayerNN:
  def __init__(self, input_size, hidden_size, output_size):
    self.input_size = input_size
    self.hidden1_size = hidden_size
    self.hidden2_size = hidden_size
    self.output_size = output_size

      #initializer parameters
  def initialize_parameters(self):
        np.random.seed(1)
        params = {
            'W1': np.random.randn(self.hidden1_size, self.input_size) * 0.01,
            'b1': np.zeros((self.hidden1_size, 1)),
            'W2': np.random.randn(self.hidden2_size, self.hidden1_size) * 0.01,
            'b2': np.zeros((self.hidden2_size, 1)),
            'W3': np.random.randn(self.output_size, self.hidden2_size) * 0.01,
            'b3': np.zeros((self.output_size, 1))
        }
        return params

          # forward propagation
  def forward_propagation(self, x):
        params = self.params
        # layer 1
        l1 = np.dot(params['W1'], x) + params['b1']
        a1 = relu(l1)
        l2 = np.dot(params['W2'], a1) + params['b2']
        a2 = relu(l2)
        l3 = np.dot(params['W3'], a2) + params['b3']
        a3 = softmax(l3)
        cache = (l1, a1, l2, a2, l3, a3)
        return a3, cache

          # backpropagation
  def backpropagation(self, x, y, l1, a1, l2, a2, l3, a3):
        params = self.params
        m = x.shape[1]

        #outer layer
        dl3 = a3 - y
        dW3 = (1/m) * np.dot(dl3, a2.T)
        db3 = (1/m) * np.sum(dl3, axis=1, keepdims=True)

        #second hidden layer
        dl2 = np.dot(params['W3'].T, dl3) * relu_derivative(a2)
        dW2 = (1/m) * np.dot(dl2, a1.T)
        db2 = (1/m) * np.sum(dl2, axis=1, keepdims=True)

        #first hidden layer
        dl1 = np.dot(params['W2'].T, dl2) * relu_derivative(a1)
        dW1 = (1/m) * np.dot(dl1, x.T)
        db1 = (1/m) * np.sum(dl1, axis=1, keepdims=True)

        return dW1, db1, dW2, db2, dW3, db3

          #update parameters
  def upt_params(self, dW1, db1, dW2, db2, dW3, db3, learning_rate):
        params = self.params
        params['W1'] -= learning_rate * dW1
        params['b1'] -= learning_rate * db1
        params['W2'] -= learning_rate * dW2
        params['b2'] -= learning_rate * db2
        params['W3'] -= learning_rate * dW3
        params['b3'] -= learning_rate * db3

          #train using  mini_batches
  def train(self,x_train, y_train,x_val, y_val, epochs, learning_rate, batch_size):
       m_train = x_train.shape[1]
       m_val = x_val.shape[1]
       train_losses, val_losses, train_accs, val_accs = [], [], [], []
       for epoch in range(epochs): #batches
        permutation = np.random.permutation(m_train)
        x_train = x_train[:, permutation]
        y_train = y_train[:, permutation]

        # forward training and pdating parameters
        for j in range(0, m_train, batch_size):
          x_batch = x_train[:, j:j+batch_size]
          y_batch = y_train[:, j:j+batch_size]
          y_pred_batch, cache = self.forward_propagation(x_batch)
          dW1, db1, dW2, db2, dW3, db3 = self.backpropagation(x_batch, y_batch, *cache)
          self.upt_params(dW1, db1, dW2, db2, dW3, db3, learning_rate)

        # record epochs
        y_pred_train, _ = self.forward_propagation(x_train)
        train_acc = self.calculate_accuracy(y_train, y_pred_train)
        train_loss = cross_entropy_loss(y_train, y_pred_train)

        y_pred_val, _ = self.forward_propagation(x_val)
        val_acc = self.calculate_accuracy(y_val, y_pred_val)
        va_loss = cross_entropy_loss(y_val, y_pred_val)

        train_losses.append(train_loss)
        val_losses.append(va_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        if epoch % 100 == 0:
          print(f'epoch {epoch}: train loss = {train_loss:.4f}, val loss = {va_loss:.4f}, train acc = {train_acc:.4f}, val acc = {val_acc:.4f}')

       return train_losses, val_losses, train_accs, val_accs

  def predict(self, x):
    y_pred, _ = self.forward_propagation(x)
    return np.argmax(y_pred, axis=0)

  def calculate_accuracy(self, y_true, y_pred):
    y_true = np.argmax(y_true, axis=0)
    y_pred = np.argmax(y_pred, axis=0)
    return np.mean(y_true == y_pred)

    # Load the data
data = load_clean_data('/content/drive/MyDrive/ML-datasets/SchoolDropOut.csv')

# Preprocess it into train/test sets
X_train, y_train, X_test, y_test, num_classes = preprocess_data(data)

# Transpose X so it matches the expected input shape in your NN (features x samples)
X_train = X_train.T
X_test = X_test.T

In [302]:
# define activation functions
def relu(z):
  return np.maximum(0, z)

def relu_derivative(a):
  return (a>0).astype(np.float64)

def softmax(z):
  output = np.exp(z - np.max(z, axis=0, keepdims=True))
  return output / np.sum(output, axis=0, keepdims=True)

# loss function
def cross_entropy_loss(y_true, y_pred):
  return -np.mean(y_true * np.log(y_pred + 1e-7))

# define layers
class ThreeLayerNN:
  def __init__(self, input_size, hidden_size, output_size):
    self.input_size = input_size
    self.hidden1_size = hidden_size
    self.hidden2_size = hidden_size
    self.output_size = output_size

      #initializer parameters
  def initialize_parameters(self):
        np.random.seed(1)
        params = {
            'W1': np.random.randn(self.hidden1_size, self.input_size) * 0.01,
            'b1': np.zeros((self.hidden1_size, 1)),
            'W2': np.random.randn(self.hidden2_size, self.hidden1_size) * 0.01,
            'b2': np.zeros((self.hidden2_size, 1)),
            'W3': np.random.randn(self.output_size, self.hidden2_size) * 0.01,
            'b3': np.zeros((self.output_size, 1))
        }
        return params

          # forward propagation
  def forward_propagation(self, x):
        params = self.params
        # layer 1
        l1 = np.dot(params['W1'], x) + params['b1']
        a1 = relu(l1)
        l2 = np.dot(params['W2'], a1) + params['b2']
        a2 = relu(l2)
        l3 = np.dot(params['W3'], a2) + params['b3']
        a3 = softmax(l3)
        cache = (l1, a1, l2, a2, l3, a3)
        return a3, cache

          # backpropagation
  def backpropagation(self, x, y, l1, a1, l2, a2, l3, a3):
        params = self.params
        m = x.shape[1]

        #outer layer
        dl3 = a3 - y
        dW3 = (1/m) * np.dot(dl3, a2.T)
        db3 = (1/m) * np.sum(dl3, axis=1, keepdims=True)

        #second hidden layer
        dl2 = np.dot(params['W3'].T, dl3) * relu_derivative(a2)
        dW2 = (1/m) * np.dot(dl2, a1.T)
        db2 = (1/m) * np.sum(dl2, axis=1, keepdims=True)

        #first hidden layer
        dl1 = np.dot(params['W2'].T, dl2) * relu_derivative(a1)
        dW1 = (1/m) * np.dot(dl1, x.T)
        db1 = (1/m) * np.sum(dl1, axis=1, keepdims=True)

        return dW1, db1, dW2, db2, dW3, db3

          #update parameters
  def upt_params(self, dW1, db1, dW2, db2, dW3, db3, learning_rate):
        params = self.params
        params['W1'] -= learning_rate * dW1
        params['b1'] -= learning_rate * db1
        params['W2'] -= learning_rate * dW2
        params['b2'] -= learning_rate * db2
        params['W3'] -= learning_rate * dW3
        params['b3'] -= learning_rate * db3

          #train using  mini_batches
  def train(self,x_train, y_train,x_val, y_val, epochs, learning_rate, batch_size):
       m_train = x_train.shape[1]
       m_val = x_val.shape[1]
       train_losses, val_losses, train_accs, val_accs = [], [], [], []
       for epoch in range(epochs): #batches
        permutation = np.random.permutation(m_train)
        x_train = x_train[:, permutation]
        y_train = y_train[:, permutation]

        # forward training and pdating parameters
        for j in range(0, m_train, batch_size):
          x_batch = x_train[:, j:j+batch_size]
          y_batch = y_train[:, j:j+batch_size]
          y_pred_batch, cache = self.forward_propagation(x_batch)
          dW1, db1, dW2, db2, dW3, db3 = self.backpropagation(x_batch, y_batch, *cache)
          self.upt_params(dW1, db1, dW2, db2, dW3, db3, learning_rate)

        # record epochs
        y_pred_train, _ = self.forward_propagation(x_train)
        train_acc = self.calculate_accuracy(y_train, y_pred_train)
        train_loss = cross_entropy_loss(y_train, y_pred_train)

        y_pred_val, _ = self.forward_propagation(x_val)
        val_acc = self.calculate_accuracy(y_val, y_pred_val)
        va_loss = cross_entropy_loss(y_val, y_pred_val)

        train_losses.append(train_loss)
        val_losses.append(va_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        if epoch % 100 == 0:
          print(f'epoch {epoch}: train loss = {train_loss:.4f}, val loss = {va_loss:.4f}, train acc = {train_acc:.4f}, val acc = {val_acc:.4f}')

       return train_losses, val_losses, train_accs, val_accs

  def predict(self, x):
    y_pred, _ = self.forward_propagation(x)
    return np.argmax(y_pred, axis=0)

  def calculate_accuracy(self, y_true, y_pred):
    y_true = np.argmax(y_true, axis=0)
    y_pred = np.argmax(y_pred, axis=0)
    return np.mean(y_true == y_pred)

# Define the compute_metrics function and move it to a proper scope
def compute_metrics(cm):
    precision = np.zeros(cm.shape[0])
    recall = np.zeros(cm.shape[0])
    f1 = np.zeros(cm.shape[0])

    for i in range(cm.shape[0]):
        tp = cm[i, i]
        fp = cm[:, i].sum() - tp
        fn = cm[i, :].sum() - tp

        precision[i] = tp / (tp + fp + 1e-7)
        recall[i] = tp / (tp + fn + 1e-7)
        f1[i] = 2 * precision[i] * recall[i] / (precision[i] + recall[i] + 1e-7)

    accuracy = np.trace(cm) / np.sum(cm)
    return accuracy, precision, recall, f1

# Experiment 1
nn1 = ThreeLayerNN(input_size=X_train.shape[0], hidden_size=32, output_size=num_classes)
nn1.params = nn1.initialize_parameters()
train_losses1, val_losses1, train_accs1, val_accs1 = nn1.train(
    X_train, y_train, X_test, y_test, epochs=300, learning_rate=0.01, batch_size=64
)

# Experiment 2
nn2 = ThreeLayerNN(input_size=X_train.shape[0], hidden_size=64, output_size=num_classes)
nn2.params = nn2.initialize_parameters()
train_losses2, val_losses2, train_accs2, val_accs2 = nn2.train(
    X_train, y_train, X_test, y_test, epochs=300, learning_rate=0.005, batch_size=64
)

print("Experiment 1:")
for i in range(0, 300, 50):
    print(f"Epoch {i}: Train Loss={train_losses1[i]:.4f}, Val Loss={val_losses1[i]:.4f}, Train Acc={train_accs1[i]:.4f}, Val Acc={val_accs1[i]:.4f}")

print("\nExperiment 2:")
for i in range(0, 300, 50):
    print(f"Epoch {i}: Train Loss={train_losses2[i]:.4f}, Val Loss={val_losses2[i]:.4f}, Train Acc={train_accs2[i]:.4f}, Val Acc={val_accs2[i]:.4f}")

epoch 0: train loss = -0.0000, val loss = -0.0000, train acc = 1.0000, val acc = 1.0000
epoch 100: train loss = -0.0000, val loss = -0.0000, train acc = 1.0000, val acc = 1.0000
epoch 200: train loss = -0.0000, val loss = -0.0000, train acc = 1.0000, val acc = 1.0000
epoch 0: train loss = -0.0000, val loss = -0.0000, train acc = 1.0000, val acc = 1.0000
epoch 100: train loss = -0.0000, val loss = -0.0000, train acc = 1.0000, val acc = 1.0000
epoch 200: train loss = -0.0000, val loss = -0.0000, train acc = 1.0000, val acc = 1.0000
Experiment 1:
Epoch 0: Train Loss=-0.0000, Val Loss=-0.0000, Train Acc=1.0000, Val Acc=1.0000
Epoch 50: Train Loss=-0.0000, Val Loss=-0.0000, Train Acc=1.0000, Val Acc=1.0000
Epoch 100: Train Loss=-0.0000, Val Loss=-0.0000, Train Acc=1.0000, Val Acc=1.0000
Epoch 150: Train Loss=-0.0000, Val Loss=-0.0000, Train Acc=1.0000, Val Acc=1.0000
Epoch 200: Train Loss=-0.0000, Val Loss=-0.0000, Train Acc=1.0000, Val Acc=1.0000
Epoch 250: Train Loss=-0.0000, Val Loss=-0.