# ANN From Scratch

## Setup and Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import time
import joblib
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## ANN Implementation


In [2]:
class NeuralNetwork:
    """
    Fully connected feed-forward classifier trained with gradient descent.

    Every hidden layer uses the same activation (ReLU or tanh); the output
    layer is always softmax and is meant to be paired with categorical
    cross-entropy (see `backward`).
    """

    def __init__(self, input_size, hidden_layers, output_size, activation='relu'):
        """
        Initialize Neural Network Architecture

        Parameters:
        -----------
        input_size : int
            Number of neurons in input layer (number of features)
        hidden_layers : list or tuple
            Number of neurons for each hidden layer (may be empty)
        output_size : int
            Number of neurons in output layer (number of classes)
        activation : str
            Activation function to use ('relu' or 'tanh'). Any value other
            than 'relu' is treated as 'tanh'.
        """
        self.input_size = input_size
        # Copy into a list so tuples are accepted and the caller's sequence
        # is never aliased by the model.
        self.hidden_layers = list(hidden_layers)
        self.output_size = output_size
        self.activation = activation

        # Layer sizes (including input and output)
        self.layer_sizes = [input_size] + self.hidden_layers + [output_size]

        # Initialize weights and biases
        self.weights = []
        self.biases = []

        # He initialization for ReLU hidden layers; 1/fan_in ("Xavier" style)
        # scaling for tanh layers and for the softmax output layer.
        last_hidden_index = len(self.layer_sizes) - 2
        layer_pairs = zip(self.layer_sizes[:-1], self.layer_sizes[1:])
        for i, (fan_in, fan_out) in enumerate(layer_pairs):
            if activation == 'relu' and i < last_hidden_index:
                scale = np.sqrt(2 / fan_in)
            else:
                scale = np.sqrt(1 / fan_in)

            self.weights.append(np.random.randn(fan_in, fan_out) * scale)
            self.biases.append(np.zeros((1, fan_out)))

        # Values cached by forward() and consumed by backward()
        self.z_values = []  # pre-activation
        self.a_values = []  # activation

    # Activation functions
    def relu(self, x):
        """ReLU activation function"""
        return np.maximum(0, x)

    def relu_derivative(self, x):
        """Derivative of ReLU activation function (0 at x <= 0)"""
        return np.where(x > 0, 1, 0)

    def tanh(self, x):
        """tanh activation function"""
        return np.tanh(x)

    def tanh_derivative(self, x):
        """Derivative of tanh activation function"""
        return 1 - np.tanh(x)**2

    def softmax(self, x):
        """Row-wise softmax activation function for output layer"""
        # Subtract the row maximum before exponentiating for numerical stability
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def _hidden_activation(self, z):
        """Apply the configured hidden-layer activation to pre-activations z."""
        if self.activation == 'relu':
            return self.relu(z)
        return self.tanh(z)

    def _hidden_derivative(self, z):
        """Derivative of the configured hidden-layer activation at z."""
        if self.activation == 'relu':
            return self.relu_derivative(z)
        return self.tanh_derivative(z)

    # Forward propagation
    def forward(self, X):
        """
        Forward propagation

        Overwrites the cached `z_values` / `a_values`, so `backward` always
        refers to the most recent call.

        Parameters:
        -----------
        X : numpy.ndarray
            Input data with shape (n_samples, input_size)

        Returns:
        --------
        numpy.ndarray
            Network output, probabilities for each class
        """
        self.z_values = []
        self.a_values = [X]  # Input layer activation

        # Propagate through hidden layers (every layer except the last)
        for w, b in zip(self.weights[:-1], self.biases[:-1]):
            z = np.dot(self.a_values[-1], w) + b
            self.z_values.append(z)
            self.a_values.append(self._hidden_activation(z))

        # Output layer with softmax
        z_out = np.dot(self.a_values[-1], self.weights[-1]) + self.biases[-1]
        self.z_values.append(z_out)

        output = self.softmax(z_out)
        self.a_values.append(output)

        return output

    # Loss function
    def categorical_crossentropy(self, y_true, y_pred):
        """
        Categorical Crossentropy Loss

        Parameters:
        -----------
        y_true : numpy.ndarray
            One-hot encoded true labels
        y_pred : numpy.ndarray
            Predicted probabilities

        Returns:
        --------
        float
            Mean loss value
        """
        # Clip values to prevent log(0)
        y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)

        # Sum over classes and samples, then average over samples
        return -np.sum(y_true * np.log(y_pred)) / y_true.shape[0]

    # Backward propagation
    def backward(self, X, y):
        """
        Backward propagation to calculate gradients

        Must be called right after `forward(X)` for the same batch, because
        it reads the activations cached by that call.

        Parameters:
        -----------
        X : numpy.ndarray
            Input data (only its number of rows is used here)
        y : numpy.ndarray
            One-hot encoded true labels

        Returns:
        --------
        list, list
            Gradients for weights and biases, in layer order
        """
        m = X.shape[0]  # Number of samples

        dw = [np.zeros_like(w) for w in self.weights]
        db = [np.zeros_like(b) for b in self.biases]

        # Output layer error: the combined softmax + cross-entropy gradient
        # simplifies to (probabilities - one-hot targets).
        delta = self.a_values[-1] - y

        dw[-1] = np.dot(self.a_values[-2].T, delta) / m
        db[-1] = np.sum(delta, axis=0, keepdims=True) / m

        # Backprop for hidden layers, last hidden layer first
        for l in range(len(self.weights) - 2, -1, -1):
            # Propagate error through the next layer's weights, then through
            # this layer's activation.
            delta = np.dot(delta, self.weights[l+1].T)
            delta *= self._hidden_derivative(self.z_values[l])

            dw[l] = np.dot(self.a_values[l].T, delta) / m
            db[l] = np.sum(delta, axis=0, keepdims=True) / m

        return dw, db

    # Update weights and biases
    def update_params(self, dw, db, learning_rate):
        """
        Update parameters in place with plain gradient descent

        Parameters:
        -----------
        dw : list
            Gradients for weights
        db : list
            Gradients for biases
        learning_rate : float
            Learning rate
        """
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dw[i]
            self.biases[i] -= learning_rate * db[i]

    # Predict function
    def predict(self, X):
        """
        Make predictions

        Parameters:
        -----------
        X : numpy.ndarray
            Input data

        Returns:
        --------
        numpy.ndarray
            Predicted class indices
        """
        return np.argmax(self.forward(X), axis=1)

    # Save and load model
    def save_model(self, filepath):
        """
        Save model parameters to file

        The parameters are stored as a pickled dict via `np.save`, which
        appends '.npy' to `filepath` when the extension is missing. Missing
        parent directories are created first.
        """
        if isinstance(filepath, (str, os.PathLike)):
            parent_dir = os.path.dirname(os.fspath(filepath))
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)

        model_params = {
            'weights': self.weights,
            'biases': self.biases,
            'layer_sizes': self.layer_sizes,
            'activation': self.activation
        }
        np.save(filepath, model_params)
        print(f"Model saved to {filepath}")

    def load_model(self, filepath):
        """
        Load model parameters from file

        Accepts the same `filepath` that was passed to `save_model`, with or
        without the '.npy' extension, and refreshes every architecture
        attribute so the instance matches the loaded network.

        NOTE: the file is unpickled (`allow_pickle=True`), which can execute
        arbitrary code. Only load files from a trusted source.
        """
        source = filepath
        if isinstance(filepath, (str, os.PathLike)):
            source = os.fspath(filepath)
            # np.save adds '.npy' when it is missing, so mirror that here.
            if not source.endswith('.npy') and not os.path.exists(source):
                source += '.npy'

        model_params = np.load(source, allow_pickle=True).item()
        self.weights = model_params['weights']
        self.biases = model_params['biases']
        self.layer_sizes = model_params['layer_sizes']
        self.activation = model_params['activation']

        # Keep the derived architecture attributes in sync with the loaded
        # layer sizes; they may differ from what __init__ was given.
        self.input_size = self.layer_sizes[0]
        self.hidden_layers = list(self.layer_sizes[1:-1])
        self.output_size = self.layer_sizes[-1]

        # Cached activations belong to the previous parameters.
        self.z_values = []
        self.a_values = []
        print(f"Model loaded from {filepath}")

## Training and Evaluation 

In [3]:
def train(model, X_train, y_train, X_val, y_val, epochs, batch_size, learning_rate):
    """
    Fit `model` with mini-batch gradient descent and track per-epoch metrics.

    Each epoch reshuffles the training set, runs one forward/backward/update
    step per mini-batch, then measures loss and accuracy on the full training
    set and on the validation set and prints one progress line.

    Parameters:
    -----------
    model : NeuralNetwork
        Network to train (updated in place)
    X_train : numpy.ndarray
        Training features
    y_train : numpy.ndarray
        Training labels (one-hot encoded)
    X_val : numpy.ndarray
        Validation features
    y_val : numpy.ndarray
        Validation labels (one-hot encoded)
    epochs : int
        Number of passes over the training set
    batch_size : int
        Number of samples per mini-batch
    learning_rate : float
        Step size passed to `model.update_params`

    Returns:
    --------
    dict
        Lists of per-epoch values under the keys 'train_loss', 'train_acc',
        'val_loss' and 'val_acc'
    """
    def _loss_and_accuracy(features, targets):
        # One forward pass yields both the loss and the predicted classes.
        probabilities = model.forward(features)
        loss = model.categorical_crossentropy(targets, probabilities)
        predicted = np.argmax(probabilities, axis=1)
        expected = np.argmax(targets, axis=1)
        return loss, accuracy_score(expected, predicted)

    history = {
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': []
    }
    sample_count = X_train.shape[0]

    for epoch_index in range(epochs):
        started_at = time.time()

        # New random sample order every epoch
        order = np.random.permutation(sample_count)
        X_epoch, y_epoch = X_train[order], y_train[order]

        for start in range(0, sample_count, batch_size):
            stop = start + batch_size
            X_batch, y_batch = X_epoch[start:stop], y_epoch[start:stop]

            # forward() caches the activations that backward() consumes
            model.forward(X_batch)
            grad_w, grad_b = model.backward(X_batch, y_batch)
            model.update_params(grad_w, grad_b, learning_rate)

        # Metrics are taken after the epoch's final update
        train_loss, train_acc = _loss_and_accuracy(X_train, y_train)
        val_loss, val_acc = _loss_and_accuracy(X_val, y_val)

        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)

        epoch_time = time.time() - started_at

        print(f"Epoch {epoch_index+1}/{epochs} - {epoch_time:.2f}s - "
              f"loss: {train_loss:.4f} - acc: {train_acc:.4f} - "
              f"val_loss: {val_loss:.4f} - val_acc: {val_acc:.4f}")

    return history

def evaluate_model(model, X_test, y_test, class_names=None):
    """
    Evaluate model with metrics

    Prints the test accuracy and a per-class classification report, and
    writes the confusion matrix plot to 'confusion_matrix.png'.

    Parameters:
    -----------
    model : NeuralNetwork
        Neural network model
    X_test : numpy.ndarray
        Test features
    y_test : numpy.ndarray
        Test labels (one-hot encoded)
    class_names : list
        Class names, ordered by class index. Defaults to "Class 0",
        "Class 1", ... for `model.output_size` classes.

    Returns:
    --------
    dict
        'accuracy', 'confusion_matrix' and 'classification_report'
        (the report as a dict)
    """
    # Predictions
    y_pred = model.predict(X_test)
    y_true = np.argmax(y_test, axis=1)

    # Accuracy
    acc = accuracy_score(y_true, y_pred)
    print(f"Test Accuracy: {acc:.4f}")

    if class_names is None:
        class_names = [f"Class {i}" for i in range(model.output_size)]

    # Pin the label set to the known class indices. Without it, a class that
    # appears in neither y_true nor y_pred makes the matrix smaller than
    # class_names and makes classification_report reject target_names.
    labels = np.arange(len(class_names))

    # Confusion Matrix
    cm = confusion_matrix(y_true, y_pred, labels=labels)

    # Classification Report
    cr = classification_report(y_true, y_pred, labels=labels,
                               target_names=class_names, output_dict=True)
    print("\nClassification Report:")
    cr_df = pd.DataFrame(cr).transpose()
    print(cr_df)

    # Plot Confusion Matrix
    plt.figure(figsize=(10, 8))
    try:
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        tick_marks = np.arange(len(class_names))
        plt.xticks(tick_marks, class_names, rotation=45)
        plt.yticks(tick_marks, class_names)

        # Label each cell with its count; switch to white text on dark cells
        thresh = cm.max() / 2
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                         horizontalalignment="center",
                         color="white" if cm[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        # Run the layout pass after the axis labels exist so they are not
        # clipped in the saved image.
        plt.tight_layout()
        plt.savefig('confusion_matrix.png')
    finally:
        # Always release the figure, even if plotting or saving fails
        plt.close()

    # Return metrics
    metrics = {
        'accuracy': acc,
        'confusion_matrix': cm,
        'classification_report': cr
    }

    return metrics

## Hyperparameter Tuning with Grid Search

In [4]:
def grid_search(X_train, y_train, X_val, y_val, X_test, y_test, param_grid):
    """
    Grid search for hyperparameter tuning

    Trains a fresh `NeuralNetwork` for every combination in `param_grid`,
    selects the best one by final-epoch validation accuracy, saves it to
    'models/best_model.npy' and writes all results to
    'hyperparameter_tuning_results.csv'. Test accuracy is recorded for each
    combination but is never used for model selection.

    Parameters:
    -----------
    X_train : numpy.ndarray
        Training features
    y_train : numpy.ndarray
        Training labels (one-hot encoded)
    X_val : numpy.ndarray
        Validation features
    y_val : numpy.ndarray
        Validation labels (one-hot encoded)
    X_test : numpy.ndarray
        Test features
    y_test : numpy.ndarray
        Test labels (one-hot encoded)
    param_grid : dict
        Lists of candidate values under the keys 'hidden_layers',
        'activation', 'learning_rate', 'batch_size' and 'epochs'

    Returns:
    --------
    dict
        'best_params' (dict or None), 'best_model' (NeuralNetwork or None)
        and 'results_df' (all runs, best validation accuracy first). The
        first two are None only when the grid is empty.
    """
    from itertools import product

    input_size = X_train.shape[1]
    output_size = y_train.shape[1]

    results = []
    # Start below any reachable accuracy so the first run is always kept,
    # even when it scores 0.0.
    best_val_acc = float('-inf')
    best_params = None
    best_model = None
    best_model_path = 'models/best_model.npy'

    # All hyperparameter combinations
    hidden_layers_options = param_grid['hidden_layers']
    activations = param_grid['activation']
    learning_rates = param_grid['learning_rate']
    batch_sizes = param_grid['batch_size']
    epochs_options = param_grid['epochs']

    total_combinations = (len(hidden_layers_options) * len(activations) *
                         len(learning_rates) * len(batch_sizes) * len(epochs_options))

    print(f"Total combinations to try: {total_combinations}")

    # The test labels do not change between runs
    test_true = np.argmax(y_test, axis=1)

    start_time = time.time()

    # product() varies the last option fastest, which matches nested loops
    # in the same order.
    combinations = product(hidden_layers_options, activations, learning_rates,
                           batch_sizes, epochs_options)
    for combination_count, combination in enumerate(combinations, start=1):
        hidden_layers, activation, learning_rate, batch_size, epochs = combination

        print(f"\nCombination {combination_count}/{total_combinations}:")
        print(f"Hidden Layers: {hidden_layers}, Activation: {activation}, "
              f"Learning Rate: {learning_rate}, Batch Size: {batch_size}, Epochs: {epochs}")

        # Initialize model
        model = NeuralNetwork(input_size, hidden_layers, output_size, activation)

        # Train model
        history = train(model, X_train, y_train, X_val, y_val,
                        epochs, batch_size, learning_rate)

        # Selection metric: validation accuracy after the last epoch
        val_acc = history['val_acc'][-1]

        # Evaluate on test set (reported only)
        test_pred = model.predict(X_test)
        test_acc = accuracy_score(test_true, test_pred)

        # Store results
        params_dict = {
            'hidden_layers': hidden_layers,
            'activation': activation,
            'learning_rate': learning_rate,
            'batch_size': batch_size,
            'epochs': epochs,
            'val_accuracy': val_acc,
            'test_accuracy': test_acc
        }

        results.append(params_dict)

        # Check if this is the best model so far
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_params = params_dict
            best_model = model

            # Save best model; the target directory may not exist yet
            os.makedirs(os.path.dirname(best_model_path), exist_ok=True)
            model.save_model(best_model_path)

            print(f"New best model found! Validation accuracy: {best_val_acc:.4f}")

    total_time = time.time() - start_time
    print(f"\nGrid search completed in {total_time:.2f} seconds.")

    # Sort results by validation accuracy; an empty grid yields a frame
    # without that column, so there is nothing to sort.
    results_df = pd.DataFrame(results)
    if not results_df.empty:
        results_df = results_df.sort_values('val_accuracy', ascending=False)
    results_df.to_csv('hyperparameter_tuning_results.csv', index=False)

    print("\nTop 5 Parameter Combinations:")
    print(results_df.head(5))

    return {
        'best_params': best_params,
        'best_model': best_model,
        'results_df': results_df
    }

## Main

In [5]:
def main():
    """
    Run the full pipeline: load the prepared arrays, carve out a validation
    set, grid-search the hyperparameters and evaluate the winning model on
    the test set.
    """
    print("Loading data...")
    X_train = np.load('result/X_train.npy')
    X_test = np.load('result/X_test.npy')
    y_train = np.load('result/y_train.npy')
    y_test = np.load('result/y_test.npy')

    print(f"Data loaded - X_train: {X_train.shape}, y_train: {y_train.shape}, X_test: {X_test.shape}, y_test: {y_test.shape}")

    # The fitted encoder supplies the class names used in the report and plot
    encoder = joblib.load('result/onehot_encoder.pkl')
    class_names = encoder.categories_[0]

    # Hold out a fixed-seed random 10% of the training rows for validation
    np.random.seed(42)
    validation_split = 0.1
    total_rows = X_train.shape[0]
    shuffled = np.random.permutation(total_rows)
    n_val = int(total_rows * validation_split)
    val_rows, fit_rows = shuffled[:n_val], shuffled[n_val:]

    X_val, y_val = X_train[val_rows], y_train[val_rows]
    X_train_new, y_train_new = X_train[fit_rows], y_train[fit_rows]

    print(f"Train-validation split - X_train: {X_train_new.shape}, X_val: {X_val.shape}")

    # Candidate values explored by the grid search
    param_grid = {
        'hidden_layers': [
            [64, 32],
            [128, 64],
            [128, 128],
            [256, 128],
            [128, 64, 32],
            [256, 128, 64],
            [512, 256, 128]
        ],
        'activation': ['relu', 'tanh'],
        'learning_rate': [0.01, 0.001, 0.0001],
        'batch_size': [32, 64],
        'epochs': [50, 100, 150]
    }

    print("\nStarting hyperparameter tuning...")
    tuning_results = grid_search(X_train_new, y_train_new, X_val, y_val, X_test, y_test, param_grid)

    print("\nBest parameters:")
    for name, setting in tuning_results['best_params'].items():
        print(f"{name}: {setting}")

    # Report the selected model's performance on the held-out test set
    print("\nFinal evaluation of best model on test set:")
    evaluate_model(tuning_results['best_model'], X_test, y_test, class_names)

    print("\nTraining completed!")

## Run the Main Function

In [6]:
# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

Loading data...
Data loaded - X_train: (4940, 289), y_train: (4940, 3), X_test: (1235, 289), y_test: (1235, 3)
Train-validation split - X_train: (4446, 289), X_val: (494, 289)

Starting hyperparameter tuning...
Total combinations to try: 252

Combination 1/252:
Hidden Layers: [64, 32], Activation: relu, Learning Rate: 0.01, Batch Size: 32, Epochs: 50
Epoch 1/50 - 0.04s - loss: 1.0823 - acc: 0.4339 - val_loss: 1.0880 - val_acc: 0.3684
Epoch 2/50 - 0.04s - loss: 1.0709 - acc: 0.4447 - val_loss: 1.0798 - val_acc: 0.4231
Epoch 3/50 - 0.04s - loss: 1.0618 - acc: 0.4537 - val_loss: 1.0762 - val_acc: 0.4028
Epoch 4/50 - 0.04s - loss: 1.0547 - acc: 0.4638 - val_loss: 1.0719 - val_acc: 0.4049
Epoch 5/50 - 0.05s - loss: 1.0478 - acc: 0.4843 - val_loss: 1.0660 - val_acc: 0.4494
Epoch 6/50 - 0.04s - loss: 1.0419 - acc: 0.4843 - val_loss: 1.0624 - val_acc: 0.4453
Epoch 7/50 - 0.04s - loss: 1.0369 - acc: 0.4861 - val_loss: 1.0606 - val_acc: 0.4453
Epoch 8/50 - 0.05s - loss: 1.0312 - acc: 0.4870 - va