<a href="https://colab.research.google.com/github/ThasanickaSivapragasam/AI-mini-project/blob/main/MiniProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score
import matplotlib.pyplot as plt
import joblib

# EventRecommenderNN class
class EventRecommenderNN:
    def __init__(self, input_dim=8, num_categories=8):
        """
        Neural Network for Event Recommendation System
        Args:
            input_dim: Number of input features (8 event category scores)
            num_categories: Number of event categories to predict (8)
        """
        self.input_dim = input_dim
        self.num_categories = num_categories
        self.model = None
        self.scaler = StandardScaler()
        self.event_categories = [
            'Programming & Coding', 'Entrepreneurship', 'Career Guidance',
            'Leadership & Communication', 'Volunteering', 'Mental Health',
            'Arts & Culture', 'Subject-Specific'
        ]

    def build_model(self):
        """
        Build the neural network architecture
        """

        #Layer 1: Dense 128 + BatchNorm + Dropout(0.3)
        inputs = Input(shape=(self.input_dim,), name='student_preferences')
        x = Dense(128, activation='relu')(inputs)
        x = BatchNormalization()(x)
        x = Dropout(0.3)(x)

        # Layer 2: Dense 64 + BatchNorm + Dropout(0.2)
        x = Dense(64, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.2)(x)

        # Layer 3: Dense 32 + BatchNorm + Dropout(0.1)
        x = Dense(32, activation='relu')(x)
        x = BatchNormalization()(x)
        x = Dropout(0.1)(x)

        #Output layer: 8 categories with softmax for probability distribution
        outputs = Dense(self.num_categories, activation='softmax', name='event_choice')(x)

        #Build & compile model
        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(
            optimizer=Adam(learning_rate=0.001),  # Adam optimizer with lr=0.001
            loss=SparseCategoricalCrossentropy(), # Multi-class classification loss
            metrics=[SparseCategoricalAccuracy()] # Accuracy metric
        )
        return self.model

    def preprocess_data(self, X, y=None, fit_scaler=False):
        """
        Preprocess the input data
        """
        if fit_scaler:
            X_scaled = self.scaler.fit_transform(X) # Fit scaler on training data
        else:
            X_scaled = self.scaler.transform(X) # Use previously fitted scaler

        if y is not None:
            return X_scaled, y
        return X_scaled

    def train(self, X_train, y_train, X_val, y_val, epochs=200, batch_size=32):
        """
        Train the model
        """

        # Scale training and validation data
        X_train_scaled, y_train = self.preprocess_data(X_train, y_train, fit_scaler=True)
        X_val_scaled, y_val = self.preprocess_data(X_val, y_val)


        # Callbacks: Stop training early + Reduce learning rate when stuck
        callbacks = [
            tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True),
            tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=7, min_lr=1e-6)
        ]

        # Train the model
        history = self.model.fit(
            X_train_scaled, y_train,
            validation_data=(X_val_scaled, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=callbacks,
            verbose=1
        )
        return history


# Plot training curves

def plot_training_history(history):
    plt.figure(figsize=(12, 5))

   # Training vs Validation Loss
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss', linewidth=2)
    plt.plot(history.history['val_loss'], label='Validation Loss', linewidth=2, linestyle='--')
    plt.title('📉 Model Loss Over Time', fontsize=14, fontweight='bold')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Training vs Validation Accuracy
    plt.subplot(1, 2, 2)
    plt.plot(history.history['sparse_categorical_accuracy'], label='Training Accuracy', linewidth=2)
    plt.plot(history.history['val_sparse_categorical_accuracy'], label='Validation Accuracy', linewidth=2, linestyle='--')
    plt.title('🎯 Model Accuracy Over Time', fontsize=14, fontweight='bold')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


# Load data for training
def load_data_for_training():
    """
    Load training and validation data from CSV files
    """
    try:
        X_train = pd.read_csv('X_train.csv').values
        y_train = pd.read_csv('y_train.csv').values[:, 0].ravel() # Select the first column before flattening
        X_val = pd.read_csv('X_val.csv').values
        y_val = pd.read_csv('y_val.csv').values[:, 0].ravel() # Select the first column before flattening
        print(f"✅ Training set: {X_train.shape[0]} samples")
        print(f"✅ Validation set: {X_val.shape[0]} samples")
        return X_train, y_train, X_val, y_val
    except Exception as e:
        print(f"❌ Error loading training data: {e}")
        raise


# Load data for evaluation
# Load Test Data

def load_data_for_evaluation():
    """
    Load test data from CSV files
    """
    try:
        X_test = pd.read_csv('X_test.csv').values
        y_test = pd.read_csv('y_test.csv').values[:, 0].ravel() # Select the first column before flattening
        print(f"\n✅ Test set: {X_test.shape[0]} samples")
        return X_test, y_test
    except Exception as e:
        print(f"❌ Error loading test data: {e}")
        raise


# Evaluate model performance
def evaluate_model(recommender, X_test, y_test):
    print("\n📊 Evaluating Model...")
    try:
        # Scale test features
        X_test_scaled = recommender.preprocess_data(X_test) # No need to pass y_test for scaling

        # Compute loss and accuracy
        loss, acc = recommender.model.evaluate(X_test_scaled, y_test, verbose=0)

        # Get Predictions
        predictions = recommender.model.predict(X_test_scaled, verbose=0)
        pred_classes = np.argmax(predictions, axis=1)

        # Top-1 Accuracy
        top1_acc = np.mean(y_test == pred_classes)

        # Top-3 Accuracy
        top3_preds = np.argsort(predictions, axis=1)[:, -3:]
        top3_acc = np.mean([y_test[i] in top3_preds[i] for i in range(len(y_test))])

        # Classification Report/ report
        report = classification_report(y_test, pred_classes, output_dict=True)
        f1_macro = report['macro avg']['f1-score']
        f1_weighted = report['weighted avg']['f1-score']
        precision_macro = report['macro avg']['precision']
        recall_macro = report['macro avg']['recall']


        # Print results
        print("\n🎯 Evaluation Results:")
        print("=" * 60)
        print(f"📈 Top-1 Accuracy: {top1_acc:.4f} ({top1_acc*100:.1f}%)")
        print(f"🏅 Top-3 Accuracy: {top3_acc:.4f} ({top3_acc*100:.1f}%)")
        print(f"📉 Test Loss: {loss:.4f}")
        print(f"📊 F1 Score (Macro): {f1_macro:.4f}")
        print(f"📊 F1 Score (Weighted): {f1_weighted:.4f}")
        print(f"📊 Precision (Macro): {precision_macro:.4f}")
        print(f"📊 Recall (Macro): {recall_macro:.4f}")

        return top1_acc, top3_acc, loss, f1_macro, f1_weighted, precision_macro, recall_macro
    except Exception as e:
        print(f"❌ Evaluation error: {e}")
        return None, None, None, None, None, None, None


# Full Training pipeline
def train_pipeline():
    print("🚀 Starting Event Recommender Training Pipeline")
    print("=" * 60)

        #Load Training Data
    try:
        X_train, y_train, X_val, y_val = load_data_for_training()
    except Exception as e:
        print(f"❌ Training aborted: {e}")
        return


        #Build Model
    print("\n🏗 Building Neural Network Model...")
    recommender = EventRecommenderNN(input_dim=8, num_categories=8)
    model = recommender.build_model()
    model.summary()


        # Train Model
    print("\n🎯 Training Model...")
    history = recommender.train(X_train, y_train, X_val, y_val)


        #Visualize training
    print("\n📈 Visualizing Training Progress...")
    plot_training_history(history)



        # Save model + scaler
    print("\n💾 Saving trained model and scaler...")
    try:
        model.save('event_recommender_model.h5')
        joblib.dump(recommender.scaler, 'scaler.pkl')
        print("✅ Model and scaler saved!")
    except Exception as e:
        print(f"⚠ Could not save: {e}")


        # Evaluate model
    print("\n🔍 Evaluating on Test Data...")
    try:
        X_test, y_test = load_data_for_evaluation()
        evaluate_model(recommender, X_test, y_test)
    except Exception as e:
        print(f"❌ Evaluation failed: {e}")

    return recommender

# Main entry point

if __name__ == "__main__":
    train_pipeline()

IndentationError: unindent does not match any outer indentation level (<tokenize>, line 67)