<a href="https://colab.research.google.com/github/Su-ok/MT2025124_ML_Project2/blob/main/MultiNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Attach Google Drive to the Colab VM; the dataset CSV is read from under
# /content/drive later in the notebook.
from google.colab import drive

drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# ==== IMPORTS ====
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import warnings

# Silence every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides any convergence
# warnings from the epoch-capped MLP fit further down -- consider narrowing it.
warnings.simplefilter("ignore")

# Seed NumPy's legacy global RNG so any unseeded NumPy randomness is repeatable.
SEED = 42
np.random.seed(SEED)

# ==== LOAD DATA ====
# Forest cover-type table, read from the mounted Google Drive.
DATA_PATH = "/content/drive/MyDrive/ML kaggle data/forest_cover/covtype.csv"
data = pd.read_csv(DATA_PATH)

print("Dataset shape:", data.shape)

# ==== PREPROCESSING ====
# "Cover_Type" is the label; every remaining column is used as a feature.
y = data["Cover_Type"]
X = data.drop(columns="Cover_Type")

print("Number of classes:", np.unique(y).size)

# ==== STRATIFIED TRAIN–VALIDATION SPLIT ====
# Split BEFORE scaling: the validation rows must not contribute to the
# scaler's mean/variance, otherwise information about the hold-out set leaks
# into the training features and the validation score is optimistic.
# stratify=y keeps the class proportions equal in both partitions.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

# ==== SCALING ====
# Learn the standardisation statistics from the training partition only,
# then apply those same statistics to the validation partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

print("X_train:", X_train.shape, "X_val:", X_val.shape)

# ==== DEFINE MULTICLASS MLP ====
# Two-hidden-layer feed-forward classifier trained with Adam. Early stopping
# monitors a slice held out from X_train (separate from X_val, which stays
# untouched for the final evaluation).
mlp = MLPClassifier(
    hidden_layer_sizes=(128, 64),   # two hidden layers: 128 units, then 64
    activation="relu",
    solver="adam",
    alpha=0.0005,                   # L2 regularization to reduce overfitting
    learning_rate_init=0.001,
    early_stopping=True,            # hold out part of X_train as an internal validation set
    validation_fraction=0.1,        # size of that hold-out: 10% (sklearn's default, made explicit)
    n_iter_no_change=10,            # stop after 10 epochs without sufficient validation improvement
    max_iter=50,                    # hard cap on epochs; ~465k training rows make each epoch slow
    random_state=42
)

print("\nTraining Neural Network using adam...")
mlp.fit(X_train, y_train)

# ==== EVALUATION ====
# Predict on both partitions with the fitted network, then compare plain
# accuracy; the gap between the two numbers indicates how much the model
# overfits. The per-class report and confusion matrix are printed in the
# following cell.
y_train_pred, y_val_pred = (mlp.predict(features) for features in (X_train, X_val))

train_acc = accuracy_score(y_train, y_train_pred)
val_acc = accuracy_score(y_val, y_val_pred)

print(f"\nTraining Accuracy (MLP):   {train_acc:.6f}")
print(f"Validation Accuracy (MLP): {val_acc:.6f}")

print("\nForest Cover MLP training completed successfully.")

Dataset shape: (581012, 55)
Number of classes: 7
X_train: (464809, 54) X_val: (116203, 54)

Training Neural Network using adam...

Training Accuracy (MLP):   0.912601
Validation Accuracy (MLP): 0.904598

Forest Cover MLP training completed successfully.


In [None]:
# Per-class precision / recall / F1 on the validation predictions.
val_report = classification_report(y_val, y_val_pred)
print("\nClassification Report:")
print(val_report)

# Confusion matrix: rows are true classes, columns are predicted classes.
val_confusion = confusion_matrix(y_val, y_val_pred)
print("\nConfusion Matrix:")
print(val_confusion)


Classification Report:
              precision    recall  f1-score   support

           1       0.90      0.91      0.90     42368
           2       0.92      0.92      0.92     56661
           3       0.86      0.94      0.90      7151
           4       0.81      0.81      0.81       549
           5       0.80      0.69      0.74      1899
           6       0.83      0.76      0.80      3473
           7       0.96      0.87      0.91      4102

    accuracy                           0.90    116203
   macro avg       0.87      0.84      0.85    116203
weighted avg       0.90      0.90      0.90    116203


Confusion Matrix:
[[38415  3766    11     0    37     7   132]
 [ 3764 52033   334     1   270   232    27]
 [    1   124  6698    59    11   258     0]
 [    0     0    81   446     0    22     0]
 [   79   457    47     0  1308     8     0]
 [   11   154   612    45     3  2648     0]
 [  497    35     0     0     1     0  3569]]
