# AI Assignment: Classification Models on Covertype Dataset
Author: Ravindu Perera

This notebook implements and evaluates Support Vector Machine (SVM), Naïve Bayes (NB), and Deep Neural Network (DNN) classifiers on the Covertype dataset.

In [None]:
# Install required packages if not already installed
!pip install scikit-learn pandas matplotlib seaborn imbalanced-learn tensorflow

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, accuracy_score
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import ConfusionMatrixDisplay
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical

## Load and Preprocess Dataset

In [None]:
# Fetch the Covertype dataset as pandas objects and keep the feature table
# and the label column under separate names.
data = fetch_covtype(as_frame=True)
X = data.data
y = data.target

# Quick sanity check of what was loaded: table dimensions and label count.
print("Dataset shape:", X.shape)
print("Number of classes:", np.unique(y).size)

In [None]:
# Hold out 20% of the real data as a stratified test set. The test rows are
# only ever transformed by the scaler, never resampled.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Normalize features. The scaler is fitted on the real training rows only, so
# its mean/variance are not influenced by synthetic samples or by the test set.
scaler = StandardScaler()
X_train_real_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply SMOTE *after* scaling: SMOTE builds synthetic rows by interpolating
# between nearest neighbours, and on unscaled data the neighbour search would
# be dominated by the features with the largest numeric range.
# X_train_scaled / y_resampled are the balanced training set used by every
# model below.
smote = SMOTE(random_state=42)
X_train_scaled, y_resampled = smote.fit_resample(X_train_real_scaled, y_train)

## SVM Model

In [None]:
# Kernel SVC fit time grows at least quadratically with the number of training
# rows, so fitting on the full resampled training set is impractical. Train on
# a stratified subsample instead; raise SVM_MAX_TRAIN_SAMPLES to trade run time
# for accuracy.
SVM_MAX_TRAIN_SAMPLES = 20_000

if X_train_scaled.shape[0] > SVM_MAX_TRAIN_SAMPLES:
    # train_test_split returns [X_keep, X_rest, y_keep, y_rest]; the [0::2]
    # slice keeps only the sampled part so the large remainder is freed at once.
    X_svm, y_svm = train_test_split(
        X_train_scaled,
        y_resampled,
        train_size=SVM_MAX_TRAIN_SAMPLES,
        stratify=y_resampled,
        random_state=42,
    )[0::2]
else:
    X_svm, y_svm = X_train_scaled, y_resampled

# Fit the RBF-kernel SVM and evaluate it on the untouched test set.
svm = SVC(kernel='rbf')
svm.fit(X_svm, y_svm)
y_pred_svm = svm.predict(X_test_scaled)
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))
print(classification_report(y_test, y_pred_svm))
# Trailing semicolon hides the display object's repr; the plot is still drawn.
ConfusionMatrixDisplay.from_predictions(y_test, y_pred_svm);

## Naïve Bayes Model

In [None]:
# Fit a Gaussian Naïve Bayes classifier on the training features/labels and
# predict the held-out test rows.
nb = GaussianNB().fit(X_train_scaled, y_resampled)
y_pred_nb = nb.predict(X_test_scaled)

# Headline accuracy followed by the per-class precision/recall/F1 table.
nb_accuracy = accuracy_score(y_test, y_pred_nb)
nb_report = classification_report(y_test, y_pred_nb)
print("Naïve Bayes Accuracy:", nb_accuracy)
print(nb_report)

# Confusion matrix of true vs. predicted cover types.
ConfusionMatrixDisplay.from_predictions(y_test, y_pred_nb)

## Deep Neural Network (DNN)

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable between runs (as far as the backend allows).
tf.keras.utils.set_random_seed(42)

# One-hot encode target for DNN. Labels are shifted by -1 so that they start
# at 0; num_classes is fixed so both arrays always have 7 columns, matching
# the 7-unit softmax output layer.
y_train_cat = to_categorical(y_resampled - 1, num_classes=7)
y_test_cat = to_categorical(y_test - 1, num_classes=7)

# Carve out an explicit, stratified 20% validation set. Keras'
# `validation_split` takes the *last* fraction of the arrays before any
# shuffling, and the training arrays are never shuffled after resampling, so
# that tail would not be a representative sample of the classes.
X_fit, X_val, y_fit_cat, y_val_cat = train_test_split(
    X_train_scaled,
    y_train_cat,
    test_size=0.2,
    stratify=y_resampled,
    random_state=42,
)

# Build model: two ReLU hidden layers followed by a 7-way softmax.
model = Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(7, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train; `history` keeps the per-epoch train/validation metrics for plotting.
history = model.fit(
    X_fit,
    y_fit_cat,
    epochs=20,
    batch_size=128,
    validation_data=(X_val, y_val_cat),
    verbose=1,
)

In [None]:
# Score the trained network on the held-out test set (one-hot targets).
loss, accuracy = model.evaluate(X_test_scaled, y_test_cat)
print("DNN Test Accuracy:", accuracy)

# Convert the per-class probabilities to class labels: argmax yields a
# 0-based column index, and the +1 undoes the -1 shift applied to the labels
# before one-hot encoding.
y_pred_dnn = model.predict(X_test_scaled)
y_pred_classes = y_pred_dnn.argmax(axis=1) + 1

# Per-class report and confusion matrix against the original test labels.
dnn_report = classification_report(y_test, y_pred_classes)
print(dnn_report)
ConfusionMatrixDisplay.from_predictions(y_test, y_pred_classes)

## Accuracy & Loss Plots

In [None]:
# Plot training & validation accuracy/loss per epoch, side by side.
# Explicit Axes objects are used so each panel is configured independently,
# and both axes are labelled so the figure can be read on its own.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

ax_acc.plot(history.history['accuracy'], label='Train Accuracy')
ax_acc.plot(history.history['val_accuracy'], label='Val Accuracy')
ax_acc.set(title='DNN Accuracy', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend()

ax_loss.plot(history.history['loss'], label='Train Loss')
ax_loss.plot(history.history['val_loss'], label='Val Loss')
ax_loss.set(title='DNN Loss', xlabel='Epoch', ylabel='Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()