Import Libraries

In [1]:
import os
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

Load Dataset

In [2]:
def _load_split(name):
    """Load one saved array from the augmented-data folder by its base file name."""
    # NOTE(review): allow_pickle=True permits unpickling object arrays, which can
    # execute arbitrary code — only use this on trusted files.
    return np.load(f'data/augmented/{name}.npy', allow_pickle=True)


print("Loading data...")
# Same load order as before: features (train, test), then labels (train, test).
X_train = _load_split('X_train')
X_test = _load_split('X_test')
y_train = _load_split('y_train')
y_test = _load_split('y_test')

print("Data loaded successfully.")

Loading data...
Data loaded successfully.


Data Inspection & Label Encoding

In [3]:
# Report the dimensions of every split so a shape mismatch is visible early.
print("--- Array Shapes ---")
for split_name, split_array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    # Pad "<name>:" to 8 characters so the shape tuples line up in one column.
    split_label = f"{split_name}:"
    print(f"{split_label:<8} {split_array.shape}")

# Fit the encoder on the training labels only, then reuse that fitted mapping
# for the test labels so both splits share the same integer codes.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_test_enc = le.transform(y_test)
# Class names in the order the encoder stores them.
class_names = le.classes_

print(f"\nClasses detected: {class_names}")
print(f"Original Feature Count: {X_train.shape[1]}")

--- Array Shapes ---
X_train: (3456, 86)
y_train: (3456,)
X_test:  (144, 86)
y_test:  (144,)

Classes detected: ['german' 'italian' 'korean' 'spanish']
Original Feature Count: 86


Initialize Classifiers

In [4]:
# Single seed shared by every estimator that accepts random_state, so the value
# is changed in one place instead of four.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name that is later written to the
# metrics table and used as the confusion-matrix dictionary key.
# NOTE(review): KNN, SVM, logistic regression and the MLP are sensitive to
# feature scale; no scaling step is visible in this notebook — confirm the
# features are standardized upstream.
models = {
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Logistic Regression": LogisticRegression(random_state=RANDOM_STATE, max_iter=1000),
    # The key was mis-encoded ("Na√Øve"); restored to the intended spelling so
    # the saved metrics and confusion-matrix keys are readable.
    "Naïve Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    "SVM (RBF)": SVC(kernel='rbf', random_state=RANDOM_STATE),
    "MLP": MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=RANDOM_STATE)
}

print("Models initialized.")

Models initialized.


Train Models & Save Results

In [5]:
# Make sure the output folder exists. exist_ok=True removes the gap between the
# existence check and the creation; the message is still printed only when the
# folder was not there beforehand.
output_dir = 'classifier_result'
dir_was_missing = not os.path.exists(output_dir)
os.makedirs(output_dir, exist_ok=True)
if dir_was_missing:
    print(f"Directory '{output_dir}' created.")

# Encoded label ids 0..n_classes-1, in the same order as class_names. Passing
# them explicitly keeps every confusion matrix n_classes x n_classes (and
# aligned with class_names) even when a class is absent from both the test
# labels and a model's predictions.
label_ids = np.arange(len(class_names))

# Shared settings for the multi-class metrics: per-class scores are averaged
# weighted by support, and undefined ratios are reported as 0.
metric_kwargs = {"average": "weighted", "zero_division": 0}

results = []
confusion_matrices = {}

print("Training models...")

for name, model in models.items():
    # Fit on the training split, then predict the held-out test split.
    model.fit(X_train, y_train_enc)
    y_pred = model.predict(X_test)

    # Score the predictions against the encoded test labels.
    acc = accuracy_score(y_test_enc, y_pred)
    prec = precision_score(y_test_enc, y_pred, **metric_kwargs)
    rec = recall_score(y_test_enc, y_pred, **metric_kwargs)
    f1 = f1_score(y_test_enc, y_pred, **metric_kwargs)

    # One row per model; assembled into a DataFrame after the loop.
    results.append({
        "Model": name,
        "Accuracy": acc,
        "Precision": prec,
        "Recall": rec,
        "F1-Score": f1
    })

    # Keep the confusion matrix so it can be plotted later from the saved file.
    cm = confusion_matrix(y_test_enc, y_pred, labels=label_ids)
    confusion_matrices[name] = cm

# Save metrics to CSV, best accuracy first.
results_df = pd.DataFrame(results).sort_values(by='Accuracy', ascending=False)
csv_path = os.path.join(output_dir, 'metrics.csv')
results_df.to_csv(csv_path, index=False)
print(f"Metrics saved to {csv_path}")

# Save confusion matrices and class names in one .npy file. np.save stores the
# dict as a pickled 0-d object array, so it must be read back with
# np.load(npy_path, allow_pickle=True).item().
npy_path = os.path.join(output_dir, 'confusion_matrices.npy')
np.save(npy_path, {'cms': confusion_matrices, 'classes': class_names})
print(f"Confusion matrices data saved to {npy_path}")

print("\nProcessing complete. No plots displayed.")

Directory 'classifier_result' created.
Training models...
Metrics saved to classifier_result/metrics.csv
Confusion matrices data saved to classifier_result/confusion_matrices.npy

Processing complete. No plots displayed.
