In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import random

In [None]:
# Load the preprocessed feature table: every column except 'label' is a feature.
data = pd.read_csv('result/preprocessing.csv')
X_features = data.drop('label', axis=1).values
y = data['label'].values

# Map the raw labels to integer codes; `le` is reused later to recover class names.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split BEFORE scaling so that the held-out rows never contribute to the
# scaler's mean/variance (fitting on the full matrix leaks test statistics).
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    X_features, y_encoded, test_size=0.25, random_state=42
)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train_raw)  # statistics come from train rows only
x_test = scaler.transform(x_test_raw)        # test rows are only transformed

# Full matrix scaled with the train-only statistics. Kept because later cells
# slice columns out of `X_features_scaled` and re-split it with the same seed.
X_features_scaled = scaler.transform(X_features)

In [None]:
# Hyperparameter grid explored with 5-fold cross-validation on the training split.
param_grid = {'n_estimators': [50, 100, 200], 'max_features': ['sqrt', 'log2'], 'max_depth': [4, 6, 8, 10, 12]}

# Seed the forest so the search (and therefore the reported metrics) is
# reproducible under Restart Kernel -> Run All.
rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=5)
grid_search.fit(x_train, y_train)
best_params = grid_search.best_params_
print(f'Best parameters: {best_params}')

# GridSearchCV refits the winning configuration on the whole training split by
# default (refit=True), so `best_estimator_` is already fitted; fitting it a
# second time is redundant work.
best_rf = grid_search.best_estimator_
y_pred_rf = best_rf.predict(x_test)

# Weighted averaging accounts for class imbalance; zero_division=0 scores
# classes that were never predicted as 0 instead of emitting a warning.
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf, average='weighted', zero_division=0)
recall_rf = recall_score(y_test, y_pred_rf, average='weighted', zero_division=0)
f1_rf = f1_score(y_test, y_pred_rf, average='weighted', zero_division=0)
# Support-vector baseline with default hyperparameters (fit() returns the estimator).
svc_model = SVC().fit(x_train, y_train)
y_pred_svc = svc_model.predict(x_test)

# Accuracy takes no averaging options; the other three scores share the same ones.
accuracy_svc = accuracy_score(y_test, y_pred_svc)
svc_score_options = {'average': 'weighted', 'zero_division': 0}
precision_svc, recall_svc, f1_svc = (
    score_fn(y_test, y_pred_svc, **svc_score_options)
    for score_fn in (precision_score, recall_score, f1_score)
)

# k-nearest-neighbours baseline using the 5 closest training samples.
knn_model = KNeighborsClassifier(n_neighbors=5)
y_pred_knn = knn_model.fit(x_train, y_train).predict(x_test)

# Same weighted metrics as the other models so the results are comparable.
knn_scores = [
    accuracy_score(y_test, y_pred_knn),
    precision_score(y_test, y_pred_knn, average='weighted', zero_division=0),
    recall_score(y_test, y_pred_knn, average='weighted', zero_division=0),
    f1_score(y_test, y_pred_knn, average='weighted', zero_division=0),
]
accuracy_knn, precision_knn, recall_knn, f1_knn = knn_scores

In [None]:
def plot_confusion_matrix(y_true, y_pred, title):
    """Draw an annotated confusion-matrix heatmap for one model.

    Args:
        y_true: Integer-encoded ground-truth labels (as produced by ``le``).
        y_pred: Integer-encoded predicted labels, same length as ``y_true``.
        title: Text shown above the heatmap.

    The axes are labelled with ``le.classes_`` (module-level LabelEncoder).
    """
    # Pin the label set to every encoded class. Without `labels`, the matrix
    # only covers classes that occur in y_true/y_pred, so a class missing from
    # both would shrink the matrix and shift the tick labels onto wrong rows.
    class_ids = list(range(len(le.classes_)))
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=le.classes_, yticklabels=le.classes_)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title(title)
    plt.show()

# One heatmap per model, all scored against the same held-out labels.
for model_predictions, heatmap_title in (
    (y_pred_rf, 'Random Forest Confusion Matrix'),
    (y_pred_svc, 'SVC Confusion Matrix'),
    (y_pred_knn, 'KNN Confusion Matrix'),
):
    plot_confusion_matrix(y_test, model_predictions, heatmap_title)

def print_evaluation_metrics(accuracy, precision, recall, f1, model_name):
    """Print the four scores as percentages, one per line, then a blank line.

    Args:
        accuracy, precision, recall, f1: Scores as fractions (multiplied by 100
            for display and formatted with two decimals).
        model_name: Prefix placed at the start of every printed line.
    """
    labelled_scores = (
        ('Accuracy', accuracy),
        ('Precision', precision),
        ('Recall', recall),
        ('F1 Score', f1),
    )
    for metric_label, score in labelled_scores:
        print(f"{model_name} {metric_label}: {score * 100:.2f}%")
    # Trailing empty line separates consecutive model reports.
    print()

# Text report for each model, in the same order as the confusion matrices.
for model_label, model_scores in (
    ('Random Forest', (accuracy_rf, precision_rf, recall_rf, f1_rf)),
    ('SVC', (accuracy_svc, precision_svc, recall_svc, f1_svc)),
    ('KNN', (accuracy_knn, precision_knn, recall_knn, f1_knn)),
):
    print_evaluation_metrics(*model_scores, model_label)

In [None]:
# Keep only the columns whose importance in the tuned forest exceeds the threshold.
feature_importances = best_rf.feature_importances_
importance_threshold = 0.01
important_features = np.flatnonzero(feature_importances > importance_threshold)

# If nothing clears the threshold, fall back to using every feature.
if important_features.size == 0:
    important_features = np.arange(X_features_scaled.shape[1])

X_features_selected = X_features_scaled[:, important_features]

# Same test_size and random_state as the original split, so the rows land in
# the same train/test partitions as x_train / x_test.
x_train_sel, x_test_sel, y_train_sel, y_test_sel = train_test_split(X_features_selected, y_encoded, test_size=0.25, random_state=42)

# Train a NEW forest with the tuned hyperparameters instead of refitting
# `best_rf` in place. Mutating `best_rf` would replace its feature_importances_
# with ones for the reduced matrix, so re-running this cell would select the
# wrong columns of X_features_scaled.
rf_selected = RandomForestClassifier(**{**best_rf.get_params(), 'random_state': 42})
rf_selected.fit(x_train_sel, y_train_sel)
y_pred_rf_sel = rf_selected.predict(x_test_sel)
accuracy_rf_sel = accuracy_score(y_test_sel, y_pred_rf_sel)
precision_rf_sel = precision_score(y_test_sel, y_pred_rf_sel, average='weighted', zero_division=0)
recall_rf_sel = recall_score(y_test_sel, y_pred_rf_sel, average='weighted', zero_division=0)
f1_rf_sel = f1_score(y_test_sel, y_pred_rf_sel, average='weighted', zero_division=0)

print("Random Forest with Selected Features:")
print_evaluation_metrics(accuracy_rf_sel, precision_rf_sel, recall_rf_sel, f1_rf_sel, 'Random Forest')

plot_confusion_matrix(y_test_sel, y_pred_rf_sel, 'Random Forest with Selected Features Confusion Matrix')

In [None]:
def visualize_predictions(X_images, y_true, y_pred, title):
    """Show up to ten randomly chosen samples with predicted and true labels.

    Args:
        X_images: Indexable collection of images; ``X_images[i]`` must be the
            image belonging to ``y_true[i]`` / ``y_pred[i]``.
        y_true: Integer-encoded ground-truth labels.
        y_pred: Integer-encoded predicted labels, aligned with ``y_true``.
        title: Figure-level title.

    Class names are recovered through the module-level LabelEncoder ``le``.
    """
    fig, ax = plt.subplots(2, 5, figsize=(15, 6))
    panels = ax.ravel()

    # Sample WITHOUT replacement so no image appears twice, and never request
    # more samples than exist (randint(0, -1) raised on empty input).
    n_shown = min(len(panels), len(y_true))
    chosen = random.sample(range(len(y_true)), n_shown)

    for panel, idx in zip(panels, chosen):
        predicted_name = le.inverse_transform([y_pred[idx]])[0]
        true_name = le.inverse_transform([y_true[idx]])[0]
        panel.imshow(X_images[idx])
        panel.set_title(f"Pred: {predicted_name}\nTrue: {true_name}")

    # Hide the axes on every panel, including any left empty.
    for panel in panels:
        panel.axis('off')

    plt.tight_layout()
    plt.suptitle(title, y=1.05)
    plt.show()

In [None]:
original_images = np.load('original_images.npy')

# visualize_predictions indexes images and labels with the same position, so
# the images must be restricted to the test rows. Passing the full array shows
# images that do not belong to the Pred/True labels in their titles.
# NOTE(review): assumes the images are stored in the same row order as the
# feature CSV - confirm against the preprocessing step.
if len(original_images) == len(y_test):
    # Already one image per test sample.
    test_images = original_images
elif len(original_images) == len(y_encoded):
    # Same test_size / random_state as the feature split -> same test rows.
    _, test_images = train_test_split(original_images, test_size=0.25, random_state=42)
else:
    raise ValueError(
        f"original_images has {len(original_images)} entries, expected "
        f"{len(y_encoded)} (full dataset) or {len(y_test)} (test split)"
    )

visualize_predictions(test_images, y_test, y_pred_rf, 'Random Forest Predictions')
visualize_predictions(test_images, y_test_sel, y_pred_rf_sel, 'Random Forest with Selected Features Predictions')

# One row per evaluated model; written to CSV without the index column.
result_columns = ['Model', 'Accuracy', 'Precision', 'Recall', 'F1 Score']
result_rows = [
    ('Random Forest', accuracy_rf, precision_rf, recall_rf, f1_rf),
    ('SVC', accuracy_svc, precision_svc, recall_svc, f1_svc),
    ('KNN', accuracy_knn, precision_knn, recall_knn, f1_knn),
    ('Random Forest with Selected Features', accuracy_rf_sel, precision_rf_sel, recall_rf_sel, f1_rf_sel),
]
results = pd.DataFrame(result_rows, columns=result_columns)
results.to_csv('model_evaluation_results.csv', index=False)