In [7]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.io import imread
from skimage.transform import resize
from skimage.feature import hog
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay

# Every image is resized to this (rows, cols) shape before HOG extraction.
img_size = (48, 48)

# Directory holding this script; when __file__ is undefined (e.g. in an
# interactive session) fall back to the current working directory.
try:
    _this_file = os.path.abspath(__file__)
except NameError:
    script_dir = os.getcwd()
else:
    script_dir = os.path.dirname(_this_file)

# The dataset folder sits next to the script's parent directory.
project_root = os.path.dirname(script_dir)
_dataset_root = os.path.join(project_root, "facial_expressions")
legend_path = os.path.join(_dataset_root, "data", "legend.csv")
image_dir = os.path.join(_dataset_root, "images")

# Clean Data
def load_data_hog():
    """Load the labelled images and convert each one to a HOG feature vector.

    Reads the legend CSV at ``legend_path``, normalises its column names and
    the ``emotion`` labels (lower-cased, stripped), then for every row whose
    image file exists under ``image_dir``: reads the image as grayscale,
    resizes it to ``img_size`` and extracts HOG features.

    Rows are skipped (and counted in a final summary message) when the image
    name or label is missing, the file does not exist, or the image cannot be
    read/processed.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays holding the feature vectors and the
        matching emotion labels, or ``(None, None)`` when the legend CSV is
        missing or lacks the ``image``/``emotion`` columns.
    """
    if not os.path.exists(legend_path):
        print(f"Error: CSV not found at {legend_path}")
        return None, None

    df = pd.read_csv(legend_path)
    df = df.rename(columns=lambda name: name.lower().strip())

    missing_cols = {"image", "emotion"} - set(df.columns)
    if missing_cols:
        print(f"Error: CSV at {legend_path} is missing column(s): {sorted(missing_cols)}")
        return None, None

    # Drop incomplete rows up front: a NaN image name would make os.path.join
    # raise TypeError, and a NaN label would otherwise become the class "nan".
    labelled = df.dropna(subset=["image", "emotion"])
    unlabelled_rows = len(df) - len(labelled)
    image_names = labelled["image"].astype(str)
    emotions = labelled["emotion"].astype(str).str.lower().str.strip()

    X = []
    y = []
    missing_files = 0
    failed_reads = 0
    first_error = None

    for img_name, emotion in zip(image_names, emotions):
        full_path = os.path.join(image_dir, img_name)
        if not os.path.exists(full_path):
            missing_files += 1
            continue

        try:
            img = imread(full_path, as_gray=True)
            img = resize(img, img_size, anti_aliasing=True)
            features = hog(img, orientations=9, pixels_per_cell=(8, 8),
                           cells_per_block=(2, 2), block_norm="L2-Hys")
        except Exception as exc:
            # Best effort: one bad image must not abort the whole load, but
            # the failure is recorded instead of being silently discarded.
            failed_reads += 1
            if first_error is None:
                first_error = f"{img_name}: {exc}"
            continue

        X.append(features)
        y.append(emotion)

        # Report progress only right after a successful append, so the same
        # count is never printed repeatedly while skipping rows.
        if len(X) % 1000 == 0:
            print(f"Processed {len(X)} images...")

    if unlabelled_rows or missing_files or failed_reads:
        print(f"Skipped rows: {unlabelled_rows} incomplete, "
              f"{missing_files} missing files, {failed_reads} unreadable images")
    if first_error is not None:
        print(f"First read failure: {first_error}")

    return np.array(X), np.array(y)

# Load Data
X, y = load_data_hog()

if X is None or len(X) == 0:
    print("CRITICAL ERROR: No data loaded.")
    # Raise SystemExit directly instead of calling exit(): that helper is
    # injected by the `site` module for interactive use, and calling it with
    # no argument would report success (status 0) despite the failure.
    raise SystemExit(1)

print(f"Final Data Shape: {X.shape}")

# 80 / 20 Data Split, stratified so both parts keep the class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


# Adaptation Graph: sweep the tree depth limit and record accuracy on the
# training split and on the held-out split for each setting.
depths = [5, 10, 20, 50, 100, None]
train_accs = []
test_accs = []

print("Running Depth Adaptation...")
for d in depths:
    # Identical settings for every tree; only the depth limit varies.
    clf = DecisionTreeClassifier(
        max_depth=d,
        min_samples_split=2,
        class_weight='balanced',
        random_state=42,
    )
    clf.fit(X_train, y_train)

    train_pred = clf.predict(X_train)
    train_accs.append(accuracy_score(y_train, train_pred))
    test_pred = clf.predict(X_test)
    test_accs.append(accuracy_score(y_test, test_pred))

# Depths are plotted as categorical labels so that None (unlimited) gets a tick.
x_labels = list(map(str, depths))

plt.figure(figsize=(10, 6))
plt.plot(x_labels, train_accs, linestyle='--', marker='o', label='Training Accuracy')
plt.plot(x_labels, test_accs, linewidth=3, marker='o', label='Validation Accuracy')
plt.xlabel('Tree Max Depth')
plt.ylabel('Accuracy')
plt.title('Adaptation: Depth vs. Accuracy (Aggressive Splitting)')
plt.grid(True, alpha=0.3)
plt.legend()

graph_path = os.path.join(script_dir, 'adaptation_graph.png')
plt.savefig(graph_path)
print("Graph saved as 'adaptation_graph.png'")


# Hyperparameter Tuning: 5-fold cross-validated grid search over depth limit,
# minimum split size and cost-complexity pruning strength.
param_grid = dict(
    criterion=['gini'],
    max_depth=[None, 50],
    min_samples_split=[2, 5],
    ccp_alpha=[0.0, 0.001, 0.002],
)

grid = GridSearchCV(
    estimator=DecisionTreeClassifier(class_weight='balanced', random_state=42),
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,  # use every available core
)
grid.fit(X_train, y_train)

# Keep the estimator GridSearchCV selected as best.
best_model = grid.best_estimator_
print("Best Parameters:", grid.best_params_)

# Results: score the tuned tree on the held-out split.
y_pred = best_model.predict(X_test)
final_acc = accuracy_score(y_test, y_pred)
final_f1 = f1_score(y_test, y_pred, average='weighted')

print("\n" + "="*40)
print("="*40)
print("Model:      Decision Tree (Optimized)")
print(f"Accuracy:   {final_acc:.2%}")
print(f"F1-Score:   {final_f1:.4f}")
print("="*40)

# Confusion Matrix Plot
# Create the axes explicitly and hand them to scikit-learn: without `ax=`,
# from_predictions opens its own figure, so a separately created figure would
# stay blank and its figsize would never apply to the plot.
fig, ax = plt.subplots(figsize=(10, 8))
ConfusionMatrixDisplay.from_predictions(
    y_test, y_pred, cmap=plt.cm.Blues, xticks_rotation='vertical', ax=ax
)
ax.set_title(f'Confusion Matrix (Acc={final_acc:.2%})')
fig.tight_layout()
fig.savefig(os.path.join(script_dir, 'confusion_matrix.png'))
print("Confusion Matrix saved as 'confusion_matrix.png'")
plt.show()

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 10 concurrent workers.
[Parallel(n_jobs=-1)]: Done  54 tasks      | elapsed:    1.3s
[Parallel(n_jobs=-1)]: Done 1034 tasks      | elapsed:    2.1s


KeyboardInterrupt: 