In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage import feature
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE
import joblib

In [2]:
# feature extraction
def get_lbp_features(image):
    lbp = feature.local_binary_pattern(image, P=24, R=8, method="uniform")
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, 27), range=(0, 26))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

In [3]:
def get_hog_features(image):
    features = hog(image, orientations=9, pixels_per_cell=(8, 8),
                   cells_per_block=(2, 2), block_norm="L2-Hys", visualize=False)
    return features

In [4]:
def extract_features(images):
    features = []
    for img in tqdm(images, desc="Extracting Features"):
        lbp_feat = get_lbp_features(img)
        hog_feat = get_hog_features(img)
        combined = np.hstack([lbp_feat, hog_feat])
        features.append(combined)
    return np.array(features)

In [5]:
# Loading the dataset
df = pd.read_csv("fer2013.csv")

In [6]:
# Loading the dataset
emotion_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
                  4: 'sad', 5: 'surprise', 6: 'neutral'}

tqdm.pandas()
df['image'] = df['pixels'].progress_apply(lambda x: np.array(x.split(' '), 'float32').reshape(48, 48))
X = np.array(df['image'].tolist())
y = np.array(df['emotion'].tolist())

100%|██████████████████████████████████████████████████████████████████████████| 35887/35887 [00:22<00:00, 1588.65it/s]


In [7]:
# extracting features
X_features = extract_features(X)

Extracting Features: 100%|██████████████████████████████████████████████████████| 35887/35887 [02:19<00:00, 256.42it/s]


In [8]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, stratify=y, random_state=42)

In [9]:
# scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# balancing the data with SMOTE
sm = SMOTE(random_state=42)
X_train_bal, y_train_bal = sm.fit_resample(X_train_scaled, y_train)

In [11]:
# PCA
pca = PCA(n_components=150, random_state=42)
X_train_pca = pca.fit_transform(X_train_bal)
X_test_pca = pca.transform(X_test_scaled)

In [12]:
# Save preprocessed data to avoid re-running earlier steps
# joblib.dump((X_train_pca, y_train_bal, X_test_pca, y_test), 'preprocessed_data.pkl')
# print("Preprocessed data saved as 'preprocessed_data.pkl'")

import joblib
X_train_pca, y_train_bal, X_test_pca, y_test = joblib.load('preprocessed_data.pkl')
print("Preprocessed data loaded from 'preprocessed_data.pkl'")

Preprocessed data loaded from 'preprocessed_data.pkl'


In [14]:
# Ensemble model (without SVM)
xgb = XGBClassifier(n_estimators=100, learning_rate=0.2, random_state=42, n_jobs=-1, verbosity=1)
rf = RandomForestClassifier(n_estimators=300, class_weight="balanced", random_state=42, n_jobs=-1)

# Train each model with tqdm
estimators = [('XGradient Boosting', xgb), ('Random Forest', rf)]
print("Training ensemble model...")
for name, estimator in tqdm(estimators, desc="Training Estimators"):
    print(f"Training {name}...")
    estimator.fit(X_train_pca, y_train_bal)

# Create VotingClassifier
ensemble = VotingClassifier(estimators=[('xgb', xgb), ('rf', rf)], voting='soft')

# Save ensemble and scaler and pca
joblib.dump(ensemble, 'ensemble_model_new.pkl')
joblib.dump(scaler, 'scaler_new.pkl')
joblib.dump(pca, 'pca_new.pkl')
print("Ensemble model and scaler saved as 'ensemble_model_new.pkl' and 'scaler_new.pkl'")

Training ensemble model...


Training Estimators:   0%|                                                                       | 0/2 [00:00<?, ?it/s]

Training XGradient Boosting...


Training Estimators:  50%|███████████████████████████████▌                               | 1/2 [00:16<00:16, 16.70s/it]

Training Random Forest...


Training Estimators: 100%|███████████████████████████████████████████████████████████████| 2/2 [00:57<00:00, 28.78s/it]


Ensemble model and scaler saved as 'ensemble_model_new.pkl' and 'scaler_new.pkl'


In [15]:
# Evaluation

In [16]:
import joblib
import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Define emotion labels
emotion_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
                  4: 'sad', 5: 'surprise', 6: 'neutral'}

# Load preprocessed data
try:
    X_train_pca, y_train_bal, X_test_pca, y_test = joblib.load('preprocessed_data.pkl')
    print("Loaded preprocessed data")
    print(f"X_test_pca shape: {X_test_pca.shape}, y_test shape: {y_test.shape}")
except FileNotFoundError:
    print("Error: 'preprocessed_data.pkl' not found. Please ensure the file exists.")
    exit()

# Load the ensemble model
try:
    ensemble = joblib.load('ensemble_model_new.pkl')
    print("Loaded ensemble model")
    print("Ensemble estimators:", ensemble.estimators)
    print("Has fitted estimators_:", hasattr(ensemble, 'estimators_'))
except FileNotFoundError:
    print("Error: 'ensemble_model_new.pkl' not found. Please ensure the file exists.")
    exit()

# Check if estimators are fitted
xgb, rf = ensemble.estimators[0][1], ensemble.estimators[1][1]
xgb_fitted = hasattr(xgb, 'get_booster') and xgb.get_booster().num_boosted_rounds() > 0
rf_fitted = hasattr(rf, 'estimators_') and len(rf.estimators_) > 0
print("XGBoost fitted:", xgb_fitted)
print("Random Forest fitted:", rf_fitted)

# If not fitted, raise an error (retraining should be done separately)
if not (xgb_fitted and rf_fitted):
    print("Error: One or both estimators are not fitted. Please retrain using the training code.")
    exit()

# Initialize VotingClassifier for soft voting (quick, ~1-5 seconds)
ensemble.fit(X_train_pca, y_train_bal)
print("VotingClassifier initialized for soft voting")

# Evaluation
y_pred = ensemble.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)

print(f"\nEnsemble Model Accuracy: {accuracy*100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=emotion_labels.values()))

# Save the properly fitted ensemble
joblib.dump(ensemble, 'ensemble_model_fitted.pkl')
print("Fitted ensemble model saved as 'ensemble_model_fitted.pkl'")

Loaded preprocessed data
X_test_pca shape: (7178, 150), y_test shape: (7178,)
Loaded ensemble model
Ensemble estimators: [('xgb', XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              feature_weights=None, gamma=None, grow_policy=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.2, max_bin=None, max_cat_threshold=None,
              max_cat_to_onehot=None, max_delta_step=None, max_depth=None,
              max_leaves=None, min_child_weight=None, missing=nan,
              monotone_constraints=None, multi_strategy=None, n_estimators=100,
              n_jobs=-1, num_parallel_tree=None, ...)), ('rf', RandomForestClassifier(class_weight='balanced', n_estimators=300, n_jobs=-1,
                       rand