In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage import feature
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import SMOTE

In [3]:
# feature extraction
def get_lbp_features(image):
    lbp = feature.local_binary_pattern(image, P=24, R=8, method="uniform")
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, 27), range=(0, 26))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

In [4]:
def get_hog_features(image):
    features = hog(image, orientations=9, pixels_per_cell=(8, 8),
                   cells_per_block=(2, 2), block_norm="L2-Hys", visualize=False)
    return features

In [5]:
def extract_features(images):
    features = []
    for img in tqdm(images, desc="Extracting Features"):
        lbp_feat = get_lbp_features(img)
        hog_feat = get_hog_features(img)
        combined = np.hstack([lbp_feat, hog_feat])
        features.append(combined)
    return np.array(features)

In [6]:
# Loading the dataset
df = pd.read_csv("fer2013.csv")


In [7]:
# Loading the dataset
emotion_labels = {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
                  4: 'sad', 5: 'surprise', 6: 'neutral'}

tqdm.pandas()
df['image'] = df['pixels'].progress_apply(lambda x: np.array(x.split(' '), 'float32').reshape(48, 48))
X = np.array(df['image'].tolist())
y = np.array(df['emotion'].tolist())

100%|██████████████████████████████████████████████████████████████████████████| 35887/35887 [00:21<00:00, 1695.44it/s]


In [8]:
# extracting features
X_features = extract_features(X)

Extracting Features: 100%|██████████████████████████████████████████████████████| 35887/35887 [02:08<00:00, 280.12it/s]


In [9]:
X_features.shape

(35887, 926)

In [10]:
# train test split
X_train, X_test, y_train, y_test = train_test_split(X_features, y, test_size=0.2, stratify=y, random_state=42)

In [11]:
# scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
import imblearn
print(imblearn.__version__)

0.14.0


In [13]:
# balancing the data with SMOTE
sm = SMOTE(random_state=42)
X_train_bal, y_train_bal = sm.fit_resample(X_train_scaled, y_train)

In [14]:
# PCA
pca = PCA(n_components=100, random_state=42)
X_train_pca = pca.fit_transform(X_train_bal)
X_test_pca = pca.transform(X_test_scaled)

In [14]:
# # Ensemble model
# svm = SVC(kernel='linear', C=10, gamma='scale', probability=True, class_weight='balanced', random_state=42, verbose=True)
# rf = RandomForestClassifier(n_estimators=300, class_weight="balanced", random_state=42, n_jobs=-1)
# gb = GradientBoostingClassifier(n_estimators=200, random_state=42)

In [15]:
# ensemble = VotingClassifier(
#     estimators=[('svm', svm), ('rf', rf), ('gb', gb)],
#     voting='soft'
# )

In [None]:
from tqdm import tqdm
import joblib

# Save preprocessed data
joblib.dump((X_train_pca, y_train_bal, X_test_pca, y_test), 'preprocessed_data.pkl')
print("Preprocessed data saved as 'preprocessed_data.pkl'")

# Ensemble model
rf = RandomForestClassifier(n_estimators=300, class_weight="balanced", random_state=42, n_jobs=-1)
svm = SVC(kernel='linear', C=10, probability=True, class_weight='balanced', random_state=42, verbose=True)
gb = GradientBoostingClassifier(n_estimators=200, random_state=42)

# Train each model with tqdm
estimators = [('Random Forest', rf), ('SVM', svm), ('Gradient Boosting', gb)]
print("Training ensemble model...")
for name, estimator in tqdm(estimators, desc="Training Estimators"):
    print(f"Training {name}...")
    estimator.fit(X_train_pca, y_train_bal)

# Create VotingClassifier
ensemble = VotingClassifier(estimators=[('svm', svm), ('rf', rf), ('gb', gb)], voting='soft')

# Save ensemble
joblib.dump(ensemble, 'ensemble_model.pkl')
joblib.dump(scaler, 'scaler_ensemble_model.pkl')
print("Ensemble model saved as 'ensemble_model.pkl and scaler_ensemble_model.pkl'")

Preprocessed data saved as 'preprocessed_data.pkl'
Training ensemble model...


Training Estimators:   0%|                                                                       | 0/3 [00:00<?, ?it/s]

Training Random Forest...


Training Estimators:  33%|█████████████████████                                          | 1/3 [00:33<01:07, 33.67s/it]

Training SVM...
[LibSVM]

In [None]:
# evaluation
y_pred = ensemble.predict(X_test_pca)
accuracy = accuracy_score(y_test, y_pred)

print(f"\nEnsemble Model Accuracy: {accuracy*100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=emotion_labels.values()))

In [None]:
# model saving
# import joblib

In [None]:
# # Save the ensemble model and the scaler to disk
# joblib.dump(ensemble, 'ensemble_model.pkl')
# joblib.dump(scaler, 'scalernew.pkl')

# print("Ensemble model and scaler saved as 'ensemble_model.pkl' and 'scaler.pkl'")

# # Download the files to your local machine
# files.download('ensemble_model.pkl')
# files.download('scalernew.pkl')