In [1]:
# Emotion Detection ML Web App - Dual Mode (Text + Face)
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.svm import SVC
from skimage.feature import hog
from tqdm import tqdm
from sklearn.svm import SVC
from joblib import Parallel, delayed

# ---------------------
# SECTION 1: TEXT-BASED EMOTION DETECTION
# ---------------------
print("=== TEXT MODE ===")

text_path = "text_dataset"
train_file = os.path.join(text_path, "train.txt")
val_file = os.path.join(text_path, "val.txt")

# Check if files exist
if not os.path.exists(train_file) or not os.path.exists(val_file):
    raise FileNotFoundError(f"Missing files in {text_path}. Ensure 'train.txt' and 'val.txt' exist.")

# Load text data with correct delimiter
df_train = pd.read_csv(train_file, sep=";", names=["text", "label"])
df_val = pd.read_csv(val_file, sep=";", names=["text", "label"])


# Drop rows with missing values
df_train.dropna(subset=['text', 'label'], inplace=True)
df_val.dropna(subset=['text', 'label'], inplace=True)

# Strip and remove empty text after stripping whitespace
df_train['text'] = df_train['text'].astype(str).str.strip()
df_val['text'] = df_val['text'].astype(str).str.strip()
df_train = df_train[df_train['text'].str.len() > 0]
df_val = df_val[df_val['text'].str.len() > 0]
# Combine for vectorization
df_all = pd.concat([df_train, df_val], axis=0)
X_text = df_all['text']
y_text = df_all['label']

# Text Vectorization
vectorizer = TfidfVectorizer(max_features=3000)
try:
    X_vect = vectorizer.fit_transform(X_text).toarray()
except ValueError as e:
    print("Vectorization failed:", e)
    print("Sample input texts:", X_text.head())
    raise

# Train/test split
X_train_text, X_test_text, y_train_text, y_test_text = train_test_split(
    X_vect, y_text, test_size=0.2, random_state=42)

# Drop NaNs if they exist (extra safeguard)
valid_idx = y_train_text.notnull()
X_train_text = X_train_text[valid_idx]
y_train_text = y_train_text[valid_idx]

valid_idx = y_test_text.notnull()
X_test_text = X_test_text[valid_idx]
y_test_text = y_test_text[valid_idx]

# Train Logistic Regression
text_model = LogisticRegression(max_iter=1000)
text_model.fit(X_train_text, y_train_text)

# Evaluate
y_pred_text = text_model.predict(X_test_text)
print("Text Classification Report:\n", metrics.classification_report(y_test_text, y_pred_text))
print("Text Accuracy: {:.2f}%".format(metrics.accuracy_score(y_test_text, y_pred_text) * 100))


# ---------------------
# SECTION 3: FACE-BASED EMOTION DETECTION
# ---------------------
print("\n=== FACE MODE ===")

# Paths
face_root = "face_dataset"
train_csv_path = os.path.join(face_root, "train_labels.csv")
test_csv_path = os.path.join(face_root, "test_labels.csv")

# Check if files exist
if not os.path.exists(train_csv_path) or not os.path.exists(test_csv_path):
    raise FileNotFoundError(f"Missing face CSV files in {face_root}. Ensure 'train_labels.csv' and 'test_labels.csv' exist.")

# Load CSVs
full_train_csv = pd.read_csv(train_csv_path)
test_csv = pd.read_csv(test_csv_path)

# Optional: Enable lite mode
lite_mode = False
if lite_mode:
    full_train_csv = full_train_csv.sample(800, random_state=42)
    test_csv = test_csv.sample(200, random_state=42)

# Split into train and validation
train_csv, val_csv = train_test_split(full_train_csv, test_size=0.2, random_state=42)

# Parallel image processing function
def process_image(row, base_path):
    label = row['label']
    filename = row['image']
    img_path = os.path.join(face_root, base_path, str(label), filename)
    if not os.path.exists(img_path):
        return None, None
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None, None
    img_resized = cv2.resize(img, (64, 64))
    hog_feat = hog(img_resized, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
    return hog_feat, label

# Parallel loader
def load_face_data_parallel(df, base_path):
    results = Parallel(n_jobs=-1)(delayed(process_image)(row, base_path) for _, row in tqdm(df.iterrows(), total=len(df)))
    features, labels = zip(*[(f, l) for f, l in results if f is not None])
    return np.array(features), np.array(labels)

# Load features
X_train_face, y_train_face = load_face_data_parallel(train_csv, "DATASET/train")
X_val_face, y_val_face = load_face_data_parallel(val_csv, "DATASET/train")
X_test_face, y_test_face = load_face_data_parallel(test_csv, "DATASET/test")

# Validate loaded data
if len(X_train_face) == 0 or len(X_test_face) == 0:
    raise ValueError("Face data loading failed. Ensure image paths and directory structure are correct.")

# Scale features
scaler_face = StandardScaler()
X_train_face = scaler_face.fit_transform(X_train_face)
X_val_face = scaler_face.transform(X_val_face)
X_test_face = scaler_face.transform(X_test_face)

# Train SVM
face_model = SVC(kernel='linear', probability=True)
face_model.fit(X_train_face, y_train_face)

# Evaluate on validation set
y_pred_val = face_model.predict(X_val_face)
print("Validation Classification Report:\n", metrics.classification_report(y_val_face, y_pred_val))
print("Validation Accuracy: {:.2f}%".format(metrics.accuracy_score(y_val_face, y_pred_val) * 100))

# Evaluate on test set
y_pred_test = face_model.predict(X_test_face)
print("Test Classification Report:\n", metrics.classification_report(y_test_face, y_pred_test))
print("Test Accuracy: {:.2f}%".format(metrics.accuracy_score(y_test_face, y_pred_test) * 100))



# ---------------------
# SECTION 4: WRAPPER FUNCTIONS (FOR UI INTEGRATION)
# ---------------------
def predict_text_emotion(text_input):
    vect_input = vectorizer.transform([text_input]).toarray()
    return text_model.predict(vect_input)[0]

def predict_face_emotion(img_path):
    if not os.path.exists(img_path):
        return "Image not found."
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        return "Invalid image."
    img_resized = cv2.resize(img, (64, 64))
    hog_feat = hog(img_resized, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
    hog_feat = scaler_face.transform([hog_feat])
    return face_model.predict(hog_feat)[0]


=== TEXT MODE ===
Text Classification Report:
               precision    recall  f1-score   support

       anger       0.92      0.79      0.85       462
        fear       0.85      0.74      0.79       415
         joy       0.81      0.96      0.88      1176
        love       0.86      0.64      0.73       318
     sadness       0.89      0.94      0.91      1092
    surprise       0.87      0.50      0.63       137

    accuracy                           0.86      3600
   macro avg       0.87      0.76      0.80      3600
weighted avg       0.86      0.86      0.85      3600

Text Accuracy: 85.75%

=== FACE MODE ===


100%|█████████████████████████████████████████████████████████████████████████████| 9816/9816 [00:23<00:00, 421.16it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 2455/2455 [00:04<00:00, 603.55it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 3068/3068 [00:04<00:00, 656.42it/s]


Validation Classification Report:
               precision    recall  f1-score   support

           1       0.48      0.61      0.54       236
           2       0.23      0.37      0.28        52
           3       0.24      0.33      0.27       158
           4       0.79      0.79      0.79       983
           5       0.42      0.38      0.40       395
           6       0.52      0.45      0.48       155
           7       0.53      0.41      0.46       476

    accuracy                           0.57      2455
   macro avg       0.46      0.48      0.46      2455
weighted avg       0.58      0.57      0.58      2455

Validation Accuracy: 57.43%
Test Classification Report:
               precision    recall  f1-score   support

           1       0.51      0.63      0.56       329
           2       0.25      0.39      0.31        74
           3       0.20      0.31      0.24       160
           4       0.79      0.81      0.80      1185
           5       0.50      0.46      0

In [3]:
import joblib

# Save text mode components
joblib.dump(text_model, 'text_model.pkl')
joblib.dump(vectorizer, 'vectorizer.pkl')

# Save face mode components
joblib.dump(face_model, 'face_model.pkl')
joblib.dump(scaler_face, 'face_scaler.pkl')

print("✅ All models saved successfully.")


✅ All models saved successfully.
