# In [None]:
# AGE & EMOTION MODEL TRAINING

import os
import pickle

import librosa
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score
from sklearn.preprocessing import LabelEncoder

# Load dataset
# Read the metadata sheet and keep only rows for male speakers that have a
# non-missing audio path.
df = pd.read_excel("dataset/Book1_deduplicated.xlsx")
# Both filters are applied as one mask and the result is copied explicitly, so
# the column assignments that follow write to an independent frame instead of
# a filtered slice (which is what triggers pandas' SettingWithCopyWarning).
df = df[(df['gender'] == 'male') & df['path'].notna()].copy()
# Prefix every entry with data/male to form the path handed to the loader.
df['path'] = df['path'].apply(lambda x: os.path.join("data", "male", x))

# Extract MFCC
def extract_mfcc(path, sr=22050, n_mfcc=13):
    """Return a fixed-length MFCC summary vector for one audio file.

    The file is loaded with ``librosa.load``, MFCCs are computed with
    ``librosa.feature.mfcc``, and every coefficient is averaged over all
    frames, so recordings of different durations map to vectors of the same
    length.

    Args:
        path: Location of the audio file to load.
        sr: Sampling rate passed to ``librosa.load``. Defaults to 22050,
            the value this script has always used.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 13.

    Returns:
        The per-coefficient mean over frames, as a NumPy array.
    """
    # librosa.load returns the rate it actually used; feed that same rate to
    # the MFCC computation so both steps agree.
    signal, rate = librosa.load(path, sr=sr)
    mfcc = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
    # Transpose so frames run along axis 0, then average the frames away.
    return np.mean(mfcc.T, axis=0)

# Build the feature column: one MFCC vector per audio path.
# Rows whose feature entry is missing are discarded afterwards.
df['features'] = df['path'].apply(extract_mfcc)
df = df.dropna(subset=['features'])

# Train Age Model
# Linear regression from the per-file MFCC vector to the speaker's age.
X = np.array(df['features'].tolist())
y = np.array(df['age'])

# 80/20 hold-out in row order (no shuffling): the first 80% of rows train the
# model, the remainder is used for evaluation. X and y come from the same
# frame, so a single split index serves both.
n_train = int(0.8 * len(X))
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]

age_model = LinearRegression()
age_model.fit(X_train, y_train)
preds = age_model.predict(X_test)

# Round predictions to whole years and clamp them to the 0-100 range.
rounded_preds = np.clip(np.round(preds), 0, 100)
# "Accuracy" here is the share of test predictions within 5 years of the label.
age_acc = np.mean(np.abs(rounded_preds - y_test) <= 5)
print("🎯 Age Prediction Accuracy (±5 years):", age_acc)

# Create the output directory if needed, and use a context manager so the
# file handle is flushed and closed as soon as the model is written.
os.makedirs("models", exist_ok=True)
with open("models/age_model.pkl", "wb") as model_file:
    pickle.dump(age_model, model_file)
print("✅ Age model saved.")

# 🔴 Train Emotion Model (age > 60)
# The emotion classifier is trained only on speakers older than 60.
df_senior = df[df['age'] > 60]
if df_senior.empty:
    # Fail early with a clear message instead of an opaque shape error from
    # scikit-learn when there is nothing to train on.
    raise ValueError("No samples with age > 60; cannot train the emotion model.")

X = np.array(df_senior['features'].tolist())
# NOTE: astype(str) turns a missing emotion into the literal label 'nan'.
y = df_senior['emotion'].astype(str)

# Map the string labels to integer class ids; the fitted encoder is saved
# below so predictions can be mapped back to label names.
le = LabelEncoder()
y_enc = le.fit_transform(y)

# 80/20 hold-out in row order (no shuffling); one split index for X and y.
n_train = int(0.8 * len(X))
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y_enc[:n_train], y_enc[n_train:]

emotion_model = RandomForestClassifier()
emotion_model.fit(X_train, y_train)
y_pred = emotion_model.predict(X_test)

print("Emotion Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
# Weighted averaging: per-class scores are weighted by class support.
print("Precision:", precision_score(y_test, y_pred, average='weighted'))
print("Recall:", recall_score(y_test, y_pred, average='weighted'))

# Create the output directory if needed, and close each file handle
# deterministically via a context manager.
os.makedirs("models", exist_ok=True)
with open("models/emotion_model.pkl", "wb") as model_file:
    pickle.dump(emotion_model, model_file)
with open("models/emotion_encoder.pkl", "wb") as encoder_file:
    pickle.dump(le, encoder_file)
print("✅ Emotion model and label encoder saved.")
