In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules
# (e.g. the project-local `models.gender_age`) are re-imported before each
# cell executes and edits to them take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

from sklearn.ensemble import RandomForestClassifier
from models.gender_age import GenderAgePipeline
from xgboost import XGBClassifier

# Build the three XGBoost classifiers that GenderAgePipeline combines, passed
# in the order (gender model, male model, female model).
#
# `class_weight` is intentionally NOT passed: XGBClassifier has no such
# parameter (it is a scikit-learn estimator option, e.g. on
# RandomForestClassifier). XGBoost forwards unknown keyword arguments to the
# booster, which only warns that they "are not used", so
# `class_weight='balanced'` never rebalanced anything and merely suggested
# that it did.
# TODO(review): if class imbalance needs handling, use XGBoost's own
# mechanisms instead -- per-sample weights passed to `fit(sample_weight=...)`
# (e.g. from sklearn.utils.class_weight.compute_sample_weight('balanced', y)),
# or `scale_pos_weight` for a binary model. Whether GenderAgePipeline.fit
# forwards `sample_weight` cannot be seen from this notebook -- confirm first.
gender_pipe = XGBClassifier()
male_pipe = XGBClassifier()
female_pipe = XGBClassifier()

gender_age_pipe = GenderAgePipeline(gender_pipe, male_pipe, female_pipe)

In [None]:
import joblib
import numpy as np
from sklearn.model_selection import train_test_split

# Read the cached feature matrices and label vectors straight from their
# joblib dumps (all four live in the same feature-set directory).
X_train_cached = joblib.load('./trials/features/mfcc150_hfcc150_spectral5_alpha_ratio1/X_train.joblib')
X_test_cached = joblib.load('./trials/features/mfcc150_hfcc150_spectral5_alpha_ratio1/X_test.joblib')
y_train_cached = joblib.load('./trials/features/mfcc150_hfcc150_spectral5_alpha_ratio1/y_train.joblib')
y_test_cached = joblib.load('./trials/features/mfcc150_hfcc150_spectral5_alpha_ratio1/y_test.joblib')

# Pool the stored train/test portions back into a single data set ...
X = np.concatenate([X_train_cached, X_test_cached], axis=0)
y = np.concatenate([y_train_cached, y_test_cached], axis=0)

# Release the pre-split arrays; only the pooled data and the new split below
# are used from here on.
del X_train_cached, X_test_cached, y_train_cached, y_test_cached

# ... and carve out a fresh, seeded 95 % / 5 % split that keeps the class
# proportions of `y` in both parts.
# NOTE(review): if any preprocessing behind the cached features was fitted on
# the stored training portion only, pooling and re-splitting could leak
# information into the new test set -- confirm against the feature pipeline.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.05,
    stratify=y,
    random_state=42,
)

# X_male = [x for x, y in zip(X, y) if y in [0, 2]]
# y_male = [c//2 for c in y if c in [0, 2]]
# X_female = [x for x, y in zip(X, y) if y in [1, 3]]
# y_female = [c//2 for c in y if c in [1, 3]]
# X_gender = X
# y_gender = [c%2 for c in y]

# X_male_train, X_male_test, y_male_train, y_male_test = train_test_split(X_male, y_male, test_size=0.2, random_state=42)
# X_female_train, X_female_test, y_female_train, y_female_test = train_test_split(X_female, y_female, test_size=0.2, random_state=42)
# X_gender_train, X_gender_test, y_gender_train, y_gender_test = train_test_split(X_gender, y_gender, test_size=0.2, random_state=42)


In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fit the combined pipeline on the training split; the labels are handed over
# as a NumPy array.
train_labels = np.array(y_train)
gender_age_pipe = gender_age_pipe.fit(X_train, train_labels)

# Predict the held-out split and report accuracy, the per-class report and
# the confusion matrix.
y_pred = gender_age_pipe.predict(X_test)

holdout_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", holdout_accuracy)

print("\nClassification Report:")
holdout_report = classification_report(y_test, y_pred)
print(holdout_report)

print("\nConfusion Matrix:")
holdout_confusion = confusion_matrix(y_test, y_pred)
print(holdout_confusion)

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values searched for each of the three sub-models.
n_estimators_options = [100, 200]
max_depth_options = [3, 6]
learning_rate_options = [0.1, 0.01]

# Keys follow scikit-learn's `<component>__<parameter>` naming.
# NOTE(review): the component prefixes presumably match the parameter names
# GenderAgePipeline exposes via get_params() -- confirm in models.gender_age.
param_grid = {
    'gender_model__n_estimators': n_estimators_options,
    'gender_model__max_depth': max_depth_options,
    'gender_model__learning_rate': learning_rate_options,

    'male_age_model__n_estimators': n_estimators_options,
    'male_age_model__max_depth': max_depth_options,
    'male_age_model__learning_rate': learning_rate_options,

    'female_age_model__n_estimators': n_estimators_options,
    'female_age_model__max_depth': max_depth_options,
    'female_age_model__learning_rate': learning_rate_options,
}

# Nine two-valued parameters give 2**9 = 512 combinations; with 3-fold
# cross-validation that is 1536 fits, spread over all available cores.
grid = GridSearchCV(
    estimator=gender_age_pipe,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)
grid.fit(X_train, y_train)

print("Best Score:", grid.best_score_)
print("Best Params:", grid.best_params_)

# Score the best configuration found by the search on the held-out split.
best_pipe = grid.best_estimator_
y_pred = best_pipe.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", test_accuracy)

print("\nTest Classification Report:")
test_report = classification_report(y_test, y_pred)
print(test_report)

print("\nConfusion Matrix:")
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)