In [13]:
%load_ext autoreload
%autoreload 2

from sklearn.ensemble import RandomForestClassifier
from models.gender_age import GenderAgePipeline
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier as knn
import joblib
import numpy as np

# One shared hyperparameter set for all three sub-models: distance-weighted
# 4-nearest-neighbours, using every available core.
knn_settings = dict(n_neighbors=4, weights='distance', algorithm='auto', n_jobs=-1)

# Three independent estimator instances (never the same object reused), so each
# stage keeps its own fitted state.
gender_pipe, male_pipe, female_pipe = (knn(**knn_settings) for _ in range(3))

# Composite model; arguments are passed in the order (gender, male, female).
# NOTE(review): GenderAgePipeline is defined in models.gender_age and is not
# visible here -- presumably it routes samples by predicted gender; confirm.
gender_age_pipe = GenderAgePipeline(gender_pipe, male_pipe, female_pipe)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# --- Load cached feature matrices and labels --------------------------------
# NOTE: joblib.load unpickles its input; only point these paths at trusted files.
X_train = joblib.load('./trials/features/hfcc150_pitch_range/X_train.joblib')
X_test = joblib.load('./trials/features/hfcc150_pitch_range/X_test.joblib')

y_train = joblib.load('./trials/features/hfcc150_pitch_range/y_train.joblib')
y_test = joblib.load('./trials/features/hfcc150_pitch_range/y_test.joblib')

# Second feature set. It is concatenated column-wise with the first one below,
# so it is assumed to hold the same samples in the same order -- TODO confirm.
X_train2 = joblib.load('./trials/features/48k_mfcc150/X_train.joblib')
X_test2 = joblib.load('./trials/features/48k_mfcc150/X_test.joblib')

# --- Combine both feature sets ----------------------------------------------
# Column removed from the concatenated matrix (exactly once per split).
# NOTE(review): why column 151 is dropped is not visible in this notebook --
# document the reason here once confirmed.
DROPPED_COLUMN = np.s_[151:152]

X_train_combined = np.delete(
    np.concatenate((X_train, X_train2), axis=1), DROPPED_COLUMN, axis=1
)
X_test_combined = np.delete(
    np.concatenate((X_test, X_test2), axis=1), DROPPED_COLUMN, axis=1
)

# --- Gender target ----------------------------------------------------------
# The gender label is the parity of the class label.
X_gender_train = X_train_combined
y_gender_train = y_train % 2

# --- Per-gender splits ------------------------------------------------------
# Odd labels select the female rows, even labels the male rows.
female_train = y_train % 2 == 1
# Use the combined matrix as-is: the column was already removed above, and
# removing another one here would leave this split one feature narrower than
# x_female_test / x_male_train / x_male_test.
x_female_train = X_train_combined[female_train]
y_female_train = y_train[female_train]

female_test = y_test % 2 == 1
x_female_test = X_test_combined[female_test]
y_female_test = y_test[female_test]

male_train = y_train % 2 == 0
x_male_train = X_train_combined[male_train]
y_male_train = y_train[male_train]

male_test = y_test % 2 == 0
x_male_test = X_test_combined[male_test]
y_male_test = y_test[male_test]

In [15]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Fit the composite model on the combined training features, then score it on
# the held-out combined test features.
train_labels = np.array(y_train)
gender_age_pipe = gender_age_pipe.fit(X_train_combined, train_labels)
y_pred = gender_age_pipe.predict(X_test_combined)

print("Accuracy:", accuracy_score(y_test, y_pred))

# Each report is printed as a heading followed by the metric's own repr.
for heading, metric in (
    ("\nClassification Report:", classification_report),
    ("\nConfusion Matrix:", confusion_matrix),
):
    print(heading)
    print(metric(y_test, y_pred))

Accuracy: 0.9748185616890257

Classification Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98      6217
           1       0.98      0.95      0.96      1084
           2       0.93      0.94      0.94       971
           3       0.96      0.98      0.97       822

    accuracy                           0.97      9094
   macro avg       0.96      0.96      0.96      9094
weighted avg       0.97      0.97      0.97      9094


Confusion Matrix:
[[6126   16   57   18]
 [  39 1026    5   14]
 [  56    1  910    4]
 [  10    6    3  803]]


In [None]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter search over the three sub-models of the composite pipeline.
#
# The sub-models are KNeighborsClassifier instances, so the grid must use KNN
# hyperparameters; boosting-style names (n_estimators, max_depth,
# learning_rate) do not exist on KNN and are rejected by set_params.
# NOTE(review): the `gender_model` / `male_age_model` / `female_age_model`
# prefixes are assumed to match the attribute names GenderAgePipeline exposes
# through get_params() -- confirm against models.gender_age.
param_grid = {
    'gender_model__n_neighbors': [4, 8],
    'gender_model__weights': ['uniform', 'distance'],

    'male_age_model__n_neighbors': [4, 8],
    'male_age_model__weights': ['uniform', 'distance'],

    'female_age_model__n_neighbors': [4, 8],
    'female_age_model__weights': ['uniform', 'distance'],
}

grid = GridSearchCV(gender_age_pipe, param_grid, cv=3, scoring='accuracy', verbose=2, n_jobs=-1)
# Search on the same combined feature matrix (and ndarray labels) that the
# pipeline is fitted on elsewhere in this notebook, so scores are comparable.
grid.fit(X_train_combined, np.array(y_train))

print("Best Score:", grid.best_score_)
print("Best Params:", grid.best_params_)

# Evaluate the refitted best estimator on the matching combined test features.
y_pred = grid.best_estimator_.predict(X_test_combined)
print("Test Accuracy:", accuracy_score(y_test, y_pred))
print("\nTest Classification Report:")
print(classification_report(y_test, y_pred))
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))