In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
from google.colab import drive

In [None]:
# Mount Google Drive at /content/drive. Later cells read the feature
# spreadsheet from, and write the results CSV to, paths under this mount point.
# force_remount=True asks Colab to redo the mount even if one already exists.
drive.mount("/content/drive", force_remount=True)

In [None]:
# Spreadsheet of extracted features on the mounted Drive.
file_path = '/content/drive/MyDrive/extracted_features_withtrans(new).xlsx'

# Read the workbook into a DataFrame (default sheet).
feature_df = pd.read_excel(io=file_path)

In [None]:
# Feature matrix: every column except the file identifier and the target.
# The second, tolerant drop removes 'Predicted_Gender' when a later cell has
# already written that column into feature_df; without it, re-running this
# cell after the prediction cell would feed the model's own predictions back
# in as a feature (label leakage caused by hidden notebook state).
X = (
    feature_df
    .drop(columns=['audio_file', 'gender'])
    .drop(columns=['Predicted_Gender'], errors='ignore')
)
y = feature_df['gender']

# 80/20 split, stratified on the target so each class keeps its proportion in
# both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:


# Random forest with 300 trees, depth capped at 25, a fixed seed for
# repeatable fits, and 'balanced' class weighting.
# fit() returns the estimator itself, so construction and training are chained.
classifier = RandomForestClassifier(
    class_weight='balanced',
    random_state=42,
    max_depth=25,
    n_estimators=300,
).fit(X_train, y_train)

# Evaluate on the held-out partition only.
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)


In [None]:
# Headline accuracy on the held-out split, as a percentage with two decimals.
print(f"\n Model Accuracy: {accuracy*100:.2f}%")

# Per-class metrics report for the same predictions.
report_text = classification_report(y_test, y_pred)
print("\nClassification Report:\n", report_text)

# Rows and columns of the confusion matrix follow this explicit label order.
label_order = ['female', 'male', 'trans']
cm = confusion_matrix(y_test, y_pred, labels=label_order)
print("Confusion Matrix:\n", cm)

In [None]:

# Pair each feature column with the forest's importance score, highest first.
importances = classifier.feature_importances_
feat_importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

# Horizontal bar chart of the ten highest-ranked features.
top_features = feat_importance_df.head(10)
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(top_features['Feature'], top_features['Importance'], color='skyblue')
ax.invert_yaxis()  # put the most important feature at the top
ax.set_xlabel("Importance")
ax.set_title("Top 10 Important Features")
plt.show()

In [None]:
# Predict a label for every row of X and store it alongside the features.
# NOTE(review): X here is the full feature matrix, so these predictions cover
# rows the classifier was trained on as well as the held-out rows; this table
# is not a held-out evaluation.
all_predictions = classifier.predict(X)
feature_df['Predicted_Gender'] = all_predictions

# Side-by-side frame of true vs. predicted labels, aligned on the row index.
comparison_df = y.to_frame(name='Actual').assign(
    Predicted=feature_df['Predicted_Gender']
)

print("\nSample Comparison:")
print(comparison_df.head(10))

In [None]:
# Write the actual-vs-predicted table to Drive, without the index column.
results_csv = '/content/drive/MyDrive/comparison_results_withtrans.csv'
comparison_df.to_csv(results_csv, index=False)

print("\n Comparison results saved as 'comparison_results_withtrans.csv'")