In [1]:
import os
import numpy as np
import pandas as pd
import pickle
from sklearn.mixture import GaussianMixture as GMM

In [2]:
# Directories
train_dir = "../Datasets_mfcc_22/Environment 1/Training"
model_save_dir = "../GMM_speaker_models"
os.makedirs(model_save_dir, exist_ok=True)


In [3]:
# Training the GMM models
def train_gmm_models(train_dir, model_save_dir):
    for speaker_folder in os.listdir(train_dir):
        speaker_path = os.path.join(train_dir, speaker_folder)
        if not os.path.isdir(speaker_path):
            continue

        print(f"Training GMM for speaker: {speaker_folder}")
        
        # Collect all speaker data (CSV files)
        mfcc_data = []
        for file in os.listdir(speaker_path):
            if file.endswith('.csv'):
                file_path = os.path.join(speaker_path, file)
                data = pd.read_csv(file_path, header=None).values
                mfcc_data.append(data)

        # Combine all data
        mfcc_data = np.vstack(mfcc_data)

        # Train GMM
        gmm = GMM(n_components=16, covariance_type='diag', max_iter=200, random_state=42)
        gmm.fit(mfcc_data)

        # Save the model
        pickle_file = f"{speaker_folder}.gmm"
        pickle_path = os.path.join(model_save_dir, pickle_file)
        with open(pickle_path, 'wb') as f:
            pickle.dump(gmm, f)
        print(f"Model saved for {speaker_folder}: {pickle_path}")

In [4]:
# Train the models
train_gmm_models(train_dir, model_save_dir)

Training GMM for speaker: Abdelrahman
Model saved for Abdelrahman: ../GMM_speaker_models\Abdelrahman.gmm
Training GMM for speaker: Omar
Model saved for Omar: ../GMM_speaker_models\Omar.gmm
Training GMM for speaker: Reem
Model saved for Reem: ../GMM_speaker_models\Reem.gmm
Training GMM for speaker: Renad
Model saved for Renad: ../GMM_speaker_models\Renad.gmm
Training GMM for speaker: Sherif
Model saved for Sherif: ../GMM_speaker_models\Sherif.gmm
Training GMM for speaker: Youssef
Model saved for Youssef: ../GMM_speaker_models\Youssef.gmm


In [11]:
# Directories for testing
test_dirs = [
    "../Datasets_mfcc_22/Environment 1/Testing",
    "../Datasets_mfcc_22/Environment 2"
]


In [12]:
# Function to load GMM models
def load_gmm_models(model_save_dir):
    models = {}
    for model_file in os.listdir(model_save_dir):
        if model_file.endswith(".gmm"):
            model_path = os.path.join(model_save_dir, model_file)
            speaker_name = model_file.replace(".gmm", "")
            with open(model_path, 'rb') as f:
                models[speaker_name] = pickle.load(f)
    return models

# Evaluate models on testing data
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Function to evaluate GMM models
def evaluate_gmm_models(gmm_models, test_dir):
    results = []
    y_true = []
    y_pred = []
    
    # Iterate through all CSV files in the test directory
    for folder in os.listdir(test_dir):
        folder_path = os.path.join(test_dir, folder)
        if os.path.isdir(folder_path):  # Ensure we're only looking at folders
            for file in os.listdir(folder_path):
                if file.endswith(".csv"):  # Only consider CSV files
                    file_path = os.path.join(folder_path, file)
                    data = pd.read_csv(file_path).values  # Read the CSV data
                    
                    # Extract the actual speaker's name from the folder name
                    actual_speaker = folder
                    
                    # Compute likelihoods for each GMM model
                    scores = []
                    for speaker, model in gmm_models.items():
                        score = model.score(data)
                        scores.append(score)
                    
                    # Predict the speaker with the highest score
                    predicted_speaker = max(zip(gmm_models.keys(), scores), key=lambda x: x[1])[0]
                    
                    # Append results for each file
                    results.append((file, actual_speaker, predicted_speaker))
                    y_true.append(actual_speaker)
                    y_pred.append(predicted_speaker)
    
    return results, y_true, y_pred



In [13]:
# Load models
gmm_models = load_gmm_models(model_save_dir)
gmm_models

{'Abdelrahman': GaussianMixture(covariance_type='diag', max_iter=200, n_components=16,
                 random_state=42),
 'Omar': GaussianMixture(covariance_type='diag', max_iter=200, n_components=16,
                 random_state=42),
 'Reem': GaussianMixture(covariance_type='diag', max_iter=200, n_components=16,
                 random_state=42),
 'Renad': GaussianMixture(covariance_type='diag', max_iter=200, n_components=16,
                 random_state=42),
 'Sherif': GaussianMixture(covariance_type='diag', max_iter=200, n_components=16,
                 random_state=42),
 'Youssef': GaussianMixture(covariance_type='diag', max_iter=200, n_components=16,
                 random_state=42)}

In [15]:
for test_dir in test_dirs:
    print(f"\nEvaluating models on: {test_dir}")
    
    # Evaluate models and get results, true labels, and predicted labels
    results, y_true, y_pred = evaluate_gmm_models(gmm_models, test_dir)
    
    # Print file-level evaluation results
    # for file, actual_speaker, predicted_speaker in results:
    #     print(f"File: {file}, Actual: {actual_speaker}, Predicted: {predicted_speaker}")
    
    # Calculate accuracy and other metrics
    accuracy = accuracy_score(y_true, y_pred)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')  # Weighted average for multi-class
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision (weighted): {precision:.4f}")
    print(f"Recall (weighted): {recall:.4f}")
    print(f"F1 Score (weighted): {f1:.4f}")
    
    # Optionally, save the results in a CSV file
    results_df = pd.DataFrame(results, columns=["File", "Actual", "Predicted"])
    result_file = f"gmm_evaluation_results_{test_dir.split('/')[-1]}.csv"
    results_df.to_csv(result_file, index=False)
    print(f"Evaluation results saved to: {result_file}")


Evaluating models on: ../Datasets_mfcc_22/Environment 1/Testing
Accuracy: 1.0000
Precision (weighted): 1.0000
Recall (weighted): 1.0000
F1 Score (weighted): 1.0000
Evaluation results saved to: gmm_evaluation_results_Testing.csv

Evaluating models on: ../Datasets_mfcc_22/Environment 2
Accuracy: 0.3125
Precision (weighted): 0.2816
Recall (weighted): 0.3125
F1 Score (weighted): 0.2831
Evaluation results saved to: gmm_evaluation_results_Environment 2.csv
