In [None]:
import pickle
from collections import Counter
import numpy as np

from hmmlearn import hmm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, f1_score, accuracy_score

In [169]:
# Load the pickled per-reader MFCC statistics.
# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.
with open("../../data/extracted_features/mfcc_stats_that_v2/mfcc_features.pickle", "rb") as file:
    mfcc_stats_dict = pickle.load(file)

# One summary line per reader: how many samples are stored under that key.
for reader in mfcc_stats_dict:
    sample_count = len(mfcc_stats_dict[reader])
    print(f"reader: {reader} | # samples {sample_count}")
    # for mfcc, file_id in mfcc_stats_dict[reader]:
    #     print(f"\t 1st 2 mfcc: {mfcc} | # features: {mfcc.shape[0]} | file ID: {file_id}")

reader: 1069 | # samples 73
reader: 201 | # samples 79
reader: 311 | # samples 105
reader: 3240 | # samples 85
reader: 4297 | # samples 107
reader: 7800 | # samples 103
reader: 87 | # samples 103


In [None]:
# reader | gender
# 201    |  man
# 311    |  man
# 3240   |  man
# 87     | woman
# 4297   | woman
# 7800   | woman
# 1069   | woman (low sample size) (deprecated past mfcc_40_features.pickle)

In [170]:
def get_lowest_data(mfcc_dict):
    """
    Return the smallest number of samples stored under any key of `mfcc_dict`.

    Parameters:
    - mfcc_dict: mapping whose values are sized collections of samples

    Raises:
    - ValueError if `mfcc_dict` is empty (there is nothing to take the minimum of).
    """
    return min(len(samples) for samples in mfcc_dict.values())

def partition_data(reader, max, split=.8):
    """
    Split one reader's feature vectors into a leading train part and a trailing test part.

    Parameters:
    - reader: iterable of (features, file_id) pairs; the second item is discarded
    - max: keep only the first `max` feature entries before splitting
    - split: fraction of the kept entries that goes to the train part

    Returns:
    - (train, test) lists; the cut position is round(len(kept) * split)
    """
    kept = [features for features, _file_id in reader][0:max]

    # Compute the cut once so both halves are guaranteed to meet at the same index.
    cut = round(len(kept) * split)

    return kept[:cut], kept[cut:]

def separate_labels(labels, speaker_keys):
    """
    Cut a flat label sequence into consecutive chunks.

    Parameters:
    - labels: flat, sliceable sequence of labels
    - speaker_keys: chunk sizes, in order; chunk i holds the next speaker_keys[i] labels

    Returns:
    - list of slices of `labels`, one per entry of `speaker_keys`
    """
    chunks = []
    start = 0
    for size in speaker_keys:
        stop = start + size
        chunks.append(labels[start:stop])
        start = stop
    return chunks

def calculate_percentage(data):
    """
    For every sublist, compute what percentage of its entries each distinct value makes up.

    Parameters:
    - data: iterable of sized iterables of hashable values

    Returns:
    - list with one dict per sublist mapping value -> percentage (0-100);
      an empty sublist yields an empty dict
    """
    results = []
    for values in data:
        size = len(values)
        shares = {}
        if size:
            # Tally occurrences first, then convert the tallies to percentages.
            tally = {}
            for value in values:
                tally[value] = tally.get(value, 0) + 1
            shares = {value: (hits / size) * 100 for value, hits in tally.items()}
        results.append(shares)

    return results

def format_percentages(percentages):
    """
    Print one report per percentage dict, values listed from highest to lowest share.

    Parameters:
    - percentages: iterable of dicts mapping value -> percentage
      (the shape returned by calculate_percentage)

    Empty dicts are reported as "No data". Positions in the output are 1-based.
    """
    for position, shares in enumerate(percentages, start=1):
        if shares:
            # Highest percentage first; ties keep their original order.
            ranked = sorted(shares.items(), key=lambda pair: pair[1], reverse=True)
            entries = [f"{num}: {percent:.2f}%" for num, percent in ranked]
            print(f"Speaker {position}:\n\t" + ", ".join(entries))
        else:
            print(f"Sublist {position}: No data")

def create_truth_list(samples, label_order):
    """
    Build a ground-truth label list made of two equally long halves.

    Parameters:
    - samples: total number of samples; each half has samples // 2 entries,
      so an odd `samples` yields a list one element shorter than `samples`
    - label_order:
        0 for 1st half 0 and 2nd half 1
        1 for 1st half 1 and 2nd half 0

    Returns:
    - list of 0/1 labels of length 2 * (samples // 2)

    Raises:
    - ValueError if `label_order` is neither 0 nor 1 (previously this only
      printed a hint and then crashed on an unassigned local variable)
    """
    if label_order not in (0, 1):
        raise ValueError(f"label_order must be 0 or 1, got {label_order!r}")

    half = samples // 2
    first_label, second_label = (0, 1) if label_order == 0 else (1, 0)
    return [first_label] * half + [second_label] * half

def create_binary_labels(length):
    """Return `length` zeros followed by `length` ones as a single list."""
    zeros = [0] * length
    ones = [1] * length
    return zeros + ones

def generate_confusion_matrix(model_a, model_b, test_a, test_b):
    """
    Generates a confusion matrix for the given HMM models and test sets.

    Every sample is scored by both models and assigned to class A (label 0)
    when model_a's log-likelihood is strictly greater, otherwise to class B
    (label 1) -- ties therefore go to class B.

    Parameters:
    - model_a: Trained HMM model for class A
    - model_b: Trained HMM model for class B
    - test_a: List of test samples for class A
    - test_b: List of test samples for class B

    Returns:
    - 2x2 matrix with rows = true class and columns = predicted class:
        [[A predicted A, A predicted B],
         [B predicted A, B predicted B]]
    """
    def _predict(sample):
        # Each sample is scored on its own as a single-row observation.
        observation = sample.reshape(1, -1)
        log_likelihood_a = model_a.score(observation)
        log_likelihood_b = model_b.score(observation)
        return 0 if log_likelihood_a > log_likelihood_b else 1

    # Define the true labels (0 for class A, 1 for class B)
    true_labels = [0] * len(test_a) + [1] * len(test_b)

    # Predict in the same order the true labels were laid out: all of A, then all of B.
    predicted_labels = [_predict(sample) for sample in test_a]
    predicted_labels += [_predict(sample) for sample in test_b]

    # Pin the label set so the result stays 2x2 even when only one label
    # shows up among the true and predicted values.
    return confusion_matrix(true_labels, predicted_labels, labels=[0, 1])


In [171]:
def train_hmm(features, n_components=5, covariance_type="diag", n_iter=1000, random_state=42):
    """
    Fit a Gaussian HMM to the given features and return the fitted model.

    Parameters:
    - features: training observations handed to `fit` as-is (no `lengths`
      argument is passed)
    - n_components: number of hidden states (default 5)
    - covariance_type: covariance structure of the emissions (default "diag")
    - n_iter: maximum number of training iterations (default 1000)
    - random_state: seed that keeps training reproducible (default 42)

    The defaults reproduce the previously hard-coded configuration, so
    existing `train_hmm(features)` calls behave exactly as before.

    Returns:
    - the fitted hmm.GaussianHMM instance
    """
    model = hmm.GaussianHMM(
        n_components=n_components,
        covariance_type=covariance_type,
        n_iter=n_iter,
        random_state=random_state,
    )
    model.fit(features)
    return model

def recognize_speaker(models, test_features):
    """
    Return the key of the model that scores `test_features` highest.

    Parameters:
    - models: mapping of speaker id -> object exposing `score(features)`
    - test_features: observations passed unchanged to every model's `score`

    Returns:
    - the speaker id with the maximum score (first one wins on ties)

    Raises:
    - ValueError if `models` is empty
    """
    score_by_speaker = {
        speaker_id: candidate.score(test_features)
        for speaker_id, candidate in models.items()
    }
    return max(score_by_speaker, key=score_by_speaker.get)

In [None]:
####################
# different gender #
# 87 woman 201 man #
####################

# Same cap for every reader; 79 is the sample count printed for reader 201,
# the smaller of readers 87 and 201.
max_num_samples = 79

print(f"max number of samples 87 & 201: {max_num_samples}")

# Train/test splits, suffixed _f / _m following the reader-gender table above.
train_87_f, test_87_f = partition_data(mfcc_stats_dict["87"], max_num_samples)
train_4297_f, test_4297_f = partition_data(mfcc_stats_dict["4297"], max_num_samples)
train_7800_f, test_7800_f = partition_data(mfcc_stats_dict["7800"], max_num_samples)

train_201_m, test_201_m = partition_data(mfcc_stats_dict["201"], max_num_samples)
train_311_m, test_311_m = partition_data(mfcc_stats_dict["311"], max_num_samples)
train_3240_m, test_3240_m = partition_data(mfcc_stats_dict["3240"], max_num_samples)

# One HMM per reader (reader 3240 is split above but no model is trained for it here).
speaker_87_model_f = train_hmm(train_87_f)
speaker_4297_model_f = train_hmm(train_4297_f)
speaker_7800_model_f = train_hmm(train_7800_f)

speaker_201_model_m = train_hmm(train_201_m)
speaker_311_model_m = train_hmm(train_311_m)

speaker_models = {"87": speaker_87_model_f, "201": speaker_201_model_m, "311": speaker_311_model_m}

# Use the held-out splits created above: the unsuffixed names `test_87` and
# `test_201` are never assigned in the visible cells and raise NameError on a fresh kernel.
predicted_speaker_87 = recognize_speaker(speaker_models, test_87_f)
predicted_speaker_201 = recognize_speaker(speaker_models, test_201_m)



max number of samples 87 & 201: 79


In [160]:
# Reader 87 (class A) vs reader 201 (class B), using the models and held-out
# splits created in the training cell; the unsuffixed names used previously
# are not assigned in the visible cells and fail on a fresh kernel.
confusion_mat = generate_confusion_matrix(speaker_87_model_f, speaker_201_model_m, test_87_f, test_201_m)

print(confusion_mat)

[[16  0]
 [ 2 14]]


In [161]:
# [[16  0]
#  [ 1 15]]

In [162]:
# Reader 311 (class A) vs reader 201 (class B), using the models and held-out
# splits created in the training cell; the unsuffixed names used previously
# are not assigned in the visible cells and fail on a fresh kernel.
confusion_mat = generate_confusion_matrix(speaker_311_model_m, speaker_201_model_m, test_311_m, test_201_m)

print(confusion_mat)

[[11  5]
 [ 0 16]]


In [None]:
# [[ 5 11]
#  [ 0 16]]

In [118]:
def average_score(model, test_data):
    """
    Score every test sample individually and average the results.

    Parameters:
    - model: object exposing `score(observation)`
    - test_data: iterable of arrays; each is reshaped to a single row before scoring

    Returns:
    - (scores, mean) where `scores` is the list of per-sample scores and
      `mean` is their np.mean
    """
    per_sample_scores = [model.score(sample.reshape(1, -1)) for sample in test_data]
    return per_sample_scores, np.mean(per_sample_scores)

In [164]:
# Average per-sample log-likelihood of the reader-87 model on each held-out set.
# Uses the suffixed names created in the training cell; the unsuffixed
# `speaker_87_model` / `test_*` names are not assigned in the visible cells.
for test_label, test_set in (("87", test_87_f), ("311", test_311_m), ("201", test_201_m)):
    scores, avg_score = average_score(speaker_87_model_f, test_set)
    print(f"model 87 and {test_label}: {avg_score}")


model 87 and 87: -596.4729633121453
model 87 and 311: -860.6547799319397
model 87 and 201: -766.22397302437


In [134]:
# model 87 and 87: -441.6667726157509
# model 87 and 311: -693.6383601026669
# model 87 and 201: -612.5665587149906

In [166]:
# Average per-sample log-likelihood of the reader-201 model on each held-out set.
# Uses the suffixed names created in the training cell; the unsuffixed
# `speaker_201_model` / `test_*` names are not assigned in the visible cells.
for test_label, test_set in (("201", test_201_m), ("311", test_311_m), ("87", test_87_f)):
    scores, avg_score = average_score(speaker_201_model_m, test_set)
    print(f"model 201 and {test_label}: {avg_score}")


model 201 and 201: -608.1176310787316
model 201 and 311: -666.0221240448473
model 201 and 87: -846.0205529979579


In [None]:
# model 201 and 201: -435.009762183381
# model 201 and 311: -482.7640005389828
# model 201 and 87: -634.9915213900033