In [382]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import multivariate_normal
import random

def read_blocks(datafile):
    blocks = []
    current_block = []

    with open(datafile, 'r') as f:
        for line in f:
            stripped_line = line.strip()
            if stripped_line == "":
                if current_block:
                    blocks.append(np.array(current_block))
                    current_block = []
            else:
                numbers = list(map(float, line.split()))
                if len(numbers) == 13:
                    current_block.append(numbers)

    if current_block:
        blocks.append(np.array(current_block))

    return blocks


def calc_distance(frame, mean):
    dist = 0
    for i in range(len(mean)):
        dist += (frame[i] - mean[i])**2
    return dist


def calc_mean(cluster):
    new_mean = [0 for _ in range(13)]
    for frame in cluster:
        for mfcc in range(len(frame)):
            new_mean[mfcc] += frame[mfcc]
    for mfcc in range(len(new_mean)):
        new_mean[mfcc] = new_mean[mfcc] / len(cluster)
    return new_mean


def calc_covariance(cluster, mean):
    output = [[0 for _ in range(13)] for _ in range(13)]
    if (len(cluster) == 0):
        return output

    # full covariance
    # for i in range(13):
    #     for j in range(13):
    #         cov = 0
    #         for x in range(len(cluster)):
    #             cov += (cluster[x][i] - mean[i]) * (cluster[x][j] - mean[j])
    #         output[i][j] = cov / len(cluster)

    # distinct diagonal matrix
    # for i in range(13):
    #     cov = 0
    #     for x in range(len(cluster)):
    #         cov += (cluster[x][i] - mean[i])**2
    #     output[i][i] = cov / len(cluster)

    # distinct spherical matrix
    all_ = 0
    for i in range(13):
        for frame in cluster:
            all_ += (frame[i] - mean[i]) ** 2
    
    for i in range(13):
        output[i][i] = all_ / (13 * len(cluster))
        
    return output


def count(clusters):
    total = 0
    for c in clusters:
        total += len(c)
    return total


def probability(clusters, total):
    probabilities = [0 for _ in range(len(clusters))]
    for i in range(len(clusters)):
        if (len(clusters[i]) == 0):
            continue
        probabilities[i] = (len(clusters[i])/total)
    return probabilities

In [415]:
def calc_tied_covariance(clusters, means):
    output = [[0 for _ in range(13)] for _ in range(13)]

    # tied full matrix
    # count = 0
    # for c in range(len(clusters)):
    #     cluster = clusters[c]
    #     mean = means[c]
    #     count += len(cluster)
    #     if (len(clusters) == 0):
    #         return output
    #     for i in range(13):
    #         for j in range(13):
    #             for frame in cluster:
    #                 output[i][j] += (frame[i] - mean[i]) * (frame[j] - mean[j])

    # for i in range(13):
    #     for j in range(13):
    #         output[i][j] = output[i][j] / count

    # tied diagonal matrix
    # count = 0
    # for c in range(len(clusters)):
    #     cluster = clusters[c]
    #     mean = means[c]
    #     count += len(cluster)
    #     if len(clusters) == 0:
    #         return output
    
    #     for frame in cluster:
    #         for i in range(13):
    #             output[i][i] += (frame[i] - mean[i]) ** 2
    
    # for i in range(13):
    #     output[i][i] = output[i][i] / count

    # tied spherical
    count = 0
    all_ = 0
    for c in range(len(clusters)):
        cluster = clusters[c]
        mean = means[c]
        count += len(cluster)
        if (len(clusters) == 0):
            return output
    
        for frame in cluster:
            for i in range(13):
                all_ += (frame[i] - mean[i]) ** 2
    
    for i in range(13):
        output[i][i] = all_ / (13 * count)

    return output

In [290]:
def k_means(number_clusters, digit_blocks, means):
    # do k means on single digit with 3 means / clusters -> phonemes
    max_iterations = 105
    for i in range(max_iterations):
        # reset phoneme clusters
        clusters = [[] for _ in range(number_clusters)]
    
        # calculate min distance from each frame to mean
        for block in digit_blocks:
            for frame in block:
                distances = [0 for _ in range(number_clusters)]
                for d in range(len(distances)):
                    distances[d] = calc_distance(frame, means[d])
    
                min_distance = distances[0]
                min_dist_index = 0
                for d in range(len(distances)):
                    if distances[d] < min_distance:
                        min_distance = distances[d]
                        min_dist_index = d
    
                clusters[min_dist_index].append(frame)
    
        # recalculate cluster mean after each iteration
        for m in range(len(means)):
            if len(clusters[m]) > 0:
                means[m] = calc_mean(clusters[m])

    return means, clusters

In [292]:
# separate blocks corresponding to a single digit
file = 'Train_Arabic_Digit.txt'
allblocks = read_blocks(file)

number_clusters = 3
number_mfcc = 13

digit_info = [[] for _ in range(10)]

# for each digit, find the means through k-means, then find probabilities and covariance with cluster assignments
for i in range(10):
    digit = i
    a = digit*660
    b = (digit+1)*660
    digit_blocks = allblocks[a:b]
    
    means = [[random.uniform(-2, 2) for _ in range(number_mfcc)] for _ in range(number_clusters)]
    # covariances is a list of 2D arrays corresponding to cluster covariance
    covariances = [np.identity(13) for _ in range(number_clusters)]
    probabilities = [0.5 for _ in range(number_clusters)]
    
    clusters = [[] for _ in range(number_clusters)]
    
    means, clusters = k_means(number_clusters, digit_blocks, means)
    
    number_points = count(clusters)
    probabilities = probability(clusters, number_points)
    covariances = [[[0 for _ in range(number_mfcc)] for _ in range(number_mfcc)] for _ in range(number_clusters)]
    for j in range(len(clusters)):
        covariances[j] = calc_covariance(clusters[j], means[j])
    print(digit_info[i])
    digit_info[i].append(probabilities)
    digit_info[i].append(means)
    digit_info[i].append(covariances)
    print(digit_info[i])

[]
[[0.46825736806031526, 0.32637936943111723, 0.2053632625085675], [[3.3407770759308475, -2.3201856273759813, -0.8704317473396773, -1.842543199484498, -1.2995509648265515, -1.280702162355687, -0.7243978069353217, -0.5353288678087091, 0.010927109765895244, -0.48370889256884014, -0.35965514806952825, -0.24073792924123938, -0.15963305946208015], [-1.4349858443268149, -3.19526502134401, 0.6304359072547583, -0.19764261097742503, 0.013610649956293517, -0.20101451881769272, 0.0657664125395721, -0.03747013555059719, 0.22792718285181796, 0.09735432925830172, 0.16995990202441233, 0.06939547033665829, 0.07303626771492329], [1.0697427514810145, -7.476658260325402, 0.8427045996412202, -0.7821971942782631, 0.6650807369795595, -0.09185192083229056, 0.37496127109511934, 0.31978923653400115, 0.32880323687526064, 0.25897095479620375, 0.23412709915102153, 0.32309110122861817, 0.13255506561952496]], [[[2.259756460635748, -0.3707975923201876, -0.5947874364108047, -0.0801176465838664, -0.3244336203682572, 

In [417]:

covariances = [[[0 for _ in range(number_mfcc)] for _ in range(number_mfcc)] for _ in range(number_clusters)]
for i in range(10):
    covar = calc_tied_covariance(clusters, means)
    for j in range(len(clusters)):
        covariances[j] = covar
    digit_info[i][2] = covariances

In [419]:
from scipy.stats import multivariate_normal

def get_likelihood(x, mean, cov):
    n = multivariate_normal.pdf(x, mean=mean, cov=cov)
    return n

In [421]:
digit_likelihood = []

file = 'Test_Arabic_Digit.txt'
test_blocks = read_blocks(file)

number_clusters = 3

all_predictions = []

for i in range(10):
    digit = i
    a = digit*220
    b = (digit+1)*220
    digit_blocks = test_blocks[a:b]

    digit_predictions = []

    for utterance in digit_blocks:
        max_utterance_likelihood = float('-inf')
        max_utterance_likelihood_digit = 0
    
        # find the likelihood that an utterance is each digit
        for d in range(10):
            likelihood_utterance_is_digit = 0
            
            for frame in utterance:
                max_cluster_likelihood = float('-inf')
                digit_probabilities = digit_info[d][0]
                digit_means = digit_info[d][1]
                digit_covariances = digit_info[d][2]
                
                for j in range(number_clusters):
                    covariance = digit_covariances[j]
                    mean = digit_means[j]
                    prob = digit_probabilities[j]
        
                    epsilon = 1e-10
                    lik = np.log(np.maximum(calc_likelihood(frame, mean, covariance), epsilon))
                    li = np.log(prob) + lik
                    if (li > max_cluster_likelihood):
                        max_cluster_likelihood = li
                
                likelihood_utterance_is_digit += max_cluster_likelihood
                
            if (likelihood_utterance_is_digit > max_utterance_likelihood):
                max_utterance_likelihood = likelihood_utterance_is_digit
                max_utterance_likelihood_digit = d
                
        digit_predictions.append(max_utterance_likelihood_digit)
        
    all_predictions.append(digit_predictions)

print(len(all_predictions))
print(all_predictions[0])

10
[6, 6, 7, 0, 0, 7, 7, 0, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 4, 0, 0, 4, 0, 0, 0, 0, 4, 0, 5, 0, 5, 5, 0, 5, 5, 5, 5, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 7, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 6, 9, 9, 6, 0, 2, 0, 7, 9, 0, 7, 9, 7, 9, 1, 9, 9, 0, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 0, 0, 0, 0, 9, 7, 0, 9, 0, 9, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 3, 3, 3, 3, 0, 9, 9, 0, 9, 9, 9, 9, 0, 9, 9, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0]


In [422]:
print(len(all_predictions))
print(all_predictions[9])

10
[7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 5, 7, 5, 5, 7, 9, 7, 7, 9, 9, 7, 5, 9, 7, 9, 5, 9, 5, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 5, 5, 5, 5, 5, 5, 5, 5, 7, 5, 9, 9, 2, 2, 9, 2, 2, 2, 2, 2, 9, 7, 7, 7, 9, 7, 7, 9, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 7, 9, 7, 9, 9, 6, 6, 2, 9, 5, 5, 5, 5, 9, 6, 9, 9, 9, 9, 4, 4, 4, 4, 4, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 3, 9, 3, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 4, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 9, 9, 3, 3, 9, 9, 9, 9, 9, 7, 9, 9, 9, 9, 9, 3, 9, 9, 9, 9, 9, 9, 3, 9, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7]


In [423]:
confusion_matrix = [[0 for _ in range(10)] for _ in range (10)]
for i in range(len(all_predictions)):
    for j in range(len(all_predictions[i])):
        p = all_predictions[i][j]
        confusion_matrix[i][p] += 1

for i in range(10):
    print(confusion_matrix[i])

[146, 1, 1, 12, 3, 10, 10, 13, 0, 24]
[1, 191, 6, 3, 3, 3, 0, 2, 4, 7]
[0, 1, 203, 3, 1, 5, 0, 1, 6, 0]
[40, 10, 34, 123, 0, 0, 0, 11, 2, 0]
[0, 10, 0, 0, 153, 0, 0, 48, 0, 9]
[14, 0, 10, 1, 6, 100, 0, 41, 0, 48]
[41, 0, 16, 3, 0, 4, 138, 11, 0, 7]
[6, 3, 1, 2, 57, 11, 0, 122, 0, 18]
[0, 11, 37, 2, 0, 0, 0, 1, 164, 5]
[0, 0, 8, 6, 7, 19, 4, 43, 0, 133]


In [424]:
accuracy = [0 for _ in range(10)]
total_accuracy = 0
for i in range(10):
    t = confusion_matrix[i][i]
    total_accuracy += confusion_matrix[i][i]
    accuracy[i] = t/220
total_accuracy /= 220 * 10

In [425]:
for i in range(10):
    print(f"{accuracy[i]:.2%}")
print("\n")
print(f"{total_accuracy:.2%}")

66.36%
86.82%
92.27%
55.91%
69.55%
45.45%
62.73%
55.45%
74.55%
60.45%


66.95%
