In [1]:
"""
Phoneme articulations are naturally distinguishable on the manifold of SPD matrices.

Subjects articulate 38 distinct phonemes that span the entire English language phonetic space in both a 'silent' and an 'audible' manner.

The 38 phonemes are listed below (labels are given in brackets beside each phoneme):

Bilabial consonants: Baa (0), Paa (1), Maa (2)
Labiodental consonants: Faa (3), Vaa (4)
Dental consonants: Thaa (5), Dhaa (6)
Alveolar consonants: Taa (7), Daa (8), Naa (9), Saa (10), Zaa (11)
Postalveolar consonants: Chaa (12), Shaa (13), Jhaa (14), Zhaa (15)
Velar consonants: Kaa (16), Gaa (17), NGaa (18)
Approximant consonants: Yaa (19), Raa (20), Laa (21), Waa (22)
Vowels:
OY as in bOY (23), OW as in nOW (24),
AO as in OUght (25), AA as in fAther (26),
AE as in At (27), EH as in mEt (28),
EY as in mAte (29), IY as in mEET (30),
IH as in It (31), AH as in HUt (32),
UW as in fOOD (33), ER as in hER (34),
UH as in hOOD (35)

DATA is given in a numpy array of dimensions (380, 22, 7500) - (38 phonemes each repeated 10 times, 22 channels, 7500 time samples).
Raw data was filtered using a 3rd-order Butterworth bandpass filter between 80 and 1000 Hertz.
"""

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from basicOperations.manifoldOperations import matrixDistance, frechetMean

In [2]:
# Identifiers of the participants whose recordings are analysed below.
# Numbering runs 1-10 and then 12; no "Subject11" appears in this list.
subjects = ["Subject" + str(number) for number in (*range(1, 11), 12)]

In [3]:
# Phoneme inventory (see the header docstring): consonants carry labels 0-22,
# vowels carry labels 23-37.
numberConsonantPhoneme = 23
numberVowelPhoneme = 15
numberPhonemes = numberConsonantPhoneme + numberVowelPhoneme

# Recording layout: repetitions per phoneme, total trials per file,
# channels per trial and time samples per trial.
trialsPerPhoneme = 10
numberTrials = trialsPerPhoneme * numberPhonemes
numberChannels = 22
windowLength = 7500

In [4]:
# Helper objects from basicOperations.manifoldOperations, shared by every
# classification cell below:
#   geodesicDistance.distance(A, B) -> distance between two matrices
#   manifoldMean.mean(stack)        -> mean of a stack of matrices
# NOTE(review): presumably the Riemannian distance / Frechet mean on the SPD
# manifold, judging by the names - confirm against the module.
geodesicDistance, manifoldMean = matrixDistance(), frechetMean()

In [5]:
# Nearest-centroid classification of all phonemes, voiced recordings.
# For every subject: standardise each trial, turn it into a channel-by-channel
# covariance matrix, average six repetitions per phoneme into a centroid, and
# assign each of the four held-out repetitions to the closest centroid.
allAccuracyVoiced = []
for subject in subjects:
    print(subject)

    DATA = np.load("Experiment1/Phoneme/Voiced" + subject + ".npy")

    # Standardise along the last axis (time samples, per the header docstring).
    # The 1e-5 keeps the division finite when a standard deviation is zero.
    sampleMean = np.expand_dims(DATA.mean(axis=-1), axis=-1)
    sampleStd = np.expand_dims(DATA.std(axis=-1), axis=-1)
    voicedDATA = (DATA - sampleMean) / (sampleStd + 1e-5)

    phonemeMatrices = voicedDATA
    # Labels assume the trials are stored phoneme by phoneme, ten in a row.
    labelsByPhonemes = np.repeat(np.arange(numberPhonemes), trialsPerPhoneme)

    # Map every phoneme label to the list of trial indices that carry it.
    Indices = {
        phoneme: np.flatnonzero(labelsByPhonemes == phoneme).tolist()
        for phoneme in range(numberPhonemes)
    }

    # One (channels x channels) second-moment matrix per trial.
    covariancesLabels = np.zeros((numberPhonemes, trialsPerPhoneme, numberChannels, numberChannels))
    for phoneme, trialIndices in Indices.items():
        for repetition, trial in enumerate(trialIndices):
            signal = phonemeMatrices[trial]
            covariancesLabels[phoneme, repetition] = 1/windowLength * (signal @ signal.T)

    # Training set: repetitions 0-2 and 5-7 of every phoneme (six per phoneme).
    trainFeatures = np.concatenate(
        (covariancesLabels[:, :3], covariancesLabels[:, 5:8]), axis=1
    ).reshape(numberPhonemes * 6, numberChannels, numberChannels)
    trainLabels = np.repeat(np.arange(numberPhonemes, dtype=float), 6)

    # Test set: repetitions 3-4 and 8-9 of every phoneme (four per phoneme).
    testFeatures = np.concatenate(
        (covariancesLabels[:, 3:5], covariancesLabels[:, 8:10]), axis=1
    ).reshape(numberPhonemes * 4, numberChannels, numberChannels)
    testLabels = np.repeat(np.arange(numberPhonemes, dtype=float), 4)

    # One centroid per phoneme, computed from its six training matrices.
    trainCentroid = np.zeros((numberPhonemes, numberChannels, numberChannels))
    for phoneme in range(numberPhonemes):
        start = phoneme * 6
        trainCentroid[phoneme] = manifoldMean.mean(trainFeatures[start:start + 6])

    # Each test matrix gets the label of the centroid at minimum distance.
    predictLabels = np.zeros((4 * numberPhonemes))
    for k, testCovariance in enumerate(testFeatures):
        distances = np.array(
            [geodesicDistance.distance(testCovariance, centroid) for centroid in trainCentroid],
            dtype=float,
        )
        predictLabels[k] = np.argmin(distances)

    correct = (predictLabels == testLabels)
    accuracy = np.mean(correct)
    print(accuracy)
    print(" ")
    allAccuracyVoiced.append(accuracy)

Subject1
0.375
 
Subject2
0.5131578947368421
 
Subject3
0.2631578947368421
 
Subject4
0.19736842105263158
 
Subject5
0.47368421052631576
 
Subject6
0.5
 
Subject7
0.19078947368421054
 
Subject8
0.32894736842105265
 
Subject9
0.35526315789473684
 
Subject10
0.4144736842105263
 
Subject12
0.34868421052631576
 


In [8]:
# Accuracy averaged over all subjects: voiced recordings, all phonemes.
meanAccuracyVoiced = np.mean(allAccuracyVoiced)
print(meanAccuracyVoiced)

0.3600478468899521


In [7]:
# Nearest-centroid classification of all phonemes, unvoiced recordings.
# For every subject: standardise each trial, turn it into a channel-by-channel
# covariance matrix, average six repetitions per phoneme into a centroid, and
# assign each of the four held-out repetitions to the closest centroid.
allAccuracyUnvoiced = []
for subject in subjects:
    print(subject)

    DATA = np.load("Experiment1/Phoneme/Unvoiced" + subject + ".npy")

    # Standardise along the last axis (time samples, per the header docstring).
    # The 1e-5 keeps the division finite when a standard deviation is zero.
    sampleMean = np.expand_dims(DATA.mean(axis=-1), axis=-1)
    sampleStd = np.expand_dims(DATA.std(axis=-1), axis=-1)
    unvoicedDATA = (DATA - sampleMean) / (sampleStd + 1e-5)

    phonemeMatrices = unvoicedDATA
    # Labels assume the trials are stored phoneme by phoneme, ten in a row.
    labelsByPhonemes = np.repeat(np.arange(numberPhonemes), trialsPerPhoneme)

    # Map every phoneme label to the list of trial indices that carry it.
    Indices = {
        phoneme: np.flatnonzero(labelsByPhonemes == phoneme).tolist()
        for phoneme in range(numberPhonemes)
    }

    # One (channels x channels) second-moment matrix per trial.
    covariancesLabels = np.zeros((numberPhonemes, trialsPerPhoneme, numberChannels, numberChannels))
    for phoneme, trialIndices in Indices.items():
        for repetition, trial in enumerate(trialIndices):
            signal = phonemeMatrices[trial]
            covariancesLabels[phoneme, repetition] = 1/windowLength * (signal @ signal.T)

    # Training set: repetitions 0-2 and 5-7 of every phoneme (six per phoneme).
    trainFeatures = np.concatenate(
        (covariancesLabels[:, :3], covariancesLabels[:, 5:8]), axis=1
    ).reshape(numberPhonemes * 6, numberChannels, numberChannels)
    trainLabels = np.repeat(np.arange(numberPhonemes, dtype=float), 6)

    # Test set: repetitions 3-4 and 8-9 of every phoneme (four per phoneme).
    testFeatures = np.concatenate(
        (covariancesLabels[:, 3:5], covariancesLabels[:, 8:10]), axis=1
    ).reshape(numberPhonemes * 4, numberChannels, numberChannels)
    testLabels = np.repeat(np.arange(numberPhonemes, dtype=float), 4)

    # One centroid per phoneme, computed from its six training matrices.
    trainCentroid = np.zeros((numberPhonemes, numberChannels, numberChannels))
    for phoneme in range(numberPhonemes):
        start = phoneme * 6
        trainCentroid[phoneme] = manifoldMean.mean(trainFeatures[start:start + 6])

    # Each test matrix gets the label of the centroid at minimum distance.
    predictLabels = np.zeros((4 * numberPhonemes))
    for k, testCovariance in enumerate(testFeatures):
        distances = np.array(
            [geodesicDistance.distance(testCovariance, centroid) for centroid in trainCentroid],
            dtype=float,
        )
        predictLabels[k] = np.argmin(distances)

    correct = (predictLabels == testLabels)
    accuracy = np.mean(correct)
    print(accuracy)
    print(" ")
    allAccuracyUnvoiced.append(accuracy)

Subject1
0.5
 
Subject2
0.4934210526315789
 
Subject3
0.28289473684210525
 
Subject4
0.32894736842105265
 
Subject5
0.4473684210526316
 
Subject6
0.45394736842105265
 
Subject7
0.26973684210526316
 
Subject8
0.3026315789473684
 
Subject9
0.23026315789473684
 
Subject10
0.3092105263157895
 
Subject12
0.3157894736842105
 


In [9]:
# Accuracy averaged over all subjects: unvoiced recordings, all phonemes.
meanAccuracyUnvoiced = np.mean(allAccuracyUnvoiced)
print(meanAccuracyUnvoiced)

0.3576555023923445


In [10]:
# Nearest-centroid classification restricted to the consonants, voiced
# recordings. Consonants carry labels 0-22, so only the first
# numberConsonantPhoneme * trialsPerPhoneme trials of each file are used.
allConsonantAccuracyVoiced = []
for subject in subjects:
    print(subject)

    DATA = np.load("Experiment1/Phoneme/Voiced" + subject + ".npy")

    # Standardise along the last axis (time samples, per the header docstring).
    # The 1e-5 keeps the division finite when a standard deviation is zero.
    sampleMean = np.expand_dims(DATA.mean(axis=-1), axis=-1)
    sampleStd = np.expand_dims(DATA.std(axis=-1), axis=-1)
    voicedDATA = (DATA - sampleMean) / (sampleStd + 1e-5)

    phonemeMatrices = voicedDATA
    # Labels assume the trials are stored phoneme by phoneme, ten in a row.
    labelsByPhonemes = np.repeat(np.arange(numberConsonantPhoneme), trialsPerPhoneme)

    # Map every consonant label to the list of trial indices that carry it.
    Indices = {
        phoneme: np.flatnonzero(labelsByPhonemes == phoneme).tolist()
        for phoneme in range(numberConsonantPhoneme)
    }

    # One (channels x channels) second-moment matrix per trial.
    covariancesLabels = np.zeros((numberConsonantPhoneme, trialsPerPhoneme, numberChannels, numberChannels))
    for phoneme, trialIndices in Indices.items():
        for repetition, trial in enumerate(trialIndices):
            signal = phonemeMatrices[trial]
            covariancesLabels[phoneme, repetition] = 1/windowLength * (signal @ signal.T)

    # Training set: repetitions 0-2 and 5-7 of every consonant (six each).
    trainFeatures = np.concatenate(
        (covariancesLabels[:, :3], covariancesLabels[:, 5:8]), axis=1
    ).reshape(numberConsonantPhoneme * 6, numberChannels, numberChannels)
    trainLabels = np.repeat(np.arange(numberConsonantPhoneme, dtype=float), 6)

    # Test set: repetitions 3-4 and 8-9 of every consonant (four each).
    testFeatures = np.concatenate(
        (covariancesLabels[:, 3:5], covariancesLabels[:, 8:10]), axis=1
    ).reshape(numberConsonantPhoneme * 4, numberChannels, numberChannels)
    testLabels = np.repeat(np.arange(numberConsonantPhoneme, dtype=float), 4)

    # One centroid per consonant, computed from its six training matrices.
    trainCentroid = np.zeros((numberConsonantPhoneme, numberChannels, numberChannels))
    for phoneme in range(numberConsonantPhoneme):
        start = phoneme * 6
        trainCentroid[phoneme] = manifoldMean.mean(trainFeatures[start:start + 6])

    # Each test matrix gets the label of the centroid at minimum distance.
    predictLabels = np.zeros((4 * numberConsonantPhoneme))
    for k, testCovariance in enumerate(testFeatures):
        distances = np.array(
            [geodesicDistance.distance(testCovariance, centroid) for centroid in trainCentroid],
            dtype=float,
        )
        predictLabels[k] = np.argmin(distances)

    correct = (predictLabels == testLabels)
    accuracy = np.mean(correct)
    print(accuracy)
    print(" ")
    allConsonantAccuracyVoiced.append(accuracy)

Subject1
0.33695652173913043
 
Subject2
0.4673913043478261
 
Subject3
0.18478260869565216
 
Subject4
0.22826086956521738
 
Subject5
0.5217391304347826
 
Subject6
0.45652173913043476
 
Subject7
0.25
 
Subject8
0.45652173913043476
 
Subject9
0.2608695652173913
 
Subject10
0.41304347826086957
 
Subject12
0.41304347826086957
 


In [11]:
# Accuracy averaged over all subjects: voiced recordings, consonants only.
meanConsonantAccuracyVoiced = np.mean(allConsonantAccuracyVoiced)
print(meanConsonantAccuracyVoiced)

0.36264822134387353


In [12]:
# Nearest-centroid classification restricted to the consonants, unvoiced
# recordings. Consonants carry labels 0-22, so only the first
# numberConsonantPhoneme * trialsPerPhoneme trials of each file are used.
allConsonantAccuracyUnvoiced = []
for subject in subjects:
    print(subject)

    DATA = np.load("Experiment1/Phoneme/Unvoiced" + subject + ".npy")

    # Standardise along the last axis (time samples, per the header docstring).
    # The 1e-5 keeps the division finite when a standard deviation is zero.
    sampleMean = np.expand_dims(DATA.mean(axis=-1), axis=-1)
    sampleStd = np.expand_dims(DATA.std(axis=-1), axis=-1)
    unvoicedDATA = (DATA - sampleMean) / (sampleStd + 1e-5)

    phonemeMatrices = unvoicedDATA
    # Labels assume the trials are stored phoneme by phoneme, ten in a row.
    labelsByPhonemes = np.repeat(np.arange(numberConsonantPhoneme), trialsPerPhoneme)

    # Map every consonant label to the list of trial indices that carry it.
    Indices = {
        phoneme: np.flatnonzero(labelsByPhonemes == phoneme).tolist()
        for phoneme in range(numberConsonantPhoneme)
    }

    # One (channels x channels) second-moment matrix per trial.
    covariancesLabels = np.zeros((numberConsonantPhoneme, trialsPerPhoneme, numberChannels, numberChannels))
    for phoneme, trialIndices in Indices.items():
        for repetition, trial in enumerate(trialIndices):
            signal = phonemeMatrices[trial]
            covariancesLabels[phoneme, repetition] = 1/windowLength * (signal @ signal.T)

    # Training set: repetitions 0-2 and 5-7 of every consonant (six each).
    trainFeatures = np.concatenate(
        (covariancesLabels[:, :3], covariancesLabels[:, 5:8]), axis=1
    ).reshape(numberConsonantPhoneme * 6, numberChannels, numberChannels)
    trainLabels = np.repeat(np.arange(numberConsonantPhoneme, dtype=float), 6)

    # Test set: repetitions 3-4 and 8-9 of every consonant (four each).
    testFeatures = np.concatenate(
        (covariancesLabels[:, 3:5], covariancesLabels[:, 8:10]), axis=1
    ).reshape(numberConsonantPhoneme * 4, numberChannels, numberChannels)
    testLabels = np.repeat(np.arange(numberConsonantPhoneme, dtype=float), 4)

    # One centroid per consonant, computed from its six training matrices.
    trainCentroid = np.zeros((numberConsonantPhoneme, numberChannels, numberChannels))
    for phoneme in range(numberConsonantPhoneme):
        start = phoneme * 6
        trainCentroid[phoneme] = manifoldMean.mean(trainFeatures[start:start + 6])

    # Each test matrix gets the label of the centroid at minimum distance.
    predictLabels = np.zeros((4 * numberConsonantPhoneme))
    for k, testCovariance in enumerate(testFeatures):
        distances = np.array(
            [geodesicDistance.distance(testCovariance, centroid) for centroid in trainCentroid],
            dtype=float,
        )
        predictLabels[k] = np.argmin(distances)

    correct = (predictLabels == testLabels)
    accuracy = np.mean(correct)
    print(accuracy)
    print(" ")
    allConsonantAccuracyUnvoiced.append(accuracy)

Subject1
0.4673913043478261
 
Subject2
0.42391304347826086
 
Subject3
0.29347826086956524
 
Subject4
0.3804347826086957
 
Subject5
0.41304347826086957
 
Subject6
0.41304347826086957
 
Subject7
0.2826086956521739
 
Subject8
0.32608695652173914
 
Subject9
0.17391304347826086
 
Subject10
0.22826086956521738
 
Subject12
0.391304347826087
 


In [13]:
# Accuracy averaged over all subjects: unvoiced recordings, consonants only.
meanConsonantAccuracyUnvoiced = np.mean(allConsonantAccuracyUnvoiced)
print(meanConsonantAccuracyUnvoiced)

0.34486166007905134


In [15]:
# Nearest-centroid classification restricted to the vowels, voiced recordings.
# Vowels follow the consonants in every file, so their trials start right
# after the consonant trials. Labels used here are vowel-relative (0-14);
# vowel label v corresponds to phoneme label numberConsonantPhoneme + v.
allVowelAccuracyVoiced = []

# Index of the first vowel trial. Derived from the configuration instead of
# the former hard-coded 230 so it stays correct if either constant changes.
vowelOffset = numberConsonantPhoneme * trialsPerPhoneme
# Repetitions per phoneme used for training / testing (0-2 and 5-7 / 3-4 and 8-9).
trainPerPhoneme = 6
testPerPhoneme = 4

for subject in subjects:
    print(subject)

    DATA = np.load("Experiment1/Phoneme/Voiced" + subject + ".npy")

    # Standardise along the last axis (time samples, per the header docstring).
    # The 1e-5 keeps the division finite when a standard deviation is zero.
    mean = np.mean(DATA, axis = -1)
    std = np.std(DATA, axis = -1)
    voicedDATA = (DATA - mean[..., np.newaxis])/(std[..., np.newaxis] + 1e-5)

    phonemeMatrices = voicedDATA
    # Labels assume the trials are stored phoneme by phoneme, ten in a row.
    labelsByPhonemes = np.repeat(np.arange(numberVowelPhoneme), trialsPerPhoneme)

    # Map every vowel label to its vowel-relative trial indices.
    Indices = {
        vowel: np.flatnonzero(labelsByPhonemes == vowel).tolist()
        for vowel in range(numberVowelPhoneme)
    }

    # One (channels x channels) second-moment matrix per vowel trial.
    covariancesLabels = np.zeros((numberVowelPhoneme, trialsPerPhoneme, numberChannels, numberChannels))
    for vowel, trialIndices in Indices.items():
        for repetition, trial in enumerate(trialIndices):
            signal = phonemeMatrices[vowelOffset + trial]
            covariancesLabels[vowel, repetition] = 1/windowLength * (signal @ signal.T)

    # Training set: repetitions 0-2 and 5-7 of every vowel.
    trainFeatures = np.zeros((numberVowelPhoneme * trainPerPhoneme, numberChannels, numberChannels))
    trainLabels = np.zeros((numberVowelPhoneme * trainPerPhoneme))
    for vowel in range(numberVowelPhoneme):
        start = vowel * trainPerPhoneme
        trainFeatures[start:start + 3] = covariancesLabels[vowel, :3]
        trainFeatures[start + 3:start + trainPerPhoneme] = covariancesLabels[vowel, 5:8]
        trainLabels[start:start + trainPerPhoneme] = vowel

    # Test set: repetitions 3-4 and 8-9 of every vowel.
    testFeatures = np.zeros((numberVowelPhoneme * testPerPhoneme, numberChannels, numberChannels))
    testLabels = np.zeros((numberVowelPhoneme * testPerPhoneme))
    for vowel in range(numberVowelPhoneme):
        start = vowel * testPerPhoneme
        testFeatures[start:start + 2] = covariancesLabels[vowel, 3:5]
        testFeatures[start + 2:start + testPerPhoneme] = covariancesLabels[vowel, 8:10]
        testLabels[start:start + testPerPhoneme] = vowel

    # One centroid per vowel, computed from its training matrices.
    trainCentroid = np.zeros((numberVowelPhoneme, numberChannels, numberChannels))
    for vowel in range(numberVowelPhoneme):
        start = vowel * trainPerPhoneme
        trainCentroid[vowel, :, :] = manifoldMean.mean(trainFeatures[start:start + trainPerPhoneme])

    # Each test matrix gets the label of the centroid at minimum distance.
    predictLabels = np.zeros((testPerPhoneme * numberVowelPhoneme))
    for k in range(testPerPhoneme * numberVowelPhoneme):
        distances = np.zeros((numberVowelPhoneme))
        for m in range(numberVowelPhoneme):
            distances[m] = geodesicDistance.distance(testFeatures[k], trainCentroid[m])
        predictLabels[k] = np.argmin(distances)

    correct = (predictLabels == testLabels)
    accuracy = np.mean(correct)
    print(accuracy)
    print(" ")
    allVowelAccuracyVoiced.append(accuracy)

Subject1
0.5166666666666667
 
Subject2
0.6666666666666666
 
Subject3
0.45
 
Subject4
0.25
 
Subject5
0.5333333333333333
 
Subject6
0.5833333333333334
 
Subject7
0.2
 
Subject8
0.3333333333333333
 
Subject9
0.6
 
Subject10
0.48333333333333334
 
Subject12
0.38333333333333336
 


In [16]:
# Accuracy averaged over all subjects: voiced recordings, vowels only.
meanVowelAccuracyVoiced = np.mean(allVowelAccuracyVoiced)
print(meanVowelAccuracyVoiced)

0.45454545454545453


In [17]:
# Nearest-centroid classification restricted to the vowels, unvoiced
# recordings. Vowels follow the consonants in every file, so their trials
# start right after the consonant trials. Labels used here are vowel-relative
# (0-14); vowel label v corresponds to phoneme label numberConsonantPhoneme + v.
allVowelAccuracyUnvoiced = []

# Index of the first vowel trial. Derived from the configuration instead of
# the former hard-coded 230 so it stays correct if either constant changes.
vowelOffset = numberConsonantPhoneme * trialsPerPhoneme
# Repetitions per phoneme used for training / testing (0-2 and 5-7 / 3-4 and 8-9).
trainPerPhoneme = 6
testPerPhoneme = 4

for subject in subjects:
    print(subject)

    DATA = np.load("Experiment1/Phoneme/Unvoiced" + subject + ".npy")

    # Standardise along the last axis (time samples, per the header docstring).
    # The 1e-5 keeps the division finite when a standard deviation is zero.
    mean = np.mean(DATA, axis = -1)
    std = np.std(DATA, axis = -1)
    unvoicedDATA = (DATA - mean[..., np.newaxis])/(std[..., np.newaxis] + 1e-5)

    phonemeMatrices = unvoicedDATA
    # Labels assume the trials are stored phoneme by phoneme, ten in a row.
    labelsByPhonemes = np.repeat(np.arange(numberVowelPhoneme), trialsPerPhoneme)

    # Map every vowel label to its vowel-relative trial indices.
    Indices = {
        vowel: np.flatnonzero(labelsByPhonemes == vowel).tolist()
        for vowel in range(numberVowelPhoneme)
    }

    # One (channels x channels) second-moment matrix per vowel trial.
    covariancesLabels = np.zeros((numberVowelPhoneme, trialsPerPhoneme, numberChannels, numberChannels))
    for vowel, trialIndices in Indices.items():
        for repetition, trial in enumerate(trialIndices):
            signal = phonemeMatrices[vowelOffset + trial]
            covariancesLabels[vowel, repetition] = 1/windowLength * (signal @ signal.T)

    # Training set: repetitions 0-2 and 5-7 of every vowel.
    trainFeatures = np.zeros((numberVowelPhoneme * trainPerPhoneme, numberChannels, numberChannels))
    trainLabels = np.zeros((numberVowelPhoneme * trainPerPhoneme))
    for vowel in range(numberVowelPhoneme):
        start = vowel * trainPerPhoneme
        trainFeatures[start:start + 3] = covariancesLabels[vowel, :3]
        trainFeatures[start + 3:start + trainPerPhoneme] = covariancesLabels[vowel, 5:8]
        trainLabels[start:start + trainPerPhoneme] = vowel

    # Test set: repetitions 3-4 and 8-9 of every vowel.
    testFeatures = np.zeros((numberVowelPhoneme * testPerPhoneme, numberChannels, numberChannels))
    testLabels = np.zeros((numberVowelPhoneme * testPerPhoneme))
    for vowel in range(numberVowelPhoneme):
        start = vowel * testPerPhoneme
        testFeatures[start:start + 2] = covariancesLabels[vowel, 3:5]
        testFeatures[start + 2:start + testPerPhoneme] = covariancesLabels[vowel, 8:10]
        testLabels[start:start + testPerPhoneme] = vowel

    # One centroid per vowel, computed from its training matrices.
    trainCentroid = np.zeros((numberVowelPhoneme, numberChannels, numberChannels))
    for vowel in range(numberVowelPhoneme):
        start = vowel * trainPerPhoneme
        trainCentroid[vowel, :, :] = manifoldMean.mean(trainFeatures[start:start + trainPerPhoneme])

    # Each test matrix gets the label of the centroid at minimum distance.
    predictLabels = np.zeros((testPerPhoneme * numberVowelPhoneme))
    for k in range(testPerPhoneme * numberVowelPhoneme):
        distances = np.zeros((numberVowelPhoneme))
        for m in range(numberVowelPhoneme):
            distances[m] = geodesicDistance.distance(testFeatures[k], trainCentroid[m])
        predictLabels[k] = np.argmin(distances)

    correct = (predictLabels == testLabels)
    accuracy = np.mean(correct)
    print(accuracy)
    print(" ")
    allVowelAccuracyUnvoiced.append(accuracy)

Subject1
0.5666666666666667
 
Subject2
0.6333333333333333
 
Subject3
0.3
 
Subject4
0.43333333333333335
 
Subject5
0.5833333333333334
 
Subject6
0.5833333333333334
 
Subject7
0.3333333333333333
 
Subject8
0.4166666666666667
 
Subject9
0.5
 
Subject10
0.6333333333333333
 
Subject12
0.4
 


In [18]:
# Accuracy averaged over all subjects: unvoiced recordings, vowels only.
meanVowelAccuracyUnvoiced = np.mean(allVowelAccuracyUnvoiced)
print(meanVowelAccuracyUnvoiced)

0.4893939393939393
