In [1]:
"""
Word articulations are naturally distinguishable on the manifold of SPD matrices.

Subjects articulate 36 distinct words, which together span the phonetic space of the English language, in both a 'silent' and an 'audible' manner.


The words, with their class indices, are:
0. Eager 
1. lift
2. eight
3. edge
4. cap
5. matted
6. tub
7. box
8. rune
9. rook
10. folder
11. block
12. fun
13. mop
14. pod
15. very
16. went
17. throat
18. this
19. tango
20. doubt
21. not
22. pretty
23. xerox
24. rodent
25. limb
26. batch
27. jeep
28. ship
29. beige
30. yes
31. echo
32. gold
33. sing
34. Uh-oh
35. hiccup

DATA is a NumPy array of shape (360, 22, 7500): 360 trials (36 words, each repeated 10 times), 22 channels, and 7500 time samples per trial.
The raw data was filtered with a 3rd-order Butterworth bandpass filter between 80 and 1000 Hz.
"""

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from basicOperations.manifoldOperations import matrixDistance, frechetMean

In [2]:
# Identifiers of the 12 participants ("Subject1" ... "Subject12"); each one is
# concatenated into the per-subject data file name when the recordings are loaded.
subjects = ["Subject" + str(subjectNumber) for subjectNumber in range(1, 13)]

In [4]:
# Experimental design: each word in the vocabulary is repeated a fixed number of times.
numberWords, trialsPerWord = 36, 10
numberTrials = trialsPerWord * numberWords  # total recordings per subject (360)

# Recording geometry of a single trial: channels x time samples.
numberChannels, windowLength = 22, 7500

In [5]:
# Helper objects from basicOperations.manifoldOperations, shared by the
# classification cells below.
# Used as geodesicDistance.distance(A, B) on pairs of (channels x channels) matrices;
# presumably a Riemannian distance between SPD matrices -- confirm in manifoldOperations.
geodesicDistance = matrixDistance()
# Used as manifoldMean.mean(stack) on a stack of matrices belonging to one word;
# presumably the Frechet mean on the SPD manifold -- confirm in manifoldOperations.
manifoldMean = frechetMean()

In [7]:
# Nearest-centroid classification of the VOICED word articulations, run
# independently for every subject. For each subject:
#   1. standardise every channel of every trial,
#   2. turn each trial into a (channels x channels) covariance matrix,
#   3. average the training covariances of each word with manifoldMean.mean,
#   4. assign every held-out trial to the word whose centroid is closest
#      according to geodesicDistance.distance.
# The per-subject accuracies are collected in allAccuracyVoiced.

# Repetition indices (within each word) used for fitting and for evaluation.
# All split sizes below are derived from these two lists.
trainTrials = [0, 1, 2, 5, 6, 7]
testTrials = [3, 4, 8, 9]
trainPerWord = len(trainTrials)
testPerWord = len(testTrials)

allAccuracyVoiced = []
for subject in subjects:
    print(subject)

    DATA = np.load("Experiment1/Words/Voiced" + subject + ".npy")
    # Fail early with a clear message instead of silently ignoring extra trials
    # or hitting an obscure indexing/broadcast error further down.
    if DATA.ndim != 3 or DATA.shape[:2] != (numberTrials, numberChannels):
        raise ValueError(
            "Expected voiced data of shape (%d, %d, samples) for %s, got %s"
            % (numberTrials, numberChannels, subject, DATA.shape)
        )

    # Zero-mean / unit-variance per channel and trial; the 1e-5 keeps the
    # division finite for a channel with zero variance.
    mean = np.mean(DATA, axis = -1)
    std = np.std(DATA, axis = -1)
    voicedDATA = (DATA - mean[..., np.newaxis])/(std[..., np.newaxis] + 1e-5)

    wordMatrices = voicedDATA
    # Normalise by the actual number of samples in the recording rather than
    # the windowLength constant, so the covariance scale stays correct if the
    # recordings are ever cut to a different length.
    numberSamples = wordMatrices.shape[-1]

    # Labels assume the trials are stored word by word (all repetitions of
    # word 0, then word 1, ...), so trial w * trialsPerWord + r is repetition r of word w.
    labelsByWords = np.repeat(np.arange(numberWords), trialsPerWord)

    # One covariance matrix per (word, repetition). Each channel is zero-mean
    # after standardisation, so X @ X.T / T is the sample covariance.
    covariancesLabels = np.zeros((numberWords, trialsPerWord, numberChannels, numberChannels))
    for wordIdx in range(numberWords):
        for repIdx in range(trialsPerWord):
            trial = wordMatrices[wordIdx * trialsPerWord + repIdx]
            covariancesLabels[wordIdx, repIdx] = 1/numberSamples * (trial @ trial.T)

    # Word-major train/test split: for every word, its selected repetitions in
    # the order given by trainTrials / testTrials.
    trainFeatures = np.take(covariancesLabels, trainTrials, axis = 1).reshape(-1, numberChannels, numberChannels)
    trainLabels = np.repeat(np.arange(numberWords), trainPerWord)
    testFeatures = np.take(covariancesLabels, testTrials, axis = 1).reshape(-1, numberChannels, numberChannels)
    testLabels = np.repeat(np.arange(numberWords), testPerWord)

    # One centroid per word, computed from that word's training covariances only.
    trainCentroid = np.zeros((numberWords, numberChannels, numberChannels))
    for wordIdx in range(numberWords):
        start = wordIdx * trainPerWord
        trainCentroid[wordIdx, :, :] = manifoldMean.mean(trainFeatures[start:start + trainPerWord])

    # Predict the word whose centroid is nearest to each held-out covariance.
    predictLabels = np.zeros(len(testFeatures), dtype = int)
    for k in range(len(testFeatures)):
        distances = np.zeros((numberWords))
        for m in range(numberWords):
            distances[m] = geodesicDistance.distance(testFeatures[k], trainCentroid[m])
        predictLabels[k] = np.argmin(distances)

    correct = (predictLabels == testLabels)
    accuracy = np.mean(correct)
    print(accuracy)
    print(" ")
    allAccuracyVoiced.append(accuracy)

Subject1
0.6319444444444444
 
Subject2
0.6597222222222222
 
Subject3
0.3888888888888889
 
Subject4
0.2638888888888889
 
Subject5
0.6319444444444444
 
Subject6
0.6875
 
Subject7
0.3819444444444444
 
Subject8
0.5486111111111112
 
Subject9
0.6111111111111112
 
Subject10
0.625
 
Subject11
0.5972222222222222
 
Subject12
0.5
 


In [8]:
# Voiced classification accuracy averaged over all subjects.
meanAccuracyVoiced = np.mean(allAccuracyVoiced)
print(meanAccuracyVoiced)

0.5439814814814815


In [10]:
# Nearest-centroid classification of the UNVOICED word articulations, run
# independently for every subject. For each subject:
#   1. standardise every channel of every trial,
#   2. turn each trial into a (channels x channels) covariance matrix,
#   3. average the training covariances of each word with manifoldMean.mean,
#   4. assign every held-out trial to the word whose centroid is closest
#      according to geodesicDistance.distance.
# The per-subject accuracies are collected in allAccuracyUnvoiced.

# Repetition indices (within each word) used for fitting and for evaluation.
# All split sizes below are derived from these two lists.
trainTrials = [0, 1, 2, 5, 6, 7]
testTrials = [3, 4, 8, 9]
trainPerWord = len(trainTrials)
testPerWord = len(testTrials)

allAccuracyUnvoiced = []
for subject in subjects:
    print(subject)

    DATA = np.load("Experiment1/Words/Unvoiced" + subject + ".npy")
    # Fail early with a clear message instead of silently ignoring extra trials
    # or hitting an obscure indexing/broadcast error further down.
    if DATA.ndim != 3 or DATA.shape[:2] != (numberTrials, numberChannels):
        raise ValueError(
            "Expected unvoiced data of shape (%d, %d, samples) for %s, got %s"
            % (numberTrials, numberChannels, subject, DATA.shape)
        )

    # Zero-mean / unit-variance per channel and trial; the 1e-5 keeps the
    # division finite for a channel with zero variance.
    mean = np.mean(DATA, axis = -1)
    std = np.std(DATA, axis = -1)
    unvoicedDATA = (DATA - mean[..., np.newaxis])/(std[..., np.newaxis] + 1e-5)

    wordMatrices = unvoicedDATA
    # Normalise by the actual number of samples in the recording rather than
    # the windowLength constant, so the covariance scale stays correct if the
    # recordings are ever cut to a different length.
    numberSamples = wordMatrices.shape[-1]

    # Labels assume the trials are stored word by word (all repetitions of
    # word 0, then word 1, ...), so trial w * trialsPerWord + r is repetition r of word w.
    labelsByWords = np.repeat(np.arange(numberWords), trialsPerWord)

    # One covariance matrix per (word, repetition). Each channel is zero-mean
    # after standardisation, so X @ X.T / T is the sample covariance.
    covariancesLabels = np.zeros((numberWords, trialsPerWord, numberChannels, numberChannels))
    for wordIdx in range(numberWords):
        for repIdx in range(trialsPerWord):
            trial = wordMatrices[wordIdx * trialsPerWord + repIdx]
            covariancesLabels[wordIdx, repIdx] = 1/numberSamples * (trial @ trial.T)

    # Word-major train/test split: for every word, its selected repetitions in
    # the order given by trainTrials / testTrials.
    trainFeatures = np.take(covariancesLabels, trainTrials, axis = 1).reshape(-1, numberChannels, numberChannels)
    trainLabels = np.repeat(np.arange(numberWords), trainPerWord)
    testFeatures = np.take(covariancesLabels, testTrials, axis = 1).reshape(-1, numberChannels, numberChannels)
    testLabels = np.repeat(np.arange(numberWords), testPerWord)

    # One centroid per word, computed from that word's training covariances only.
    trainCentroid = np.zeros((numberWords, numberChannels, numberChannels))
    for wordIdx in range(numberWords):
        start = wordIdx * trainPerWord
        trainCentroid[wordIdx, :, :] = manifoldMean.mean(trainFeatures[start:start + trainPerWord])

    # Predict the word whose centroid is nearest to each held-out covariance.
    predictLabels = np.zeros(len(testFeatures), dtype = int)
    for k in range(len(testFeatures)):
        distances = np.zeros((numberWords))
        for m in range(numberWords):
            distances[m] = geodesicDistance.distance(testFeatures[k], trainCentroid[m])
        predictLabels[k] = np.argmin(distances)

    correct = (predictLabels == testLabels)
    accuracy = np.mean(correct)
    print(accuracy)
    print(" ")
    allAccuracyUnvoiced.append(accuracy)

Subject1
0.3888888888888889
 
Subject2
0.5902777777777778
 
Subject3
0.2638888888888889
 
Subject4
0.24305555555555555
 
Subject5
0.7083333333333334
 
Subject6
0.5694444444444444
 
Subject7
0.125
 
Subject8
0.4722222222222222
 
Subject9
0.3819444444444444
 
Subject10
0.4444444444444444
 
Subject11
0.6736111111111112
 
Subject12
0.4027777777777778
 


In [21]:
# Unvoiced classification accuracy averaged over all subjects.
meanAccuracyUnvoiced = np.mean(allAccuracyUnvoiced)
print(meanAccuracyUnvoiced)

0.4386574074074074
