In [1]:
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

from g2p_en import G2p
import re

from basicOperations.manifoldOperations import matrixDistance, frechetMean
import torch.nn.utils as utils

from rnn import euclideanRnnNato
import math

import pickle
from Levenshtein import distance
import os

import Levenshtein

In [None]:
""" 
Proof for Figure 5 and Table 7.

Here, we train the model with just 16 articulations of each NATO word (16 * 26 = 416 articulations in total in the training set).
We test the model on a much larger dataset of 1967 NATO word articulations from the RAINBOW and GRANDFATHER passages.
"""

In [3]:
""" Convert NATO words to phoneme sequences."""

# The 26 NATO code words, in alphabetical order (spellings as used by this notebook).
natoAlphabets = (
    "Alfa Bravo Charlie Delta Echo "
    "Foxtrot Golf Hotel India Juliette "
    "Kilo Lima Mike November Oscar "
    "Papa Quebec Romeo Sierra Tango "
    "Uniform Victor Whiskey X-ray Yankee "
    "Zulu"
).split()

# Phoneme inventory; a symbol's position in this list is its integer id.
# 'SIL' is the last entry (index 32).
PHONE_DEF = (
    "AA AE AH AO AY B CH D EH ER EY F G HH IH "
    "IY JH K L M N NG OW P R S T UW V W Y Z SIL"
).split()


def phoneToId(p):
    """Return the index of phoneme symbol ``p`` in ``PHONE_DEF``.

    Raises:
        ValueError: if ``p`` is not listed in ``PHONE_DEF``.
    """
    position = PHONE_DEF.index(p)
    return position

g2p = G2p()

# Phonemize every code word: normalise the spelling, run grapheme-to-phoneme
# conversion, drop the digits attached to each symbol, and keep only symbols
# that start with an upper-case letter (this discards spaces/punctuation tokens).
phonemizedAlphabets = []
for word in natoAlphabets:
    cleaned = re.sub(r'[^a-zA-Z\- \']', '', word.strip())
    cleaned = cleaned.replace('--', '').lower()
    withoutDigits = (re.sub(r'[0-9]', '', symbol) for symbol in g2p(cleaned))
    phonemizedAlphabets.append([s for s in withoutDigits if re.match(r'[A-Z]+', s)])

# Map each phoneme symbol to its integer id.
phone2index = [[phoneToId(symbol) for symbol in word] for word in phonemizedAlphabets]

# Zero-padded label matrix: one row per word, at most 8 phoneme ids per row.
# The true length of each row is stored separately in labelLengths.
phonemizedLabels = np.zeros((26, 8))
for row in range(26):
    ids = phone2index[row]
    phonemizedLabels[row, 0:len(ids)] = ids

labelLengths = np.zeros((26))
for row, ids in enumerate(phone2index):
    labelLengths[row] = len(ids)

In [4]:
def trainOperation(model, device, trainLoader, rnnOptimizer, Loss):
    """Run one optimisation pass over ``trainLoader`` and return the mean batch loss.

    Args:
        model: module called as ``model(inputs, inputLengths)``; the lengths are
            handed over on the CPU.
        device: device that every tensor of a batch is moved to.
        trainLoader: iterable yielding ``(inputs, targets, inputLengths, targetLengths)``;
            must support ``len()``.
        rnnOptimizer: optimizer stepped once per batch.
        Loss: callable invoked as ``Loss(outputs, targets, inputLengths, targetLengths)``.

    Returns:
        Sum of the per-batch loss values divided by ``len(trainLoader)``.
    """
    model.train()
    batchLosses = []
    for batch in trainLoader:
        inputs, targets, inputLengths, targetLengths = (t.to(device) for t in batch)

        rnnOptimizer.zero_grad()

        predictions = model(inputs, inputLengths.cpu())
        batchLoss = Loss(predictions, targets, inputLengths, targetLengths)
        batchLoss.backward()
        rnnOptimizer.step()

        batchLosses.append(batchLoss.item())

    return sum(batchLosses) / len(trainLoader)


def valOperation(model, device, valLoader, Loss):
    """Evaluate ``model`` on ``valLoader`` without gradients and return the mean batch loss.

    Args:
        model: module called as ``model(inputs, inputLengths)``; the lengths are
            handed over on the CPU. It is switched to eval mode.
        device: device that every tensor of a batch is moved to.
        valLoader: iterable yielding ``(inputs, targets, inputLengths, targetLengths)``;
            must support ``len()``.
        Loss: callable invoked as ``Loss(outputs, targets, inputLengths, targetLengths)``.

    Returns:
        Sum of the per-batch loss values divided by ``len(valLoader)``.
    """
    model.eval()
    runningLoss = 0
    with torch.no_grad():
        for batch in valLoader:
            inputs, targets, inputLengths, targetLengths = (t.to(device) for t in batch)

            predictions = model(inputs, inputLengths.cpu())
            runningLoss += Loss(predictions, targets, inputLengths, targetLengths).item()

    return runningLoss / len(valLoader)

""" Number of NATO aplhabets in training set."""

# Dataset dimensions used throughout the notebook.
numberAlphabets = 26      # one class per NATO code word
trialsPerAlphabet = 20    # articulations per word (training + validation)
numberChannels = 22       # side length of each per-window covariance matrix
windowLength = 7500       # the 46 windows of 750 samples (hop 150) end at sample 7500
numberTrials = trialsPerAlphabet * numberAlphabets

In [5]:
""" Upload data."""

# Select the subject and read that subject's training recordings from disk.
subjectNumber = 1
subject = f"Subject{subjectNumber}"
DATA = np.load(f"DATA/{subject}/trainSet.npy")

In [6]:
"""z-normalize and chunk data."""

# Z-score along the last axis; the small epsilon guards against a zero
# standard deviation.
mean = DATA.mean(axis = -1)
std = DATA.std(axis = -1)
centered = DATA - mean[..., np.newaxis]
DATA = centered / (std[..., np.newaxis] + 1e-5)

# For every trial, slide a 750-sample window with a hop of 150 samples
# (46 windows) and store a regularised second-moment matrix per window.
totalTrials = numberAlphabets * trialsPerAlphabet
slicedMatrices = np.zeros((totalTrials, 46, numberChannels, numberChannels))
identity = np.eye(numberChannels)
for trial in range(totalTrials):
    for window in range(46):
        first = window * 150
        last = first + 750
        segment = DATA[trial, :, first:last]
        covariance = 1/750 * (segment @ segment.T)
        # Blend with a trace-scaled identity (0.9 / 0.1 weighting).
        slicedMatrices[trial, window] = 0.9 * covariance + 0.1 * np.trace(covariance) * identity

In [7]:
"""Approximately diagonalize the data."""

# Mean of all windowed matrices (pooled over trials and windows), computed by
# the project's frechetMean helper.
manifoldMean = frechetMean()
pooledMatrices = slicedMatrices.reshape(-1, numberChannels, numberChannels)
MEAN = manifoldMean.mean(pooledMatrices)
# NOTE(review): MEAN is presumably symmetric; np.linalg.eigh would then guarantee
# real, orthonormal eigenvectors. Kept as eig because switching changes the
# eigenvector ordering and hence the saved basis -- confirm before changing.
eigenvalues, eigenvectors = np.linalg.eig(MEAN)

# Express every windowed matrix in the eigenbasis of MEAN.
totalTrials = numberAlphabets * trialsPerAlphabet
afterMatrices = np.zeros((totalTrials, 46, numberChannels, numberChannels))
basisT = eigenvectors.T
for trial, window in np.ndindex(totalTrials, 46):
    afterMatrices[trial, window] = basisT @ slicedMatrices[trial, window] @ eigenvectors

# Trials are stored word by word: the first trialsPerAlphabet trials belong to
# word 0, the next trialsPerAlphabet to word 1, and so on.
labelsByAlphabet = np.repeat(np.arange(numberAlphabets), trialsPerAlphabet)

# Trial indices grouped by word id.
Indices = {letter: [] for letter in range(numberAlphabets)}
for trialIndex, letter in enumerate(labelsByAlphabet):
    Indices[letter].append(trialIndex)

# Regroup the transformed matrices as (word, trial-within-word, window, channel, channel).
covariancesLabels = np.zeros((numberAlphabets, trialsPerAlphabet, 46, numberChannels, numberChannels))
for letter in range(numberAlphabets):
    covariancesLabels[letter] = afterMatrices[Indices[letter]]

In [8]:
# Persist the eigenbasis so the same transform can be re-applied elsewhere.
# np.save does not create missing parent directories, so make sure the target
# directory exists first instead of failing with FileNotFoundError.
os.makedirs("DATA/ckptsNatoWords", exist_ok = True)
np.save("DATA/ckptsNatoWords/eigenVectors" + str(subjectNumber) + ".npy", eigenvectors)

In [9]:
class BaseDataset(Dataset):
    """Dataset pairing feature sequences with padded label rows.

    Args:
        data: array indexed by sample; ``data.shape[1]`` is reported as the
            input length of every sample.
        labels: per-sample label rows, returned unchanged.
        targetLength: per-sample label lengths, returned as ``int``.
    """

    def __init__(self, data, labels, targetLength):
        self.data, self.labels, self.targetLength = data, labels, targetLength

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(features as float32, labels, inputLength, targetLength)`` for ``index``."""
        features = self.data[index].astype('float32')
        return (
            features,
            self.labels[index],
            int(self.data.shape[1]),
            int(self.targetLength[index]),
        )

In [10]:
"""TRAIN-VAL data split."""

# Within each word's 20 trials, every fifth trial (4, 9, 14, 19) is held out
# for validation; the other 16 trials form the training set.
trainTrialIds = [0, 1, 2, 3, 5, 6, 7, 8, 10, 11, 12, 13, 15, 16, 17, 18]
valTrialIds = [4, 9, 14, 19]

trainFeatures = np.zeros((numberAlphabets * 16, 46, numberChannels, numberChannels))
trainLabels = np.zeros((numberAlphabets * 16, 8))
trainLabelLengths = np.zeros((numberAlphabets * 16))

valFeatures = np.zeros((numberAlphabets * 4, 46, numberChannels, numberChannels))
valLabels = np.zeros((numberAlphabets * 4, 8))
valLabelLengths = np.zeros((numberAlphabets * 4))

for letter in range(numberAlphabets):
    # Rows occupied by this word in the training and validation arrays.
    trainRows = slice(letter * 16, (letter + 1) * 16)
    valRows = slice(letter * 4, (letter + 1) * 4)

    trainFeatures[trainRows] = covariancesLabels[letter, trainTrialIds]
    valFeatures[valRows] = covariancesLabels[letter, valTrialIds]

    # Every trial of a word shares that word's label row and label length.
    trainLabels[trainRows] = phonemizedLabels[letter]
    valLabels[valRows] = phonemizedLabels[letter]
    trainLabelLengths[trainRows] = labelLengths[letter]
    valLabelLengths[valRows] = labelLengths[letter]

trainDataset = BaseDataset(trainFeatures, trainLabels, trainLabelLengths)
valDataset = BaseDataset(valFeatures, valLabels, valLabelLengths)
# Only the training loader is shuffled.
trainDataloader = DataLoader(trainDataset, batch_size=32, shuffle=True)
valDataloader = DataLoader(valDataset, batch_size=32, shuffle=False)

In [11]:
# Use the first GPU when CUDA is available and fall back to the CPU otherwise,
# so the notebook still runs on machines without a GPU. On a CUDA machine this
# resolves to "cuda:0", as before.
dev = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(dev)

numberEpochs = 100

# NOTE(review): no random seed is set, so weight initialisation and batch
# shuffling differ between runs -- consider seeding if exact reproducibility matters.
# NOTE(review): 33 equals len(PHONE_DEF); presumably the number of output
# classes -- confirm against euclideanRnnNato.RnnNet.
model = euclideanRnnNato.RnnNet(33, 23, device, numLayers = 1).to(device)
# Count of trainable parameters.
numParams = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(numParams)
# Index 32 (the last entry of PHONE_DEF, 'SIL') serves as the CTC blank.
lossFunction = nn.CTCLoss(blank = 32, zero_infinity = True)
rnnOptimizer = optim.Adam(model.parameters(), lr = 0.001, weight_decay = 1e-3)

1280121


In [None]:
"""Train the model."""
valLOSS = []
# Start from +inf so the first validation loss always becomes the running
# minimum, whatever its magnitude.
minLOSS = float("inf")
# torch.save does not create missing directories; create the checkpoint folder
# up front rather than failing after the first epoch has already been computed.
checkpointDir = "ckpts/natoEuclidean/"
os.makedirs(checkpointDir, exist_ok = True)
for epoch in range(numberEpochs):
    trainLoss = trainOperation(model, device, trainDataloader, rnnOptimizer, lossFunction)
    valLoss = valOperation(model, device, valDataloader, lossFunction)
    valLOSS.append(valLoss)
    # Track the best validation loss seen so far.
    if minLOSS > valLoss:
        minLOSS = valLoss
    # A checkpoint is written for every epoch (named by the 0-based epoch index),
    # not only for the best one.
    torch.save(model.state_dict(), checkpointDir + str(epoch) + ".pt")
    print(f'Epoch: {epoch + 1}/{numberEpochs}, Training loss: {trainLoss:.4f}, Val loss: {valLoss:.4f}')

In [None]:
# Store the per-epoch validation losses alongside the checkpoints.
valLossPath = "ckpts/natoEuclidean/valLoss.npy"
np.save(valLossPath, valLOSS)