## AstNet inference code for EMA Time-Scaling

#### Import libraries and setup matplotlib

In [None]:
import matplotlib
%matplotlib inline
import matplotlib.pylab as plt

import IPython.display as ipd

import sys
sys.path.append('waveglow/')
import numpy as np
import torch

from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT, STFT
from audio_processing import griffin_lim
from train import load_model

#my libraries
import scipy.io as sio
from phoneme_to_seq import *
from fastdtw import fastdtw
import random
from scipy.spatial.distance import euclidean
from scipy.stats import pearsonr

# `os` is not among the imports above, so bring it into scope before use.
import os

# Restrict this process to the GPU with index 1.
# NOTE(review): this only takes effect if it runs before CUDA is first
# initialised in the process -- keep it ahead of any `.cuda()` call.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [None]:
def plot_data(data, figsize=(16, 4)):
    """Draw every 2-D array in ``data`` as an image, side by side in one row.

    Args:
        data: Sequence of 2-D arrays that ``Axes.imshow`` can render.
        figsize: Figure size forwarded to ``plt.subplots``.
    """
    # squeeze=False keeps ``axes`` two-dimensional even for a single panel,
    # so the row lookup below also works when len(data) == 1.
    fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    for axis, image in zip(axes[0], data):
        # 'lower' is the documented value; recent Matplotlib releases reject
        # the legacy spelling 'bottom' with a ValueError.
        axis.imshow(image, aspect='auto', origin='lower',
                    interpolation='none')

#### Setup hparams

In [None]:
# Start from the project's default hyper-parameters, then override two of
# them for this inference run.
hparams = create_hparams()
hparams.sampling_rate = 22050
# NOTE(review): presumably the cap on decoder iterations during inference --
# confirm against the model/hparams definition.
hparams.max_decoder_steps=1500

#### Load model from checkpoint

In [None]:
# Build the network from `hparams` and restore its weights from a checkpoint.
# NOTE(review): "checkpoint page" looks like a placeholder -- replace it with
# the real checkpoint file path before running.
checkpoint_path = "checkpoint page"
print("loading: \n",checkpoint_path)
model = load_model(hparams)
# The checkpoint is expected to be a dict with the weights under 'state_dict'.
model.load_state_dict(torch.load(checkpoint_path)['state_dict'])

# Move the model to the GPU, switch it to eval mode and cast it to fp16.
_ = model.cuda().eval().half()

#### Allocate result arrays

In [None]:
# Zero-initialised result buffers. The inference loop fills them using
# [fold index][subject index] as the two leading axes.

# Normalised DTW distance for each test sentence.
dist_arr = np.zeros(shape=(4, 5, 115))

# Sequence lengths per sentence: slot 0 = ground truth, slot 1 = prediction.
lengths = np.zeros(shape=(4, 5, 2, 115))

# One mean correlation value per (fold, subject) pair.
Big_C = np.zeros(shape=(4, 5))

In [None]:
# Inference over all subjects
predictions={}
for f_no in range(1,5):
    dd=[];
    sub_list=[] # subject list
    for sub in sub_list:
        if f_no==1:
            predictions[sub]={}
        checkpoint_path = "" # respective subject checkpoint path
        print("loading: \n",checkpoint_path)
        model = load_model(hparams)
        model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
        _ = model.cuda().eval().half()

        print(sub)
        test_file= open("path to test filelist",'r')
        print("Testin on : ",test_file.name)
        lines=test_file.readlines()
        coefficients=[]
        rMSE=[]
        dist=[]
        for line in lines:
            path=line.split('|')
            sequence = np.expand_dims(clip_ema_silence(path[1][:-1]),axis=0)
            #print(sequence.shape)
            sequence = torch.autograd.Variable(torch.from_numpy(sequence)).cuda().half()

            ##### Decode text input and plot results

            torch.manual_seed(1234)
            mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence.transpose(1,2))
            X=clip_ema_silence(path[0])
            Y=mel_outputs[0].cpu().detach().numpy().transpose()
            #storing predictions
            predictions[sub][str(lines.index(line)+(f_no-1)*115)]=Y
            ##lengths of gt and predicted sequences
            lengths[f_no-1][sub_list.index(sub)][0][lines.index(line)]=X.shape[0]
            lengths[f_no-1][sub_list.index(sub)][1][lines.index(line)]=Y.shape[0]
            #print(X.shape,Y.shape)
            dis,pth=fastdtw(X,Y, dist=euclidean)
            dist.append(dis/len(pth))
            for artic in range(0,18):
                out=[]
                gt=[]
                for i in range(0,len(pth)):
                    out.append(Y[pth[i][1]][artic])
                    #for i in range(0,Yout.shape[0]):
                    gt.append(X[pth[i][0]][artic])
                coef=pearsonr(out,gt)
                coefficients.append(coef)
                rMSE.append(np.sqrt(np.mean(np.square(np.asarray(out)-np.asarray(gt)))))
        Big_C[f_no-1][sub_list.index(sub)]=np.mean(coefficients,axis=0)[0]
        dist_arr[f_no-1][sub_list.index(sub)]=np.asarray(dist)
        print('RMSE : ',np.mean(rMSE,axis=0))      
        print('CC : ',np.mean(coefficients,axis=0))
        print('DTW_distance : ',np.mean(dist,axis=0))
        dd.append(np.mean(dist,axis=0))
    print(np.mean(np.asarray(dd)))
    

In [None]:
# Show the mean-correlation matrix filled in by the inference loop
# (indexed [fold][subject]).
Big_C

In [None]:
# Persist the predictions collected by the inference loop.
# The import was commented out, which left `pickle` undefined below.
import pickle

# Sanity check: raises KeyError early if this subject was never processed.
predictions['Advith'].keys()
#sio.savemat('predictions_n2f.mat',{"predictions":predictions})
with open('pred_n2f.pickle','wb') as hdl:
    pickle.dump(predictions,hdl)

In [None]:
# Side-by-side histograms of sequence lengths for fold index 0, subject
# index 0: ground truth on the left, predictions on the right.
fig, a = plt.subplots(nrows=1, ncols=2)
print(a)

# Left panel -- ground-truth lengths (default binning).
a[0].hist(lengths[0, 0, 0])
a[0].set_title("Ground-Truth")

# Right panel -- predicted lengths, 20 bins.
a[1].hist(lengths[0, 0, 1], bins=20)
a[1].set_title("Predicted")

fig.savefig('hist_fold1_n2s.jpg')

In [None]:
# Save the DTW distances and report their mean and standard deviation over
# the sentence axis (one value per fold/subject pair).
sio.savemat("N2F_pool_36500.mat",{"data":dist_arr})
# A notebook cell only echoes its last expression, so the mean was computed
# and silently discarded; print it explicitly.
print(np.mean(dist_arr,axis=-1))
np.std(dist_arr,axis=-1)

#### Per-articulator correlation and RMSE

In [None]:
# Average the correlation and RMSE per articulator over all scored sentences.
# `coefficients` and `rMSE` hold 18 consecutive entries per sentence.
c = np.asarray(coefficients)
# Column 0 is the correlation value. -1 lets NumPy derive the sentence count
# instead of assuming exactly 575 sentences were scored.
c = c[:, 0].reshape(-1, 18)
c = np.mean(c, axis=0)
print("Correlation : \n",c)
rmse = np.asarray(rMSE)
rmse = rmse.reshape(-1, 18)
rmse = np.mean(rmse, axis=0)
print("RMSE : \n", rmse)

In [None]:
##write xlsx
import os
import xlsxwriter

# `sub` is the subject name left over from the inference loop. It is already
# an element of `sub_list`, so indexing the list with it raised TypeError.
xlsx_path = 'excel_sheets/results_' + sub + '.xlsx'
# Create the output folder up front so saving the workbook cannot fail on it.
os.makedirs('excel_sheets', exist_ok=True)
workbk = xlsxwriter.Workbook(xlsx_path)
wrksht = workbk.add_worksheet()
print("Writing to ", xlsx_path)
row = 0
# Correlations go in row 0 and RMSE values in row 5, one column each.
# NOTE(review): only the first 12 values are written although 18 articulators
# are scored elsewhere in this notebook -- confirm the limit is intended.
for col, x, y in zip(range(0, 12), c, rmse):
    wrksht.write(row, col, x)
    wrksht.write(row + 5, col, y)
workbk.close()

<h3>Best Articulatory Plots</h3>

In [None]:
coeffi = []
# `coefficients` holds 18 consecutive entries per test sentence, so the flat
# index of the best score splits into sentence (// 18) and articulator (% 18).
# The sentence was previously chosen with min() while the report below used
# max() with a stale modulus of 12, so the two never referred to the same item.
best_idx = coefficients.index(max(coefficients))
# Strip only the newline; slicing off the last character would corrupt the
# path when the line has no trailing newline.
path = lines[best_idx // 18].rstrip('\n').split('|')
sequence = np.expand_dims(clip_ema_silence(path[1]), axis=0)
print(sequence.shape)
sequence = torch.from_numpy(sequence).cuda().half()
# Fixed seed so the decode is reproducible.
torch.manual_seed(1234)
# No gradients are needed for inference.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence.transpose(1,2))
# Align the prediction to the ground truth with DTW, then score and plot
# every articulator along the warping path.
X = clip_ema_silence(path[0])
Y = mel_outputs[0].cpu().detach().numpy().transpose()
dis, pth = fastdtw(X, Y, dist=euclidean)
warp = np.asarray(pth)
gt_aligned = X[warp[:, 0]]
out_aligned = Y[warp[:, 1]]
for artic in range(0, 18):
    gt = gt_aligned[:, artic]
    out = out_aligned[:, artic]
    coeffi.append(pearsonr(out, gt))
    # Open the figure before drawing so every articulator gets its own plot
    # and no empty figure is left over after the loop.
    plt.figure()
    plt.plot(gt)
    plt.plot(out)
print(coeffi[best_idx % 18])
print(np.mean(coeffi,axis=0))

<h1> Alignment for sentence</h1>

In [None]:
# Run the model on one hand-picked recording, plot its outputs and save the
# attention alignment to a .mat file.
import scipy.io as sio

#dct=prep_dct()
path = '../RateExp/Advith/Neutral/EmaClean/s001Nl01m0001.mat'
sequence = clip_ema_silence(path)
sequence = np.expand_dims(sequence, axis=0)
print(sequence.shape)
sequence = torch.from_numpy(sequence)
sequence = torch.autograd.Variable(sequence).cuda().half()

# Fixed seed so the decode is reproducible.
torch.manual_seed(1234)
mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence.transpose(1,2))
print(sequence.shape)
print(alignments.shape)

# Convert the first batch item of each output to a float32 NumPy array.
panels = tuple(
    tensor.float().data.cpu().numpy()[0]
    for tensor in (mel_outputs, mel_outputs_postnet, alignments)
)
plot_data((panels[0], panels[1], panels[2].T))
sio.savemat('alignment_slow.mat',
            {'align': alignments.float().data.cpu().numpy()[0].T})