In [1]:
import sys
from collections import defaultdict
import itertools
import math

import torch
import torch.nn as nn
from torch.autograd import Variable
import time
from sklearn import metrics
import numpy as np
import json
import torchtext.vocab

NUM_EPOCHS = 500

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# all the one-hot embedding etc. has to be handled here, rather in data generation,
# because we have to know which word indice go to which real words in order to do the embedding.

#--------------- Model --------------
class Neural_Net(nn.Module):
    def __init__(self, input_size, output_size, layer_lens = [30,40,50], nonlins = [nn.Tanh(), nn.ReLU()], drop_freqs= [.5,.6]):
        super(Neural_Net, self).__init__()

        self.input_size = input_size
        self.output_size = output_size

        self.layers = nn.ModuleList()
        self.act_ftns = nonlins
        self.dropouts = []

        curr_layer_len = input_size
        for l in layer_lens:
            self.layers.append(nn.Linear(curr_layer_len, l, bias = True))
            curr_layer_len = l
        self.layers.append( nn.Linear(curr_layer_len,self.output_size))

        for f in drop_freqs:
             self.dropouts.append(nn.Dropout(f))

               

    def forward(self, x):
        import itertools
        components = list(itertools.zip_longest(self.layers,self.act_ftns, self.dropouts))
        for layer, act_ftn, dropout in components:
            #init components and forward through
            if layer != None:
                x = layer(x)
            if act_ftn != None:
                x = act_ftn(x)
            if dropout != None:
                x = dropout(x)
        x = nn.LogSoftmax()(x)
        return x

#------------------ Loss & Optimizer --------------------
def cross_entropy_loss(logp_hats, ps):
    return torch.mean(torch.sum(- ps * logp_hats, 1))


In [2]:
#-------------- Metrics ---------------------
def cross_entropy(log_pred, targets):
    log_pred = Variable(torch.Tensor(log_pred))
    targets = Variable(torch.Tensor(targets))

    result = sum(torch.sum(-targets * log_pred, 1).data.numpy()) #I should as Professor Singh for help here as torch.sum is returning an array over the 6 instances. Does summing those before returning them make sense? And does it make sense to take the average once all those instances have been collected (as shown in line 86). Right now it doesn't make sense because cross entropy is really high compared to mse
    return result

def accuracy(pred, targets):
    return sum(np.argmax(pred,axis=1) == np.argmax(targets,axis=1))/len(pred)


In [3]:
import numpy as np
data = eval(open('fig_5_labels.json','r').read())
contexts = list(data.keys())
contexts.sort()
utters = [1,2,3,4,5]
states = [1,2,3,4,5]

def build_one_hot_utter(u):
    one_hot_utter = np.zeros(len(utters))
    one_hot_utter[utters.index(u)] = 1
    return one_hot_utter
def build_one_hot_state(s):
    one_hot_state = np.zeros(len(states))
    one_hot_state[states.index(s)] = 1
    return one_hot_state
def build_one_hot_context(c):
    one_hot_context = np.zeros(len(contexts))
    one_hot_context[contexts.index(c)] = 1
    return one_hot_context

In [4]:
x = []
y = []
for context, vals in data.items():
    one_hot_c = build_one_hot_context(context)
    for utter, state in vals.items():
        one_hot_u = build_one_hot_utter(utter)
        x.append(np.concatenate((one_hot_c,one_hot_u),axis=None))
        y.append(state)
x = np.array(x)
y = np.array(y)

In [5]:
print(x.shape)
print(y.shape)

(45, 14)
(45, 5)


In [6]:
def train(model, optimizer, loss_fn, training_data, num_epochs):
    for epoch in range(num_epochs):
        
        x = Variable(torch.Tensor(training_data[0]))
        y = Variable(torch.Tensor(training_data[1]))

        log_y_pred = model(x)

        optimizer.zero_grad()
        loss = loss_fn(log_y_pred, y)
        print(epoch, loss.item(), end='\r')

        loss.backward()
        optimizer.step()                         

In [7]:
def evaluate(model, data):
    x = Variable(torch.Tensor(data[0]))
    y = data[1]
    
    y_hat = model(x).data.numpy()
    
    ce = cross_entropy(y_hat,y)
    acc = accuracy(y_hat,y)
    print("ce: ", ce)
    print("acc: ", acc)

In [8]:
def main():
    loss_fn = cross_entropy_loss
    for learning_rate in [1e-3,1e-4,1e-2]:
        for nonlin in [[nn.Tanh(),nn.ReLU()]]:
            for num_units in [[70,30],[70,80,90]]:
                model = Neural_Net(x.shape[1],y.shape[1],layer_lens=num_units,
                                  nonlins=nonlin).to(device)
                optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=.0001)
                train(model,optimizer, loss_fn, (x,y), num_epochs=NUM_EPOCHS)
    evaluate(model,(x,y))
main()

0 1.64592754840850831 1.64606475830078122 1.66026473045349123 1.65013647079467774 1.62899267673492435 1.6310199499130256 1.6355521678924567 1.62988436222076428 1.62117195129394539 1.6262789964675903



ce:  44.556146785616875
acc:  0.9333333333333333
