In [1]:
import sys
from collections import defaultdict
import itertools
import math

import torch
import torch.nn as nn
from torch.autograd import Variable
import time
from sklearn import metrics
import numpy as np
import json
import torchtext.vocab


SAVE_PATH = 'models/model_'
NUM_EPOCHS = 1000

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# all the one-hot embedding etc. has to be handled here, rather in data generation,
# because we have to know which word indice go to which real words in order to do the embedding.

#--------------- Model --------------
class Neural_Net(nn.Module):
    def __init__(self, input_size, output_size, layer_lens = [30,40,50], nonlins = [nn.Tanh(), nn.ReLU()], drop_freqs= [.5,.6]):
        super(Neural_Net, self).__init__()

        self.input_size = input_size
        self.output_size = output_size

        self.layers = nn.ModuleList()
        self.act_ftns = nonlins
        self.dropouts = []

        curr_layer_len = input_size
        for l in layer_lens:
            self.layers.append(nn.Linear(curr_layer_len, l, bias = True))
            curr_layer_len = l
        self.layers.append( nn.Linear(curr_layer_len,self.output_size))

        for f in drop_freqs:
             self.dropouts.append(nn.Dropout(f))

               

    def forward(self, x):
        import itertools
        components = list(itertools.zip_longest(self.layers,self.act_ftns, self.dropouts))
        for layer, act_ftn, dropout in components:
            #init components and forward through
            if layer != None:
                x = layer(x)
            if act_ftn != None:
                x = act_ftn(x)
            if dropout != None:
                x = dropout(x)
        x = nn.LogSoftmax()(x)
        return x

#------------------ Loss & Optimizer --------------------
def cross_entropy_loss(pred, soft_targets):
    logsoftmax = nn.LogSoftmax()
    return torch.mean(torch.sum(- soft_targets * logsoftmax(pred), 1))

In [2]:
#-------------- Metrics ---------------------
def cross_entropy(y_true,y_hat,  eps=1e-15):
    return -(y_true * np.log(y_hat)).sum(axis=1).mean()


def accuracy(pred, targets):
    return sum(np.argmax(pred,axis=1) == np.argmax(targets,axis=1))/len(pred)


In [3]:
import numpy as np
data = eval(open('fig_5_labels.json','r').read())
contexts = eval(open('irony/prior_states.json').read())
utters = [1,2,3,4,5]
states = [1,2,3,4,5]

def build_one_hot_utter(u):
    one_hot_utter = np.zeros(len(utters))
    one_hot_utter[utters.index(u)] = 1
    return one_hot_utter
def build_one_hot_state(s):
    one_hot_state = np.zeros(len(states))
    one_hot_state[states.index(s)] = 1
    return one_hot_state
def build_context(c):
    priors_dict = contexts[c]
    state_priors = [priors_dict[s] for s in states]
    return state_priors

In [4]:
x = []
y = []
for c, vals in data.items():
    context_prior = build_context(c)
    for utter, state in vals.items():
        one_hot_u = build_one_hot_utter(utter)
        x.append(np.concatenate((context_prior,one_hot_u),axis=None))
        y.append(state)
x = np.array(x)
y = np.array(y)

In [5]:
def train(model, optimizer, loss_fn, training_data, num_epochs):
    for epoch in range(num_epochs):
        
        x = Variable(torch.Tensor(training_data[0]))
        y = Variable(torch.Tensor(training_data[1]))

        log_y_pred = model(x)

        optimizer.zero_grad()
        loss = loss_fn(log_y_pred, y)
        print(epoch, loss.item(), end='\r')

        loss.backward()
        optimizer.step()                         

In [6]:
def append_to_sheet(model, train_metrics = ("N/A","N/A","N/A"), dev_metrics= ("N/A","N/A","N/A"), test_metrics= ("N/A","N/A","N/A")):
    import gspread
    from oauth2client.service_account import ServiceAccountCredentials

    scope = ['https://www.googleapis.com/auth/spreadsheets','https://www.googleapis.com/auth/drive']

    credentials = ServiceAccountCredentials.from_json_keyfile_name('creds.json', scope)

    gc = gspread.authorize(credentials)

    wks = gc.open('RSA-NN').sheet1

    row = str(len(wks.get_all_records()) +2)
    
    torch.save(model.state_dict(),SAVE_PATH+row+'.pt')

    wks.append_row([
                train_metrics[0], train_metrics[1], train_metrics[2],
                dev_metrics[0], dev_metrics[1], dev_metrics[2],
                test_metrics[0], test_metrics[1], test_metrics[2]])

In [7]:
def evaluate(model, data):
    x = Variable(torch.Tensor(data[0]))
    y = data[1]
    
    y_hat = model(x).data.numpy()
    
    ce = cross_entropy(y_hat,y)
    acc = accuracy(y_hat,y)
    mse = metrics.mean_squared_error(y_hat,y)
    print("ce: ", ce)
    print("acc: ", acc)
    print("mse: ", mse)
    append_to_sheet(model,(acc,mse,ce))

In [8]:
def main():
    loss_fn = cross_entropy_loss
    for learning_rate in [1e-3,1e-4,1e-2]:
        for nonlin in [[nn.Tanh(),nn.ReLU()]]:
            for num_units in [[70,80,90]]:
                model = Neural_Net(x.shape[1],y.shape[1],layer_lens=num_units,
                                  nonlins=nonlin).to(device)
                optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=.0001)
                train(model,optimizer, loss_fn, (x,y), num_epochs=NUM_EPOCHS)
                evaluate(model,(x,y))
main()



ce:  -49.379173443465604
acc:  0.8444444444444444
mse:  11.354224718051302




ce:  -42.88926511532396
acc:  0.7777777777777778
mse:  8.97692709677953




ce:  -48.38013969659788
acc:  0.8666666666666667
mse:  10.684594520143417


In [9]:
def main():
    loss_fn = cross_entropy_loss
    for learning_rate in [1e-2]:
        for nonlin in [[nn.ReLU()]]:
            for num_units in [[70,50,90]]:
                model = Neural_Net(x.shape[1],y.shape[1],layer_lens=num_units,
                                  nonlins=nonlin).to(device)
                optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=.0001)
                train(model,optimizer, loss_fn, (x,y), num_epochs=NUM_EPOCHS)
                evaluate(model,(x,y))
main()



ce:  -43.95086988374255
acc:  0.8
mse:  9.053936590332864


In [10]:
#cross entropy lower bound
z = Variable(torch.Tensor(y))
cross_entropy_loss(z,z)



tensor(1.3384)