In [1]:
import sys
from collections import defaultdict
import itertools
import math

import torch
import torch.nn as nn
from torch.autograd import Variable
import time
from sklearn import metrics
import numpy as np
import json
import torchtext.vocab


SAVE_PATH = 'models/model_'
NUM_EPOCHS = 1000

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# all the one-hot embedding etc. has to be handled here, rather in data generation,
# because we have to know which word indice go to which real words in order to do the embedding.

#--------------- Model --------------
class Neural_Net(nn.Module):
    def __init__(self, input_size, output_size, layer_lens = [30,40,50], nonlins = [nn.Tanh(), nn.ReLU()], drop_freqs= []):
        super(Neural_Net, self).__init__()

        self.input_size = input_size
        self.output_size = output_size

        self.layers = nn.ModuleList()
        self.act_ftns = nonlins
        self.dropouts = []

        curr_layer_len = input_size
        for l in layer_lens:
            self.layers.append(nn.Linear(curr_layer_len, l, bias = True))
            curr_layer_len = l
        self.layers.append( nn.Linear(curr_layer_len,self.output_size))

        for f in drop_freqs:
             self.dropouts.append(nn.Dropout(f))

               

    def forward(self, x):
        import itertools
        components = list(itertools.zip_longest(self.layers,self.act_ftns, self.dropouts))
        for layer, act_ftn, dropout in components:
            #init components and forward through
            if layer != None:
                x = layer(x)
            if act_ftn != None:
                x = act_ftn(x)
            if dropout != None:
                x = dropout(x)
        x = nn.Softmax()(x)
        return x

#------------------ Loss & Optimizer --------------------
def cross_entropy_loss(y_true, y_hat):
    return torch.mean(torch.sum(- y_true * torch.log(y_hat), 1))

In [2]:
#-------------- Metrics ---------------------
def cross_entropy(y_true,y_hat,  eps=1e-15):
    return -(y_true * np.log(y_hat)).sum(axis=1).mean()

def accuracy(pred, targets):
    return sum(np.argmax(pred,axis=1) == np.argmax(targets,axis=1))/len(pred)

In [3]:
import numpy as np
synthetic_labels = eval(open('synthetic_fig_5_labels.json','r').read())
synthetic_priors = eval(open('irony/synthetic_prior_states.json').read())
utters = [1,2,3,4,5]
states = [1,2,3,4,5]

def build_one_hot_utter(u):
    one_hot_utter = np.zeros(len(utters))
    one_hot_utter[utters.index(u)] = 1
    return one_hot_utter

def build_context(c,priors):
    priors_dict = priors[c]
    state_priors = [priors_dict[s] for s in states]
    return state_priors

In [4]:
def inp_out(data, priors):
    x = []
    y = []
    for c, vals in data.items():
        context_prior = build_context(c,priors)
        for utter, state in vals.items():
            one_hot_u = build_one_hot_utter(utter)
            x.append(np.concatenate((context_prior,one_hot_u),axis=None))
            y.append(state)            
    x = np.array(x)
    y = np.array(y)
    return x,y

In [5]:
def train(model, optimizer, loss_fn, training_data, num_epochs):
    prev_loss = -100000000
    
    for epoch in range(num_epochs):
        
        x = Variable(torch.Tensor(training_data[0]))
        y = Variable(torch.Tensor(training_data[1]))

        y_hat = model(x)

        optimizer.zero_grad()
        loss = loss_fn(y,y_hat)
        print(epoch, loss.item(), end='\r')

        loss.backward()
        optimizer.step()       
        
        if epoch > 350:
            dev_y_hat = predict(model, dev_x)
            dev_loss = cross_entropy(dev_y,dev_y_hat)
            if prev_loss < dev_loss:
                break
            else:
                prev_loss = loss


In [6]:
def append_to_sheet(model, train_metrics = ("N/A","N/A","N/A"), dev_metrics= ("N/A","N/A","N/A"), test_metrics= ("N/A","N/A","N/A")):
    import gspread
    from oauth2client.service_account import ServiceAccountCredentials

    scope = ['https://www.googleapis.com/auth/spreadsheets','https://www.googleapis.com/auth/drive']

    credentials = ServiceAccountCredentials.from_json_keyfile_name('creds.json', scope)

    gc = gspread.authorize(credentials)

    wks = gc.open('RSA-NN').sheet1

    row = str(len(wks.get_all_records()) +2)
    
    torch.save(model.state_dict(),SAVE_PATH+row+'.pt')

    wks.append_row([
                train_metrics[0], train_metrics[1], train_metrics[2],
                dev_metrics[0], dev_metrics[1], dev_metrics[2],
                test_metrics[0], test_metrics[1], test_metrics[2]])

In [7]:
def predict(model, data):
    x = Variable(torch.Tensor(data))
    
    y_hat = model(x).data.numpy()
    row_sums = y_hat.sum(axis=1)
    y_hat = y_hat / row_sums[:, np.newaxis]
    return y_hat

def evaluate(y,y_hat):
    ce = cross_entropy(y,y_hat)
    acc = accuracy(y_hat,y)
    mse = metrics.mean_squared_error(y_hat,y)
    print("ce: ", ce)
    print("acc: ", acc)
    print("mse: ", mse)
    #append_to_sheet(model,(acc,mse,ce))

In [8]:
x,y = inp_out(synthetic_labels, synthetic_priors)

split_1 = int(.8 * len(x))
split_2 = int(.9 * len(x))

train_x, train_y = x[:split_1], y[:split_1]
dev_x, dev_y = x[split_1:split_2], y[split_1:split_2]
test_x, test_y = x[split_2:], y[split_2:]

In [9]:
final_model = None
final_model_ce = 1000000

loss_fn = cross_entropy_loss
for learning_rate in [1e-3,1e-4,1e-2]:
    for nonlin in [[nn.Tanh(),nn.ReLU()]]:
        for num_units in [[70,80,90]]:
            model = Neural_Net(train_x.shape[1],train_y.shape[1],layer_lens=num_units,
                              nonlins=nonlin).to(device)
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=.0001)
            train(model,optimizer, loss_fn, (train_x,train_y), num_epochs=NUM_EPOCHS)
            y_hat = predict(model,train_x)
            evaluate(train_y,y_hat)
            
            #find optimal model
            ce = cross_entropy(train_y,y_hat)
            if ce < final_model_ce:
                final_model = model
                final_model_ce = ce



ce:  1.0365256577383264
acc:  0.9525
mse:  0.0007171337404917757
24 1.5860933065414429



ce:  1.1123113583747073
acc:  0.8625
mse:  0.006070090217469137
53 1.0600376129150395



ce:  1.0234051260229162
acc:  0.9875
mse:  0.00014475210691958343


In [10]:
torch.save(final_model.state_dict(), "irony_models/model.pth")

In [11]:
y_hat = predict(model,test_x)
evaluate(test_y,y_hat)

ce:  1.0976889162806824
acc:  0.94
mse:  0.0015724501474155001




In [20]:
fig_5_labels = eval(open('fig_5_labels.json','r').read())
priors = eval(open('irony/prior_states.json').read())

x_real,y_real = inp_out(fig_5_labels, priors)
y_hat = predict(model,x_real)
evaluate(y_real,y_hat)

ce:  1.0041271231281907
acc:  0.9555555555555556
mse:  0.00416327250902703


