In [2]:
import pandas as pd
import math
import time
import torch

from dataloading import *
from model import *

def train(model, inputs, targets, optimizer, criterion, computing_device):
    model = model.to(computing_device)
    
    model.train()
    
    epoch_loss = 0
    
    #shuffle
    indices=list(range(len(targets)))
    np.random.shuffle(indices)
    
    for i in indices:
        
        src = inputs[i].to(computing_device)
        trg = targets[i].to(computing_device)
        
        optimizer.zero_grad()
        
        outputs = model(src, trg, teacher_forcing_ratio=1.0)
        
        labels = torch.argmax(trg, dim=2) # grab indices for loss function
        
        #targets = [trg sent len, batch size]
        #outputs = [trg sent len, batch size, output dim]
        
        #print('expected')
        #print(src.size())
        #print(labels)
        #print(torch.argmax(outputs, dim=2))
        
        #outputs = outputs[1:].view(-1, outputs.shape[-1]) 
        #labels = labels[1:].view(-1)
        
        outputs = outputs.view(-1, outputs.shape[-1]) 
        labels = labels.view(-1)
        
        outputs = outputs.to(computing_device)
        
        #targets = [(trg sent len - 1) * batch size]- trg should be list of indicies
        #outputs = [(trg sent len - 1) * batch size, output dim]
        
        loss = criterion(outputs, labels)
        
        loss.backward()
        
        #torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(targets)

def interpret_output(outputs):
    outputs = F.softmax(outputs, dim=2)
    outputs = torch.argmax(outputs, dim=2)
    nums = []
    seen_period=False
    for j in range(outputs.size()[1]):
        num=''
        for i in range(1, outputs.size()[0]):
            if outputs[i][j]>=13:
                break
            dig = indexToDigit(outputs[i][j])
            if dig=='.':
                if seen_period:
                    continue
                seen_period=True
            num+=dig
            
        #print(num)
        nums.append(float(num))
    return nums


def evaluate(model, inputs, targets, optimizer, criterion, computing_device):
    model=model.to(computing_device)
    model.eval()
    
    total_mse=0.0
    total_loss=0.0
    
    for i in range(len(targets)):
            src = inputs[i].to(computing_device)
            trg = targets[i].to(computing_device)
            
            #print(len(src))
            #print(len(trg))

            optimizer.zero_grad()

            outputs = model(src, trg, teacher_forcing_ratio=0.0)
            outputs = outputs.to(computing_device)

            #print('expected')
            #print(trg.size())
            #print(torch.argmax(trg, dim=2))
            #print(torch.argmax(outputs, dim=2))
            
            #loss = criterion(outputs, labels)
            #total_loss+=loss
            
            num_labels = interpret_output(trg)

            #print(num_labels)

            num_predictions = interpret_output(outputs)

            #print(num_predictions)

            # shape = [seq_len, batch_size]
            mse = mean_squared_error(num_labels,num_predictions)
            total_mse+=mse
            #print('MSE',mse)
            
    total_mse/=len(targets)
    #total_loss/=len(targets)
    #return total_mse,total_loss
    return total_mse

In [3]:
computing_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

n_digits = 10
#OUTPUT_DIM = n_digits + 2
n_chars=256
INPUT_DIM = n_chars+4
OUTPUT_DIM = n_digits + 5
#ENC_EMB_DIM = n_chars+1
#DEC_EMB_DIM = OUTPUT_DIM

HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.0
DEC_DROPOUT = 0.0
device=None

criterion = nn.CrossEntropyLoss(ignore_index=output_pad_index)

print('done')

done


In [4]:
import os

data_dir = 'data/numerical_data_set_simple_torch'

filenames = []
filenames_by_type = {'A':[], 'B':[], 'C':[], 'D':[], 'E':[]}
index_to_type = {0:'A', 1:'B', 2:'C', 3:'D', 4:'E'}

for file in os.listdir(data_dir):
    filename, file_extension = os.path.splitext(file)
    
    typ = filename[-1]
    if typ in filenames_by_type:
        filenames.append(file)
        filenames_by_type[typ].append(file)
        
print(len(filenames))
print(filenames_by_type)
for key in filenames_by_type:
    print(len(filenames_by_type[key]))

enc = Encoder(INPUT_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)
model = Seq2Seq(enc, dec)#.to(device)
optimizer = optim.Adam(model.parameters())


for typ in filenames_by_type:
    filenames_by_type[typ] = filenames_by_type[typ][:2]
    
print(filenames_by_type)

27
{'A': ['labelled_gen_data1_A', 'labelled_gen_data3_A', 'labelled_gen_data2_A', 'labelled_extr_data1_A', 'labelled_extr_data3_A'], 'B': ['labelled_extr_data19_B', 'labelled_extr_data18_B', 'labelled_gen_data5_B', 'labelled_gen_data4_B', 'labelled_gen_data6_B'], 'C': ['labelled_gen_data10_C', 'labelled_gen_data11_C', 'labelled_gen_data7_C', 'labelled_gen_data8_C', 'labelled_gen_data9_C'], 'D': ['labelled_extr_data2_D', 'labelled_gen_data12_D', 'labelled_gen_data13_D', 'labelled_extr_data4_D'], 'E': ['labelled_extr_data20_E', 'labelled_gen_data16_E', 'labelled_dir_data91_E', 'labelled_gen_data15_E', 'labelled_dir_data92_E', 'labelled_gen_data14_E', 'labelled_dir_data39_E', 'labelled_dir_data49_E']}
5
5
5
4
8
{'A': ['labelled_gen_data1_A', 'labelled_gen_data3_A'], 'B': ['labelled_extr_data19_B', 'labelled_extr_data18_B'], 'C': ['labelled_gen_data10_C', 'labelled_gen_data11_C'], 'D': ['labelled_extr_data2_D', 'labelled_gen_data12_D'], 'E': ['labelled_extr_data20_E', 'labelled_gen_data16_

In [6]:
K=5
num_epochs = 6
BATCH_SIZE=800

min_val_loss=100.0
opt_num_layers=None
opt_hid_dim=None

for hid_dim in [512,2048,1024,256]:
    for n_layers in [4,8,2,6,1]:
        avg_val_loss=0.0
        for k in range(K):
            print('k={}, num_layers={}, hid_dim={}'.format(k,n_layers,hid_dim))
            
            # init model
            enc = Encoder(INPUT_DIM, hid_dim, n_layers, ENC_DROPOUT)
            dec = Decoder(OUTPUT_DIM, hid_dim, n_layers, DEC_DROPOUT)
            model = Seq2Seq(enc, dec)
            optimizer = optim.Adam(model.parameters())
            
            # get train data
            start=time.time()
            print('...loading data')
            if index_to_type[k] != 'A':
                init='A'
            else:
                init='B'
            filename=filenames_by_type[init][0]
            q = torch.load(os.path.join(data_dir,filename))
            inputs,targets = q[0],q[1]
                
            for typ in filenames_by_type:
                if typ==index_to_type[k]:
                    continue
                if typ==init:
                    for filename in filenames_by_type[typ][1:]:
                        q = torch.load(os.path.join(data_dir,filename))
                        src,trg = q[0],q[1]
                        inputs=torch.cat([inputs,src],dim=1)
                        targets=torch.cat([targets,trg],dim=1)
                else:
                    for filename in filenames_by_type[typ]:
                        q = torch.load(os.path.join(data_dir,filename))
                        src,trg = q[0],q[1]
                        inputs=torch.cat([inputs,src],dim=1)
                        targets=torch.cat([targets,trg],dim=1)
                        
            n_chunks = math.ceil(inputs.size()[1]/BATCH_SIZE)
            inputs = torch.chunk(inputs, n_chunks, dim=1) 
            targets = torch.chunk(targets, n_chunks, dim=1) 
            
            # get val data
            val = index_to_type[k]
            filename=filenames_by_type[val][0]
            q = torch.load(os.path.join(data_dir,filename))
            val_inputs,val_targets = q[0],q[1]
            
            for filename in filenames_by_type[val][1:]:
                q = torch.load(os.path.join(data_dir,filename))
                src,trg = q[0],q[1]
                val_inputs=torch.cat([val_inputs,src],dim=1)
                val_targets=torch.cat([val_targets,trg],dim=1)
                
            n_chunks = math.ceil(val_inputs.size()[1]/BATCH_SIZE)
            val_inputs = torch.chunk(val_inputs, n_chunks, dim=1) 
            val_targets = torch.chunk(val_targets, n_chunks, dim=1) 
            
            print('   load time: {}'.format(time.time()-start))
            
            # train 
            print('...training')
            for epoch in range(num_epochs):
                start=time.time()
                loss = train(model, inputs, targets, optimizer, criterion, computing_device)
                print('   epoch {}: train_loss:{}, time:{}'.format(epoch,loss,time.time()-start))
            
            #validate
            print('...validating')
            start=time.time()
            _,val_loss = evaluate(model, val_inputs, val_targets, optimizer, criterion, computing_device)
            print('   epoch {}: val_loss:{}, time:{}'.format(epoch,loss,time.time()-start))
            
            avg_val_loss+=val_loss
        avg_val_loss/=K
        
        print('epoch {}: AVG VAL LOSS:{}, time:{}'.format(epoch,loss,time.time()-start))
        
        # save model
        PATH = "./output/model_{}_{}.pt".format(num_layers,hid_dim)
        torch.save(model.state_dict(), PATH)
        
        # update optimal hyperparams
        if avg_val_loss < min_val_loss:
            min_val_loss=avg_val_loss
            opt_num_layers=num_layers
            opt_hid_dim=hid_dim

k=0, num_layers=4, hid_dim=512
...loading data
   load time: 54.85864496231079
...training
   epoch 0: train_loss:1.857205251252876, time:389.5321190357208
   epoch 1: train_loss:1.6910399448196842, time:384.7863883972168
   epoch 2: train_loss:1.5757047309065766, time:383.1034688949585
   epoch 3: train_loss:1.5738551254542368, time:390.4987704753876
   epoch 4: train_loss:1.538471963889194, time:383.0964455604553
   epoch 5: train_loss:1.5215876445455372, time:387.5026705265045
...validating


RuntimeError: Expected object of type torch.cuda.FloatTensor but found type torch.FloatTensor for argument #4 'mat1'