In [1]:
import numpy as np
import pickle
import sys
import argparse
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
from sklearn.metrics import accuracy_score
np.random.seed(1337)
import torch.backends.cudnn as cudnn
import torch, torch.nn as nn, math, torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch.utils.data
import os
import warnings

In [2]:
# Batch_size = 10
# Epochs = 200
# validation_split = 0.2
shuffle = True

def data_load(model, batch_size = 10, valid_size = 0.2):
    # train, validation, test data 
    # question 1, if the train data and test data are from the same data distribution, will this be problem that the model overfit to the dataset
    
    with open('./input/'+model+'.pickle', 'rb') as handle:
                (train_x, train_y, test_x, test_y, maxlen, train_len, test_len) = pickle.load(handle)
    train_len = np.array(train_len)
    test_len = np.array(test_len)
    
    train_x = torch.from_numpy(train_x).double()
    train_y = torch.from_numpy(train_y).double()
    test_x = torch.from_numpy(test_x).double()
    test_y = torch.from_numpy(test_y).double()
    train_len = torch.from_numpy(train_len)
    test_len = torch.from_numpy(test_len)
    
    num_train = len(train_x) 
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    random_seed = 2018
    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)
        
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    
    train = torch.utils.data.TensorDataset(train_x, train_y, train_len)
    test = torch.utils.data.TensorDataset(test_x, test_y, test_len)
    
    train_loader = torch.utils.data.DataLoader(train, batch_size= batch_size, sampler=train_sampler, num_workers=2)
    valid_loader = torch.utils.data.DataLoader(train, batch_size= batch_size, sampler=valid_sampler, num_workers=2)
    test_loader = torch.utils.data.DataLoader(test, batch_size= batch_size, shuffle= True, num_workers=2)
    return train_loader, valid_loader, test_loader

In [3]:
class Unimodel(nn.Module):
    def __init__(self, input_size, hidden_size = 300, out_size = 100):
        super(Unimodel, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=1,batch_first=True, bidirectional=True)
        self.dropout1 = nn.Dropout(p = 0.6)
        self.fc1 = nn.Linear(600,out_size) 
        self.dropout2 = nn.Dropout(p = 0.9)
        self.tanh = nn.Hardtanh(-1,1)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(out_size, 2) 
       
    def forward(self, inputs, sequence_len): # input is 10 * 63 * 100
        # training details:
        pack = torch.nn.utils.rnn.pack_padded_sequence(inputs, sequence_len, batch_first=True)
        hidden = (
        Variable(torch.zeros(2, inputs.size(0), self.hidden_size),requires_grad=True),
        Variable(torch.zeros(2, inputs.size(0), self.hidden_size),requires_grad=True))
        #print(pack)
        out, hidden = self.lstm(pack, hidden)
        unpacked, unpacked_len = torch.nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
        #print(unpacked.size())
        output = self.dropout2(self.tanh(unpacked)) # apply drop out
        inter1 = self.dropout2(self.relu(self.fc1(output))) # 100
        output = self.fc2(inter1) # 2
        #print(output)
        return output, inter1

In [None]:
def sorted_sequence(input_x,sequences,y):
    # return the sorted sequence based on input_x
    diction = {}
    for i in range(int(sequences.size(0))):
        diction[i] = sequences[i]
    new_sequence = []
    new_x = torch.zeros(input_x.size(0),input_x.size(1),input_x.size(2))
    new_y = torch.zeros(y.size(0),y.size(1))
    count = 0
    for i, value in sorted(diction.items(), key=lambda x:x[1], reverse = True):
        new_sequence.append(int(value))
        new_x[count] = input_x[i]
        new_y[count] = y[i]
        count +=1
    return new_x,new_sequence,new_y

def cast_y(max_len, y):
    new_y = torch.zeros(y.size(0), max_len)
    for i in range(y.size(0)):
        new_y[i] = y[i][:max_len]
    return new_y

def save_model(net, optim, epoch, ckpt_fname):  
        state_dict = net.state_dict()                                                                                                                                                                         
        for key in state_dict.keys():                                                                                                                                                                                
            state_dict[key] = state_dict[key].cpu()                                                                                                                                                                                                                                                                                                                                                                               
        torch.save({                                                                                                                                                                                                 
            'epoch': epoch,                                                                                                                                                                                     
            'state_dict': state_dict,                                                                                                                                                                                
            'optimizer': optim},                                                                                                                                                                                     
            ckpt_fname)
        
def one_hot(train_y):
    maxlabel = 1
    new_y = torch.zeros((train_y.size(0),train_y.size(1), maxlabel+1))
    for i in range(int(train_y.size(0))):
        for j in range(int(train_y.size(1))):
            new_y[i,j,int(train_y[i,j])] = 1 
    return new_y
def generate_weight(sequence_length, y):
    mask = torch.zeros((y.size(0),y.size(1)))
    for i in range(y.size(0)):
        for j in range(sequence_length[i]):
            mask[i,j] = 1.0 
    return mask
            

In [59]:
from collections import defaultdict
epochs = 30
models = ['audio', 'video','text']
input_size = {'text': 100, 'audio': 73, 'video':100}
#use_gpu = torch.cuda.is_available()
use_gpu = False
print(use_gpu)
unimodal_activations = {}

train_loss = defaultdict(list)
train_acc = defaultdict(list)
test_loss = defaultdict(list)
test_acc = defaultdict(list)
valid_acc = defaultdict(list)
valid_loss = defaultdict(list)

unimodal_activations = {}
criterion = nn.CrossEntropyLoss(size_average = False)

def train():
    for mode in models:
        unimodal_activations = {}
        model = Unimodel(input_size[mode])
        #train_loader, valid_loader, test_loader = data_load(mode, 10, 0.2)
        if(use_gpu):
            model.cuda()
        optimizer = optim.Adagrad(params = model.parameters(), lr = 0.01)
        running_loss = 0
        total = 0
        correct = 0
        print('begin training for unimodel ' + mode)
        for epoch in range(epochs):
            train_loader, valid_loader, test_loader = data_load(mode, 10, 0.2)
            model.train()
            for e, data in enumerate(train_loader):
                optimizer.zero_grad()
                input_x, y, sequence_len = data[0], data[1], data[2]
                input_x, sequence_len, y = sorted_sequence(input_x,sequence_len, y)
                if use_gpu:
                    input_x = Variable(input_x.cuda(), requires_grad=True)
                else:
                    input_x = Variable(input_x, requires_grad=True)
                y = cast_y(sequence_len[0], y).long()
                if use_gpu:
                    y = Variable(y.cuda())
                predict_y, inter1 = model(input_x, sequence_len)

                loss = criterion(predict_y.view(-1,2), y.view(-1))/sum(sequence_len)
                _, predicted = torch.max(predict_y.view(-1,2).data, 1)
                train_mask = generate_weight(sequence_len, y)
                predicted = train_mask.view(-1).long() * predicted
                total += sum(sequence_len)
                correct += (predicted.data == y.view(-1).data).sum().int().data[0] - y.view(-1).size(0) + sum(sequence_len)
                running_loss += loss.data[0]
                
                loss.backward()
                optimizer.step()
            train_loss[mode].append( 1. * running_loss)
            train_acc[mode].append(1.* (correct.item()) /total)
            running_loss = 0
            correct = 0
            total = 0
            model.eval()
            for e, data in enumerate(valid_loader):
                    input_x, y, sequence_len = data[0], data[1], data[2]
                    input_x, sequence_len, y = sorted_sequence(input_x,sequence_len, y)
                    if use_gpu:
                        input_x = Variable(input_x.cuda(), requires_grad=False)
                    else:
                        input_x = Variable(input_x, requires_grad=False)
                    y = cast_y(sequence_len[0], y).long()
                    if use_gpu:
                        y = Variable(y.cuda())
                    predict_y, inter2 = model(input_x, sequence_len) 
                    loss = criterion(predict_y.view(-1,2), y.view(-1))/sum(sequence_len)
                    running_loss += loss.data[0]
                    _, predicted = torch.max(predict_y.view(-1,2).data, 1)
    
                    total += sum(sequence_len)
                    train_mask = generate_weight(sequence_len, y)
                    predicted = train_mask.view(-1).long() * predicted
                    correct += (predicted.data == y.view(-1).data).sum().int().data[0] - y.view(-1).size(0) + sum(sequence_len)
                    
            if(epoch %1 == 0):
                    print("epoch %d train acc %g valid acc % f" %(epoch, train_acc[mode][-1], 1.* correct.item()/total))
                    valid_loss[mode].append(1.*running_loss)
                    valid_acc[mode].append(1.* correct.item() /total)
                    running_loss = 0
                    correct = 0
                    total = 0
            for e, data in enumerate(test_loader):
                    input_x, y, sequence_len = data[0], data[1], data[2]
                    input_x, sequence_len, y = sorted_sequence(input_x,sequence_len, y)
                    if use_gpu:
                        input_x = Variable(input_x.cuda(), requires_grad=False)
                    else:
                        input_x = Variable(input_x, requires_grad=False)
                    y = cast_y(sequence_len[0], y).long()
                    if use_gpu:
                        y = Variable(y.cuda())
                    predict_y, inter2 = model(input_x, sequence_len) 
                    loss = criterion(predict_y.view(-1,2), y.view(-1))/sum(sequence_len)
                    _, predicted = torch.max(predict_y.view(-1,2).data, 1)
                    train_mask = generate_weight(sequence_len, y)
                    predicted = train_mask.view(-1).long() * predicted
                    total += sum(sequence_len)
                    correct += (predicted.data == y.view(-1).data).sum().int().data[0] - y.view(-1).size(0) + sum(sequence_len)
                    running_loss += loss.data[0]
                
            if epoch % 5 == 0:
                    print("test loss %f acc %g" %(running_loss, 1.* correct.item()/total))
            test_loss[mode].append(1.*running_loss)
            test_acc[mode].append(1.* correct.item()/total)
            running_loss = 0
            correct = 0
            total = 0
            if(epoch > 10 and valid_acc[mode][-1] <= valid_acc[mode][-2] and valid_acc[mode][-2] <= valid_acc[mode][-3]):
                print('stop early at %d' % epoch)
                break
        filename = 'result/train_' + mode+ '_epoch_%d.dat' % (epoch +1)
        with open(filename,'w'):
            save_model(model, optimizer, epoch, filename)  
            
        train_loader, valid_loader, test_loader = data_load(mode, input_size[mode], 0)
        unimodal_activations = {}
        for i,data in enumerate(train_loader):
            input_x, y, sequence_len = data[0], data[1], data[2]
            input_x, sequence_len, y = sorted_sequence(input_x,sequence_len, y)
            input_x = Variable(input_x)
            y = cast_y(sequence_len[0], y).long()
            predict_y, inter1 = model(input_x, sequence_len)
            unimodal_activations['train'] = inter1
            unimodal_activations['train_len'] = sequence_len
            unimodal_activations['train_y'] = y
        for i,data in enumerate(test_loader):
            input_x, y, sequence_len = data[0], data[1], data[2]
            input_x, sequence_len, y = sorted_sequence(input_x,sequence_len, y)
            input_x = Variable(input_x)
            y = cast_y(sequence_len[0], y).long()
            predict_y, inter1 = model(input_x, sequence_len)
            unimodal_activations['test'] = inter1
            unimodal_activations['test_len'] = sequence_len
            unimodal_activations['test_y'] = y
        
        with open('result/' + mode + '_result_epoch_%d.pickle' %(epoch +1), 'wb') as handle:
            result = defaultdict()
            result['train_loss'] = train_loss
            result['train_acc'] = train_acc
            result['test_loss'] = test_loss
            result['test_acc'] = test_acc
            result['valid_acc'] = valid_acc
            result['valid_loss'] = valid_loss
            pickle.dump(result, handle, protocol=pickle.HIGHEST_PROTOCOL)
        with open('result/' + mode + '_unimodel_epoch_%d'.pickle %(epoch +1), 'wb') as handle:
            pickle.dump(unimodal_activations, handle, protocol=pickle.HIGHEST_PROTOCOL)

False


In [60]:
warnings.simplefilter("ignore", UserWarning)
train() # text 0.783245 audio 0.378989 

begin training for unimodel audio
epoch 0 train acc 0.486603 valid acc  0.427586
test loss 4.370063 acc 0.378989


AttributeError: 'str' object has no attribute 'pickle'

In [41]:
train_loader, valid_loader, test_loader = data_load('txt', 100, 0)
# model = Unimodel(input_size['video'])
for i,data in enumerate(train_loader):
    input_x, y, sequence_len = data[0], data[1], data[2]
    input_x, sequence_len, y = sorted_sequence(input_x,sequence_len, y)
    input_x = Variable(input_x)
    y = cast_y(sequence_len[0], y).long()
    predict_y, inter1 = model(input_x, sequence_len)
    print(i)
    print(inter1.size())

0
torch.Size([62, 55, 100])


In [31]:
print(unimodal_activations['text_train'].size())
def multiModel(nn.Module()):
     def __init__(self, input_size, hidden_size = 300, out_size = 500):
        super(Unimodel, self).__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers=1,batch_first=True, bidirectional=True)
        self.dropout1 = nn.Dropout(p = 0.6)
        self.fc1 = nn.Linear(600,out_size) 
        self.dropout2 = nn.Dropout(p = 0.9)
        self.tanh = nn.Hardtanh(-1,1)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(out_size, 2) 
       
    def forward(self, inputs, sequence_len): # input is 10 * 63 * 100
        # training details:
        pack = torch.nn.utils.rnn.pack_padded_sequence(inputs, sequence_len, batch_first=True)
        hidden = (
        Variable(torch.zeros(2, inputs.size(0), self.hidden_size),requires_grad=True),
        Variable(torch.zeros(2, inputs.size(0), self.hidden_size),requires_grad=True))
        #print(pack)
        out, hidden = self.lstm(pack, hidden)
        unpacked, unpacked_len = torch.nn.utils.rnn.pad_packed_sequence(out, batch_first=True)
        #print(unpacked.size())
        output = self.dropout2(self.tanh(unpacked)) # apply drop out
        inter1 = self.dropout2(self.relu(self.fc1(output))) # 100
        output = self.fc2(inter1) # 2
        #print(output)
        return output

torch.Size([10, 30, 100])
