<a href="https://colab.research.google.com/github/LittlePea13/SMNLSHyper/blob/master/Basic_Multitask_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from google.colab import drive
# Mount Google Drive into the Colab runtime; force_remount=True re-mounts even if it is already mounted.
drive.mount('/content/gdrive', force_remount=True)
# Make sure the correct files are uploaded to the notebook runtime. They are:
# datasets.py, helper.py, hyp_labels.npy, main.py, main_hyper.py, model.py.
# Also keep hyp_embeds.npy on your Drive. That should be all that is needed.
# NOTE(review): the cells below import `model1` / `helper1` and load `train_labels.npy`,
# `valid_labels.npy`, `vua_*_labels.npy` plus `*_embeds.npy` from Drive, so this file list
# looks out of date -- confirm which files are actually required.

Mounted at /content/gdrive


In [0]:
from model1 import BiLSTMEncoder,MainModel, ModelHyper, SelfAttention, Metaphor, multitask_model
from helper1 import evaluate, evaluate_train, get_metaphor_dataset, write_board, get_document_dataset, evaluate_train_hyper, evaluate_hyper
import torch.nn as nn
import torch.optim as optim
import torch
import time
from datasets import DocumentDataset
import torch.utils.data as data_utils
import numpy as np
from torch.nn.utils.rnn import pad_sequence

In [0]:
# Load the document-level data for both splits: labels come from the working directory,
# embeddings from the mounted Drive folder.
# SECURITY NOTE: allow_pickle=True unpickles arbitrary Python objects -- only load files you trust.
# NOTE(review): if the per-document arrays have differing shapes, recent NumPy versions require
# dtype=object when building the outer array -- confirm against the data actually stored.

train_labels = np.load('train_labels.npy', allow_pickle=True)
train_embeddings = np.load('gdrive/My Drive/train_embeds.npy', allow_pickle=True)
# `np.int` was only an alias of the builtin `int`; it was deprecated in NumPy 1.20 and removed
# in 1.24, so the builtin is used directly (identical resulting dtype).
train_labels = np.array([np.array(xi).astype(int) for xi in train_labels])
train_embeddings = np.array([np.array(xi) for xi in train_embeddings])

valid_labels = np.load('valid_labels.npy', allow_pickle=True)
valid_embeddings = np.load('gdrive/My Drive/valid_embeds.npy', allow_pickle=True)
valid_labels = np.array([np.array(xi).astype(int) for xi in valid_labels])
valid_embeddings = np.array([np.array(xi) for xi in valid_embeddings])


In [0]:
# Wrap each split's embeddings and labels in a DocumentDataset.
# Both splits are built with the same third constructor argument (200).
train_dataset, valid_dataset = (
    DocumentDataset(split_embeddings, split_labels, 200)
    for split_embeddings, split_labels in (
        (train_embeddings, train_labels),
        (valid_embeddings, valid_labels),
    )
)

In [0]:
!pip install tensorboardX



In [0]:
!pip install -q tf-nightly-2.0-preview
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [0]:
%tensorboard --logdir runs

Reusing TensorBoard on port 6006 (pid 347), started 3:04:26 ago. (Use '!kill 347' to kill it.)

In [0]:
class MetaphorModel(nn.Module):
    """Metaphor head that classifies already-encoded inputs with a `Metaphor` classifier.

    A `SelfAttention` sub-module is also constructed and registered, but `forward`
    does not use it.
    """

    def __init__(self, hidden_dim, dropout_FC, num_classes):
        """Store the hyperparameters and build the sub-modules.

        Args:
            hidden_dim: hidden size; the attention module is built with `2 * hidden_dim`.
            dropout_FC: dropout value forwarded to the `Metaphor` classifier.
            num_classes: number of output classes forwarded to the `Metaphor` classifier.
        """
        super().__init__()

        self.hidden_dim = hidden_dim
        self.dropout_FC = dropout_FC

        # Registration order is kept (attention first, classifier second) so the
        # module's state_dict layout stays the same.
        attention_width = 2 * hidden_dim
        self.self_attention = SelfAttention(attention_width)
        self.metafor_classifier = Metaphor(dropout_FC, num_classes, hidden_dim)

    def forward(self, out_embedding, inputs, lengths):
        """Return the classifier output for `out_embedding`.

        `inputs` and `lengths` are accepted for interface compatibility but are not read.
        """
        return self.metafor_classifier(out_embedding)

In [0]:
class HyperModel(nn.Module):
    """Document-level head built on top of per-sentence encoder outputs.

    Pipeline of `forward`: attention-pool each sentence, regroup the pooled
    sentences into documents, encode the padded documents with a `BiLSTMEncoder`,
    attention-pool each document and classify the result with `doc_classifier`.
    """

    def __init__(self, hidden_dim, layers, dropout_FC, dropout_lstm_hyper, dropout_input_hyper, dropout_attention, num_classes):
        """Build the sub-modules.

        Args:
            hidden_dim: hidden size; attention modules and the document encoder
                input are built with `2 * hidden_dim`.
            layers: layer count forwarded to the document `BiLSTMEncoder`.
            dropout_FC: dropout value forwarded to both `Metaphor` classifiers.
            dropout_lstm_hyper: forwarded to the document `BiLSTMEncoder`.
            dropout_input_hyper: forwarded to the document `BiLSTMEncoder`.
            dropout_attention: forwarded to both `SelfAttention` modules.
            num_classes: number of output classes for both classifiers.
        """
        super().__init__()

        self.hidden_dim = hidden_dim
        self.layers = layers
        self.dropout_FC = dropout_FC
        # Sub-modules are registered in the original order so state_dict keys/ordering are unchanged.
        self.self_attention = SelfAttention(2 * hidden_dim, dropout_attention)
        self.self_attention_sentence = SelfAttention(2 * hidden_dim, dropout_attention)
        self.doc_embbedding = BiLSTMEncoder(2 * hidden_dim, hidden_dim, layers, dropout_lstm_hyper, dropout_input_hyper)
        # `metafor_classifier` is not used by `forward`; it is kept so existing checkpoints still load.
        self.metafor_classifier = Metaphor(dropout_FC, num_classes, hidden_dim)
        self.doc_classifier = Metaphor(dropout_FC, num_classes, hidden_dim)
        if torch.cuda.is_available():
            # NOTE(review): only this (unused) classifier is moved to CUDA here; the other
            # sub-modules rely on the caller moving the whole model -- confirm this is intended.
            self.metafor_classifier.to(device=torch.device('cuda'))

    def forward(self, predicted, squezeed_lengths, inputs, lengths, doc_lengths):
        """Classify documents from per-sentence encoder outputs.

        Args:
            predicted: per-sentence encoder output passed to the sentence attention.
            squezeed_lengths: tensor of sentence lengths (cast with `.int()` before use).
            inputs: unused, kept for interface compatibility.
            lengths: unused, kept for interface compatibility.
            doc_lengths: number of sentences per document; used to split the pooled
                sentences back into documents, and passed to the document encoder
                and document attention.

        Returns:
            The output of `doc_classifier` for the attention-pooled documents.
        """
        # The leftover debugging print (" First layer") and the dead time.time()
        # bookkeeping were removed: they measured nothing and flooded the cell
        # output on every forward pass.

        # Pool every sentence into one vector; the attention module returns a
        # 3-tuple of which only the first element is needed here.
        averaged_docs, _, _ = self.self_attention_sentence(predicted, squezeed_lengths.int())

        # Regroup the flat list of sentence vectors into one chunk per document,
        # then zero-pad to the longest document in the batch.
        predicted_docs = torch.split(averaged_docs, split_size_or_sections=list(doc_lengths))
        predicted_docs = pad_sequence(predicted_docs, batch_first=True, padding_value=0)

        # Call the module itself rather than `.forward` so registered hooks run.
        out_embedding = self.doc_embbedding(predicted_docs, doc_lengths)
        prediction, _, _ = self.self_attention(out_embedding, doc_lengths)
        return self.doc_classifier(prediction)


In [0]:
class multitask_model1(nn.Module):
    """Multitask model: one shared sentence encoder feeding a metaphor head and a document head.

    NOTE(review): `forward` takes `is_doc` as its FIRST argument, whereas the training
    cell calls `model(data, sen_len, doc_len, is_doc)` on the imported `multitask_model`.
    This class is therefore not a drop-in replacement for that one without adapting the calls.
    """

    def __init__(self, encoder_param, hyper_param, meta_param):
        """Build the shared encoder and both task heads.

        Args:
            encoder_param: dict with keys 'embed_dim', 'hidden_dim', 'layers',
                'dropout_lstm', 'dropout_input' for the shared `BiLSTMEncoder`.
            hyper_param: dict with keys 'hidden_dim', 'layers', 'dropout_FC',
                'dropout_lstm_hyper' and, optionally, 'dropout_input_hyper' and
                'dropout_attention' (both fall back to 'dropout_lstm_hyper').
            meta_param: dict with keys 'hidden_dim' and 'dropout_FC'.
        """
        # Fix: the original called super(multitask_model, self), naming a different
        # (imported) class, which raises TypeError when this class is instantiated.
        super().__init__()

        self.embedding = BiLSTMEncoder(embed_dim=encoder_param['embed_dim'],
                                       hidden_dim=encoder_param['hidden_dim'],
                                       layers=encoder_param['layers'],
                                       dropout_lstm=encoder_param['dropout_lstm'],
                                       dropout_input=encoder_param['dropout_input'])

        self.metaphor_model = MetaphorModel(hidden_dim=meta_param['hidden_dim'],
                                            dropout_FC=meta_param['dropout_FC'],
                                            num_classes=2)

        # Fix: the input and attention dropouts were both read from
        # 'dropout_lstm_hyper' (copy-paste error). Each now reads its own key and
        # falls back to the old value so dicts lacking those keys keep working.
        lstm_dropout = hyper_param['dropout_lstm_hyper']
        self.hyper_model = HyperModel(hidden_dim=hyper_param['hidden_dim'],
                                      layers=hyper_param['layers'],
                                      dropout_FC=hyper_param['dropout_FC'],
                                      dropout_lstm_hyper=lstm_dropout,
                                      dropout_input_hyper=hyper_param.get('dropout_input_hyper', lstm_dropout),
                                      dropout_attention=hyper_param.get('dropout_attention', lstm_dropout),
                                      num_classes=2)

    def forward(self, is_doc, input_data, length_data, length_doc):
        """Run the shared encoder and the task head(s).

        Args:
            is_doc: when truthy, the document head is evaluated as well.
            input_data: tensor fed to the shared encoder.
            length_data: tensor of lengths passed alongside `input_data`.
            length_doc: tensor of per-document lengths; only read when `is_doc` is truthy.

        Returns:
            `(meta_pred, hyp_pred)`; `hyp_pred` is None when `is_doc` is falsy.
        """
        # Resolve the target device once; None means "stay where the tensors are".
        cuda_device = torch.device('cuda') if torch.cuda.is_available() else None

        if cuda_device is not None:
            input_data = input_data.to(device=cuda_device)
            length_data = length_data.to(device=cuda_device)

        out_embedding = self.embedding(input_data, length_data)
        if cuda_device is not None:
            out_embedding = out_embedding.to(device=cuda_device)

        # The metaphor head is always evaluated, even for document batches.
        meta_pred = self.metaphor_model(out_embedding, input_data, length_data)

        hyp_pred = None
        if is_doc:
            if cuda_device is not None:
                length_doc = length_doc.to(device=cuda_device)
            # `length_data` is passed twice on purpose: it fills both the
            # `squezeed_lengths` and `lengths` parameters of HyperModel.forward.
            hyp_pred = self.hyper_model(out_embedding, length_data, input_data, length_data, length_doc)

        return meta_pred, hyp_pred

In [0]:
import torch.utils.data as data_utils
from datasets import SentenceDataset,DocumentDataset, AdaptSampler


# Document loaders (train first, then validation): batches are chosen by an
# AdaptSampler built from each dataset's `doc_lens`, and collated with
# DocumentDataset.collate_fn.
train_hp_loader, val_hp_loader = (
    data_utils.DataLoader(
        split_dataset,
        batch_sampler=AdaptSampler(split_dataset.doc_lens, batch_size=25, max_size=200000),
        collate_fn=DocumentDataset.collate_fn,
    )
    for split_dataset in (train_dataset, valid_dataset)
)

# Batch iterators for the metaphor data (train first, then validation).
train_meta_loader, val_meta_loader = (
    get_metaphor_dataset(embeds_path, labels_path, batch_size=64)
    for embeds_path, labels_path in (
        ('gdrive/My Drive/vua_train_embeds.npy', 'vua_train_labels.npy'),
        ('gdrive/My Drive/vua_val_embeds.npy', 'vua_val_labels.npy'),
    )
)


Number of sentences 6323
number of words 116622
Number of sentences 1550
number of words 38628


In [0]:
import random
# Number of batches per epoch in each training loader.
l1, l2 = len(train_hp_loader), len(train_meta_loader)
print(l1)
print(l2)

# Task schedule with one entry per batch: 0 for hyper and 1 for meta.
# The training loop shuffles this list at the start of every epoch.
coin_flips = [0] * l1 + [1] * l2
print(len(coin_flips))

# print(coin_flips)
# print(next(iter(train_hp_loader)))
# print(len(train_hp_loader))

22
99
121


In [0]:
# Resume switch read by the training cell: when True, that cell loads a saved
# state_dict and step counter from a checkpoint file and shortens the remaining
# number of epochs; when False, training starts from scratch.
continue_train =  False
# train_hp_loader = list(train_hp_loader)

In [0]:
import sys
from tensorboardX import SummaryWriter
# TensorBoard event writer, created with its default arguments.
writer = SummaryWriter()

# Settings for the shared sentence encoder.
encoder_param = dict(
    embed_dim=1024,
    hidden_dim=300,
    layers=1,
    dropout_lstm=0,
    dropout_input=0.5,
)

# Settings for the document (hyper) head, including its learning rate.
hyper_param = dict(
    embed_dim=1024,
    hidden_dim=300,
    layers=1,
    dropout_lstm=0,
    dropout_input=0.5,
    dropout_FC=0.2,
    dropout_lstm_hyper=0.2,
    dropout_input_hyper=0.2,
    dropout_attention=0.2,
    num_classes=2,
    learning_rate=0.0001,
)

# Settings for the metaphor head, including its learning rate.
meta_param = dict(
    hidden_dim=300,
    dropout_FC=0.1,
    num_classes=2,
    learning_rate=0.001,
)


# ---------------------------------------------------------------------------
# Multitask training: every optimisation step draws one batch from either the
# document (hyper) loader or the metaphor loader, following the shuffled
# `coin_flips` schedule (0 = hyper batch, 1 = metaphor batch).
# ---------------------------------------------------------------------------
EVAL_EVERY = 20          # validate both tasks every N optimisation steps
CHECKPOINT_EVERY = 100   # consider saving a checkpoint every N optimisation steps
META_LOSS_WEIGHT = 0.6   # scaling applied to the metaphor loss before backprop
CHECKPOINT_PATH = 'multitask_model.pt'

best_acc = 0.0
model = multitask_model(encoder_param, hyper_param, meta_param)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
if torch.cuda.is_available():
    model.to(device=device)
nll_criterion = nn.NLLLoss()
num_epochs = 15

val_loss = []
val_f1 = []
counter = 0
# Defined up front so the final save below cannot hit a NameError when no
# checkpoint was ever produced (e.g. zero remaining epochs after a resume).
checkpoint = None
# Most recent document-task validation accuracy; None until the first validation.
hyp_val_accuracy = None

if continue_train:
    # NOTE(review): resuming reads 'multitask_model1.pt' while checkpoints are written to
    # 'multitask_model.pt' -- confirm the differing file name is intentional.
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only runtime.
    previous_model = torch.load('multitask_model1.pt', map_location=device)
    model.load_state_dict(previous_model['state_dict'])
    counter = previous_model['counter']
    num_epochs -= previous_model['epoch']

# NOTE(review): the optimizer named `met_*` uses the hyper learning rate and the one named
# `hyp_*` uses the meta learning rate; this looks swapped but is left unchanged because the
# intended pairing cannot be determined from this notebook. Both optimizers update ALL
# model parameters.
met_model_optimizer = optim.Adam(model.parameters(), lr = hyper_param['learning_rate'])
hyp_model_optimizer = optim.Adagrad(model.parameters(), lr = meta_param['learning_rate'], lr_decay=1e-5, weight_decay=1e-3)

for epoch in range(num_epochs):

    print("Starting epoch {}".format(epoch + 1))
    random.shuffle(coin_flips)
    dataloader_iterator = iter(train_hp_loader)
    metaloader_iterator = iter(train_meta_loader)

    for i in coin_flips:
        is_doc = (i == 0)
        if is_doc:
            # Batch to be picked up from the hyperpartisan dataset.
            # Only loader exhaustion restarts the iterator; any other error
            # (bad file, collate failure, ...) now propagates instead of being
            # hidden by a bare `except:`.
            try:
                batch = next(dataloader_iterator)
            except StopIteration:
                print("I AM IN EXCEPT!!!!!!!!!!!!!!!!!")
                dataloader_iterator = iter(train_hp_loader)
                batch = next(dataloader_iterator)

            data = batch[0]
            doc_len = batch[1]
            labels = batch[2]
            sen_len = batch[3]
        else:
            # Batch to be picked up from the metaphor dataset: (data, lengths, labels).
            try:
                batch = next(metaloader_iterator)
            except StopIteration:
                metaloader_iterator = iter(train_meta_loader)
                batch = next(metaloader_iterator)

            data = batch[0]
            lengths = batch[1]
            labels = batch[2]

        if torch.cuda.is_available():
            labels = labels.to(device=torch.device('cuda'))

        if is_doc:
            meta_pred, hyp_pred = model(data, sen_len, doc_len, is_doc)
            batch_loss = nll_criterion(hyp_pred.view(-1, 2), labels.view(-1))
            precision, recall, f1, eval_accuracy = evaluate_train_hyper(labels, hyp_pred)
            hyp_model_optimizer.zero_grad()
            batch_loss.backward()
            hyp_model_optimizer.step()
            counter += 1
            write_board(writer,'Hyper/Train', precision, recall, f1, eval_accuracy, batch_loss.item(), counter)
        else:
            # The third argument is a placeholder (1): there are no document lengths
            # for metaphor batches.
            meta_pred, hyp_pred = model(data, lengths, 1, is_doc)
            batch_loss = nll_criterion(meta_pred.view(-1, 2), labels.view(-1))
            precision, recall, f1, eval_accuracy = evaluate_train(labels, meta_pred, lengths)
            # The scaled loss is what gets back-propagated AND logged below.
            batch_loss = batch_loss * META_LOSS_WEIGHT
            met_model_optimizer.zero_grad()
            batch_loss.backward()
            met_model_optimizer.step()
            counter += 1
            write_board(writer,'Meta/Train', precision, recall, f1, eval_accuracy, batch_loss.item(), counter)
        print("Iteration {}. Train Loss {}. Train Accuracy {}. Train Precision {}. Train Recall {}. Train F1 {}.".format(counter, batch_loss.item(), eval_accuracy, precision, recall, f1))

        if counter % EVAL_EVERY == 0:
            avg_eval_loss, precision, recall, f1, hyp_val_accuracy = evaluate_hyper(val_hp_loader, model, nll_criterion, device)
            write_board(writer,'Hyper/Val', precision, recall, f1, hyp_val_accuracy, avg_eval_loss, counter)
            avg_eval_loss, precision, recall, f1, eval_accuracy_meta = evaluate(val_meta_loader, model, nll_criterion, device)
            write_board(writer,'Meta/Val', precision, recall, f1, eval_accuracy_meta, avg_eval_loss, counter)
            # The evaluation helpers are defined outside this notebook; in case they
            # switch the model to eval mode, make sure dropout is active again for
            # the following training steps (a no-op if it already is).
            model.train()

        # Model selection uses the document-task validation accuracy from the most
        # recent validation run (with the 20/100 cadence: the one just computed).
        if counter % CHECKPOINT_EVERY == 0 and hyp_val_accuracy is not None:
            if hyp_val_accuracy > best_acc:
                best_acc = hyp_val_accuracy
                checkpoint = {'hyperparameters': hyper_param,
                              'metaparameters': meta_param,
                              'encoderparameters': encoder_param,
                              'state_dict': model.state_dict(),
                              'epoch': epoch,
                              'counter': counter}
                torch.save(checkpoint, CHECKPOINT_PATH)

# Re-write the best checkpoint (as the original cell did), but only if one exists.
if checkpoint is not None:
    torch.save(checkpoint, CHECKPOINT_PATH)
print("First Training done!")
        

Starting epoch 1

  "num_layers={}".format(dropout, num_layers))



2.384185791015625e-07  First layer
0.03535652160644531  Average sentences and pad doc
0.03943467140197754  Second Layer
0.04305315017700195  Attention Layer
0.04362607002258301  Last Layer
Iteration 1. Train Loss 0.6939827799797058. Train Accuracy 54.166666666666664. Train Precision 41.1764705882353. Train Recall 87.5. Train F1 56.0.
Iteration 2. Train Loss 0.42554253339767456. Train Accuracy 45.97069597069597. Train Precision 8.277027027027026. Train Recall 51.041666666666664. Train F1 14.244186046511627.
Iteration 3. Train Loss 0.4210052788257599. Train Accuracy 64.2911877394636. Train Precision 12.589073634204276. Train Recall 35.09933774834437. Train F1 18.53146853146853.
4.76837158203125e-07  First layer
0.06022953987121582  Average sentences and pad doc
0.06659460067749023  Second Layer
0.07222294807434082  Attention Layer
0.07278060913085938  Last Layer
Iteration 4. Train Loss 0.5930858850479126. Train Accuracy 79.16666666666667. Train Precision nan. Train Recall 0.0. Train F1 

  precision = 100 * confusion_matrix[1, 1] / np.sum(confusion_matrix[1])


Iteration 5. Train Loss 0.4146116375923157. Train Accuracy 80.24316109422493. Train Precision 7.142857142857143. Train Recall 10.344827586206897. Train F1 8.450704225352114.
Iteration 6. Train Loss 0.4082269072532654. Train Accuracy 85.66493955094991. Train Precision 16.666666666666668. Train Recall 5.970149253731344. Train F1 8.791208791208792.


  f1 = 2 * precision * recall / (precision + recall)


Iteration 7. Train Loss 0.4039982259273529. Train Accuracy 86.57074340527578. Train Precision 0.0. Train Recall 0.0. Train F1 nan.
Iteration 8. Train Loss 0.4019456207752228. Train Accuracy 89.51930654058313. Train Precision 0.0. Train Recall 0.0. Train F1 nan.
Iteration 9. Train Loss 0.4040954113006592. Train Accuracy 85.63968668407311. Train Precision 0.0. Train Recall 0.0. Train F1 nan.
4.76837158203125e-07  First layer
0.05238485336303711  Average sentences and pad doc
0.05979800224304199  Second Layer
0.06330657005310059  Attention Layer
0.06388258934020996  Last Layer
Iteration 10. Train Loss 0.5528420805931091. Train Accuracy 75.0. Train Precision nan. Train Recall 0.0. Train F1 nan.
Iteration 11. Train Loss 0.39806464314460754. Train Accuracy 87.85117691723615. Train Precision 0.0. Train Recall 0.0. Train F1 nan.
4.76837158203125e-07  First layer
0.04376077651977539  Average sentences and pad doc
0.047955989837646484  Second Layer
0.05124092102050781  Attention Layer
0.05178809

  precision = 100 * confusion_matrix[1, 1] / np.sum(confusion_matrix[1])


Iteration 13. Train Loss 0.39495646953582764. Train Accuracy 87.57446808510639. Train Precision nan. Train Recall 0.0. Train F1 nan.
Iteration 14. Train Loss 0.37780189514160156. Train Accuracy 89.21417565485362. Train Precision nan. Train Recall 0.0. Train F1 nan.
4.76837158203125e-07  First layer
0.05333352088928223  Average sentences and pad doc
0.06148695945739746  Second Layer
0.06730532646179199  Attention Layer
0.06796932220458984  Last Layer
Iteration 15. Train Loss 0.5424859523773193. Train Accuracy 75.0. Train Precision nan. Train Recall 0.0. Train F1 nan.
4.76837158203125e-07  First layer
0.10045456886291504  Average sentences and pad doc
0.11186051368713379  Second Layer
0.11674213409423828  Attention Layer
0.117279052734375  Last Layer
Iteration 16. Train Loss 0.5895627737045288. Train Accuracy 70.83333333333333. Train Precision nan. Train Recall 0.0. Train F1 nan.
Iteration 17. Train Loss 0.38378578424453735. Train Accuracy 89.07056798623064. Train Precision nan. Train Re

  precision = 100 * confusion_matrix[1, 1] / np.sum(confusion_matrix[1])
  precision = 100 * confusion_matrix[1, 1] / np.sum(confusion_matrix[1])


Number of predictions  38628.0
Iteration 21. Train Loss 0.37502655386924744. Train Accuracy 85.91445427728614. Train Precision nan. Train Recall 0.0. Train F1 nan.
Iteration 22. Train Loss 0.3750390410423279. Train Accuracy 89.11917098445596. Train Precision nan. Train Recall 0.0. Train F1 nan.
Iteration 23. Train Loss 0.36240553855895996. Train Accuracy 89.71631205673759. Train Precision nan. Train Recall 0.0. Train F1 nan.
Iteration 24. Train Loss 0.3637271225452423. Train Accuracy 88.88888888888889. Train Precision nan. Train Recall 0.0. Train F1 nan.
2.384185791015625e-07  First layer
0.11513924598693848  Average sentences and pad doc
0.12182950973510742  Second Layer
0.12775206565856934  Attention Layer
0.12826776504516602  Last Layer
Iteration 25. Train Loss 0.6493106484413147. Train Accuracy 62.5. Train Precision nan. Train Recall 0.0. Train F1 nan.
Iteration 26. Train Loss 0.339551717042923. Train Accuracy 91.12227805695143. Train Precision nan. Train Recall 0.0. Train F1 nan.
