Ref: https://github.com/bentrevett/pytorch-seq2seq/blob/master/6%20-%20Attention%20is%20All%20You%20Need.ipynb

#### Define `EPOCHS` (total number of epochs) and `EP_INT` (epoch interval) below, then run the notebook from the top.

In [None]:
import torch, torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchtext import  vocab,data
from torchtext.datasets import TranslationDataset, Multi30k
from torchtext.data import Field, BucketIterator, TabularDataset
from torchvision.transforms import ToTensor

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import os, csv, sys, random, re, time, math, spacy, nltk

from PIL import Image
from numpy.random import RandomState
from tensorboardX import SummaryWriter
from nltk.tokenize.treebank import TreebankWordDetokenizer

In [None]:
# spaCy English pipeline; its tokenizer is what tokenize_en() calls.
nlp = spacy.load('en_core_web_sm')

#Define the logger
# (The per-split writers below are disabled; a single writer is used instead.)
#log_writer_train = SummaryWriter('TBlogs/train/')
#log_writer_val = SummaryWriter('TBlogs/val/')
#log_writer_test = SummaryWriter('TBlogs/test/')

# tensorboardX writer pointed at the TBlogs/ directory.
log_writer = SummaryWriter('TBlogs/')

In [None]:
EPOCHS = 10     # Total epochs to train for
EP_INT = 2     # Epoch interval -- presumably training runs in chunks of EP_INT epochs; TODO confirm at the training loop
CLIP = 1       # presumably the max gradient norm passed to train() as `clip` -- TODO confirm at the call site

# NF is the length of one image's visual feature vector; it is the input size
# of the vis_fc linear layers in the model.
NF = 36*2048   # For Faster RCNN Feature Extraction using Object Detection
#NF=1000       # For ResNet/VGG Feature Extraction

In [None]:
SEED = 1234
# Fixed sequence length: used as `fix_length` of the text fields, as the size
# of the positional embedding tables and as the word cut-off when cleaning `src`.
max_length=102

# Seed every random number generator in use (Python, NumPy, PyTorch CPU and CUDA).
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Ask cuDNN to use deterministic algorithms.
torch.backends.cudnn.deterministic = True

In [None]:
#dataset.isna().sum()
#df_null = dataset.isnull().unstack()
#t = df_null[df_null]
#t

The transformer needs fixed-length source sequences. Hence: 1. removing rows with null data or with fewer than `max_length` words in `src`, and 2. truncating the remaining `src` texts to `max_length` words.

In [None]:
# Cleaning the Training Data file...
# Load the raw training split from the working directory and show its (rows, columns) shape.
dataset=pd.read_csv(r"train_data.csv")
dataset.shape

(58342, 4)

In [None]:
# Keep only the rows whose `src` has at least `max_length` whitespace-separated
# words, and truncate the kept texts to exactly `max_length` words.
#
# Done with vectorised string operations instead of a row-by-row loop because
# the loop assigned through chained indexing (dataset['src'][i] = ...), which
# pandas reports with SettingWithCopyWarning and does not guarantee to reach
# `dataset`; it also copied the whole frame on every drop().
src_words = dataset['src'].str.split()               # list of words per row; missing text stays NaN
long_enough = src_words.str.len() >= max_length      # NaN >= n is False, so rows with null `src` are dropped too
dataset = dataset.loc[long_enough].copy()            # explicit copy: the assignment below targets this frame
dataset['src'] = src_words.loc[long_enough].str[:max_length].str.join(' ')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['src'][i] = " ".join(dataset['src'][i].split()[0:102])


In [None]:
# Persist the cleaned training split and read it back, so that `dataset`
# carries a fresh 0..n-1 index after the row filtering.
os.makedirs('temp', exist_ok=True)   # to_csv does not create missing directories
dataset.to_csv('temp/train_data1.csv', index= False)
dataset = pd.read_csv('temp/train_data1.csv')
dataset.head(-1)

Unnamed: 0,src,trg,img_path,img_id
0,video a bizarre conspiracy theory has surged i...,i take news with a pinch of salt so should eve...,/home/puneet/code/Multimodal Feedback/data/781...,7817
1,the pentagon said thursday that the us militar...,great news,/home/puneet/code/Multimodal Feedback/data/651...,6514
2,washington pressure is growing among president...,more good news please,/home/puneet/code/Multimodal Feedback/data/230...,2309
3,our energy supplier for more than years sse is...,why this video is getting so many dislikes,/home/puneet/code/Multimodal Feedback/data/447...,4470
4,michigan gov gretchen whitmer has met with pre...,unfortunately this will anger the mudsharks th...,/home/puneet/code/Multimodal Feedback/data/823...,8237
...,...,...,...,...
50954,kabul afghanistan the taliban ambushed a peace...,i always for the of reeducation no hope= misle...,/home/puneet/code/Multimodal Feedback/data/669...,6697
50955,file photo the olympic rings are illuminated i...,the crazy just never seems to stop in florida ...,/home/puneet/code/Multimodal Feedback/data/541...,5417
50956,saudi arabia has sentenced five people to deat...,five lives to save face for the prince,/home/puneet/code/Multimodal Feedback/data/627...,6271
50957,acting manatee county administrator dr scott h...,floridas republican governors have been availa...,/home/puneet/code/Multimodal Feedback/data/935...,9358


In [None]:
# Check 1: on the lengths of the cleaned data
#for i in range (dataset1.shape[0]):
#    if not len(dataset1['src'][i].split()) == 102:
#        print('1')

# Check 2: if (kk<max_length), then manually delete the entried from .csv file. 
# train_data: Print to check whether any samples with length><max_length are remaining...
#for i in range (dataset.shape[0]):
#     res = re.findall(r'\w+', dataset["src"][i])
#     print(res)
#     kk=min(len(res),max_length)
#     if (kk<max_length):
#            print(i," ",len(res)," ",kk," ", dataset["img_id"][i])
##      zz=str(res[0])   
##      for j in range(kk-1):
##          zz=zz+" "+str(res[j+1] )
##      dataset["src"][i]=zz    

In [None]:
# Cleaning the Validation Data file...
# Load the raw validation split from the working directory and show its (rows, columns) shape.
dataset1=pd.read_csv(r"val_data.csv")
dataset1.shape

(15558, 4)

In [None]:
# Keep only the rows whose `src` has at least `max_length` whitespace-separated
# words, and truncate the kept texts to exactly `max_length` words.
#
# Done with vectorised string operations instead of a row-by-row loop because
# the loop assigned through chained indexing (dataset1['src'][i] = ...), which
# pandas reports with SettingWithCopyWarning and does not guarantee to reach
# `dataset1`; it also copied the whole frame on every drop().
src_words = dataset1['src'].str.split()              # list of words per row; missing text stays NaN
long_enough = src_words.str.len() >= max_length      # NaN >= n is False, so rows with null `src` are dropped too
dataset1 = dataset1.loc[long_enough].copy()          # explicit copy: the assignment below targets this frame
dataset1['src'] = src_words.loc[long_enough].str[:max_length].str.join(' ')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset1['src'][i] = " ".join(dataset1['src'][i].split()[0:102])


In [None]:
# Persist the cleaned validation split and read it back, so that `dataset1`
# carries a fresh 0..n-1 index after the row filtering.
os.makedirs('temp', exist_ok=True)   # to_csv does not create missing directories
dataset1.to_csv('temp/val_data1.csv', index= False)
dataset1 = pd.read_csv('temp/val_data1.csv')
dataset1.head(-1)

Unnamed: 0,src,trg,img_path,img_id
0,image copyright getty images image caption the...,yin wei zhong guo de fa zhan rang mei di guo g...,/home/puneet/code/Multimodal Feedback/data/782...,7826
1,washington amid escalating tensions with both ...,well done advisors,/home/puneet/code/Multimodal Feedback/data/831...,8310
2,washington president trump directed the federa...,hey come on you gotta see where trump is comin...,/home/puneet/code/Multimodal Feedback/data/761...,7619
3,an american sign language interpreter who help...,for references who want to find additional inc...,/home/puneet/code/Multimodal Feedback/data/856...,8563
4,although many consider them to be rivals both ...,thank you very much for your work,/home/puneet/code/Multimodal Feedback/data/260...,2600
...,...,...,...,...
13595,a threegame series between the st louis cardin...,heres how cnn creates fake news,/home/puneet/code/Multimodal Feedback/data/823...,8236
13596,a view of the evergiven container ship as it r...,any chance the gpu prices will go down huh no ...,/home/puneet/code/Multimodal Feedback/data/287...,2870
13597,in the wake of the atlantaarea shootings and t...,can politicians do anything other than condemn,/home/puneet/code/Multimodal Feedback/data/176...,1768
13598,dr anthony fauci says there are five or six th...,selfish,/home/puneet/code/Multimodal Feedback/data/828...,8288


In [None]:
# val_data sanity check: list every row whose `src` contains fewer than
# max_length regex words (\w+). Each printed line is
# "<row>   <word count>   <word count>   <img_id>".
# Note: from here on `dataset` refers to the validation frame.
dataset = dataset1
for i in range(len(dataset)):
    res = re.findall(r'\w+', dataset["src"][i])
    kk = min(len(res), max_length)
    if kk >= max_length:
        continue
    print(i, " ", len(res), " ", kk, " ", dataset["img_id"][i])

22   100   100   8005
325   101   101   5425
778   100   100   8948
1720   100   100   4645
1903   100   100   8948
1920   100   100   4645
1970   100   100   8948
2055   100   100   8948
2517   100   100   9195
2551   101   101   660
2600   100   100   8948
2725   100   100   4645
2905   100   100   8948
3045   100   100   8948
3056   100   100   8948
3084   100   100   8948
3308   101   101   8505
3473   100   100   8948
3958   100   100   8005
4703   100   100   8948
4777   100   100   8948
4893   100   100   4645
4895   99   99   81
5211   100   100   8948
5586   100   100   8948
5612   98   98   3729
5776   100   100   8948
5818   100   100   8005
6174   100   100   8948
6225   100   100   8948
6310   100   100   8948
6481   100   100   8948
6487   100   100   8948
6555   100   100   8948
6684   101   101   8505
7058   100   100   8948
7145   100   100   8948
7247   100   100   9195
7609   100   100   8005
8058   99   99   1426
8095   101   101   8496
8123   100   100   83
8486   

We'll then create our tokenizers as before.

In [None]:
def tokenize_en(text):
    """
    Split an English string into a list of token strings using the
    tokenizer of the module-level spaCy pipeline `nlp`.
    """
    doc = nlp.tokenizer(text)
    return [token.text for token in doc]

In [None]:
# Source and target text share one configuration: spaCy tokenisation,
# <sos>/<eos> markers, lower-casing, batch-first tensors and a fixed length of
# `max_length`. Each field is still its own Field object with its own vocabulary.
_text_field_options = dict(
    tokenize = tokenize_en,
    init_token = '<sos>',
    eos_token = '<eos>',
    fix_length = max_length,
    lower = True,
    batch_first = True,
)

SRC = Field(**_text_field_options)
TRG = Field(**_text_field_options)

# Image id: a single raw value per example, no tokenisation and no vocabulary.
ID = data.Field(sequential=False, use_vocab=False)

In [None]:
# (column name, Field) pairs handed to TabularDataset below.
# 'img_path' is paired with None instead of a Field (torchtext's convention for a column that is not loaded).
datafields=[('src', SRC), ('trg', TRG),('img_path',None),('img_id',ID)]
print('**********************************************************\ndatafields:', datafields)

**********************************************************
datafields: [('src', <torchtext.data.field.Field object at 0x7f78ee4b9ee0>), ('trg', <torchtext.data.field.Field object at 0x7f78ee4b9850>), ('img_path', None), ('img_id', <torchtext.data.field.Field object at 0x7f78ee4b9f10>)]


In [None]:
#df = pd.read_csv("data1.csv")

#rng = RandomState()
#train_data = df.sample(frac=0.80, random_state=rng)
#val_data = df.loc[~df.index.isin(train_data.index)]

##cols= ["src", "trg", "img_path", "img_id"]
#train_data.to_csv('train_data1.csv', index= False) #columns=cols
#val_data.to_csv('val_data1.csv', index= False) #columns=cols

In [None]:
# Build the train/validation datasets from the cleaned CSV files written above;
# the header row is skipped and columns are mapped through `datafields`.
train_data, val_data = data.TabularDataset.splits(path=r"",train="temp/train_data1.csv", validation="temp/val_data1.csv", format='csv', skip_header=True, fields=datafields)

In [None]:
# Number of examples in each split.
print('\ntrain_data length: ',len(train_data))
print('val_data length: ',len(val_data))


train_data length:  50960
val_data length:  13601


In [None]:
#print(vars(train_data.examples[4]))
#print(sys.maxsize)

In [None]:
# Build each vocabulary exactly once from the training split.
# The former GloVe-backed build_vocab calls were removed: every build_vocab
# call replaces the field's vocabulary, so the vectors they attached were
# thrown away by the calls below, at the cost of downloading and loading
# the GloVe file.
SRC.build_vocab(train_data, min_freq=2)   # source words seen fewer than 2 times are left out of the vocabulary
TRG.build_vocab(train_data, min_freq=1)   # every target word is kept

In [None]:
# Size of the source vocabulary (including the special tokens).
print(f"\nUnique tokens in source (en) vocabulary: {len(SRC.vocab)}")


Unique tokens in source (en) vocabulary: 32349


In [None]:
# Size of the target vocabulary (including the special tokens).
print(f"Unique tokens in target (en) vocabulary: {len(TRG.vocab)}")

Unique tokens in target (en) vocabulary: 42325


In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#device = 'cuda'
#torch.cuda.set_device(0)

In [None]:
# Display which device was selected.
device

device(type='cuda')

In [None]:
BATCH_SIZE = 32

# BucketIterator.splits returns one iterator per dataset in the tuple. Each
# split is passed twice, so a second iterator over the same data
# (x_iterator / y_iterator) is created alongside the one used for the
# epoch loops.
train_iterator, x_iterator = BucketIterator.splits(
    (train_data,train_data), 
    batch_size = BATCH_SIZE,
    device = device)

valid_iterator, y_iterator = BucketIterator.splits(
    (val_data,val_data), 
    batch_size = BATCH_SIZE, 
    device = device)

'\ntrain_iterator, x_iterator = BucketIterator.splits(\n    (train_data, train_data), \n    batch_size = BATCH_SIZE,\n    #sort_within_batch = True,\n    #sort_key = lambda x : len(x.src), \n    device = device)\n\nvalid_iterator, y_iterator = BucketIterator.splits(\n    (val_data, val_data), \n    batch_size = BATCH_SIZE, \n    #sort_within_batch = True,\n    #sort_key = lambda x : len(x.src),\n    device = device\n'

In [None]:
# Number of batches per epoch for each iterator.
print('\ntrain_iterator length: ',len(train_iterator))
print('valid_iterator length: ',len(valid_iterator))


train_iterator length:  1593
valid_iterator length:  426


In [None]:
# Debug aid: makes one full pass over train_iterator and prints the index and
# the batch object for every batch (one printed entry per batch).
for i, batch in enumerate(train_iterator):
  print(i,batch)

In [None]:
class Encoder(nn.Module):
    """Transformer encoder over source tokens that also builds a text+visual encoding.

    forward() returns two tensors of shape [batch size, src len, hid dim]:

    * the text encoding produced by the stacked EncoderLayers, and
    * a fused encoding: the text encoding concatenated with the projected
      visual features, mixed by a linear layer and passed through one extra
      position-wise feed-forward sub-layer (dropout, residual, layer norm).
    """

    def __init__(self, 
                 input_dim, 
                 hid_dim, 
                 n_layers, 
                 n_heads, 
                 pf_dim,
                 dropout, 
                 device,
                 max_length = max_length):
        """
        Args:
            input_dim: size of the source vocabulary.
            hid_dim: model (embedding) dimension.
            n_layers: number of stacked EncoderLayers.
            n_heads: attention heads per layer.
            pf_dim: inner size of the position-wise feed-forward layers.
            dropout: dropout probability.
            device: device used for the position indices and the scale tensor.
            max_length: number of rows in the positional embedding table.
        """
        super().__init__()

        self.device = device
        
        # NOTE: sub-modules are created in a fixed order on purpose; the order
        # determines parameter order and the random numbers drawn at init.
        self.tok_embedding = nn.Embedding(input_dim, hid_dim)
        self.vis_fc=nn.Linear(NF, hid_dim)          # projects one NF-long visual vector to hid_dim
        self.fc=nn.Linear(2*hid_dim,hid_dim)        # mixes [text ; visual] back to hid_dim
        self.pos_embedding = nn.Embedding(max_length, hid_dim)
        
        self.layers = nn.ModuleList([EncoderLayer(hid_dim, 
                                                  n_heads, 
                                                  pf_dim,
                                                  dropout, 
                                                  device) 
                                     for _ in range(n_layers)])
        
        self.dropout = nn.Dropout(dropout)
        
        self.enc_ff_layer_norm = nn.LayerNorm(hid_dim)
        
        # sqrt(hid_dim): token embeddings are multiplied by this before the
        # positional embeddings are added.
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)

        self.enc_positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, 
                                                                     pf_dim, 
                                                                     dropout)

    @staticmethod
    def _select_visual_features(src_len, vs102, vs103):
        """Return whichever of vs102 / vs103 has `src_len` positions (dim 1).

        vs103 is preferred for a 103-token source and vs102 otherwise, which
        matches the original 102/103 dispatch.
        """
        candidates = (vs103, vs102) if src_len == 103 else (vs102, vs103)
        for feats in candidates:
            if feats.shape[1] == src_len:
                return feats
        raise ValueError(
            "No visual features match src len {}: got {} and {} positions".format(
                src_len, vs102.shape[1], vs103.shape[1]))

    def forward(self, src,vs102,vs103, src_mask):
        """
        Args:
            src: token ids, [batch size, src len].
            vs102, vs103: visual features, [batch size, n positions, NF]; the
                one whose number of positions equals src len is used.
            src_mask: attention mask forwarded to every EncoderLayer.

        Returns:
            (src, vis_src): text encoding and fused text+visual encoding,
            both [batch size, src len, hid dim].

        Raises:
            ValueError: if src len exceeds the positional embedding table, or
                if neither visual tensor has src len positions.
        """
        
        #src = [batch size, src len]
        
        batch_size = src.shape[0]
        src_len = src.shape[1]

        # Fail with a readable error instead of an out-of-range embedding lookup.
        if src_len > self.pos_embedding.num_embeddings:
            raise ValueError(
                "src len {} exceeds the positional embedding size {}".format(
                    src_len, self.pos_embedding.num_embeddings))

        # Project only the visual features that are actually used; vis_fc is a
        # large NF x hid_dim layer, so projecting both tensors doubles its cost.
        visual = self._select_visual_features(src_len, vs102, vs103)
        visual = torch.tanh(self.vis_fc(visual))

        #visual = [batch size, src len, hid dim]
        
        pos = torch.arange(src_len, device = self.device).unsqueeze(0).repeat(batch_size, 1)
        
        #pos = [batch size, src len]
        
        src = self.dropout((self.tok_embedding(src) * self.scale) + self.pos_embedding(pos))
        
        #src = [batch size, src len, hid dim]
        
        for layer in self.layers:
            src = layer(src, src_mask)

        # Fuse text and visual information position by position.
        vis_src = self.fc(torch.cat([src, visual], 2))

        #positionwise feedforward
        _vis_src = self.enc_positionwise_feedforward(vis_src)
        
        #dropout, residual and layer norm
        vis_src = self.enc_ff_layer_norm(vis_src + self.dropout(_vis_src))    

        #vis_src = [batch size, src len, hid dim]
            
        return src, vis_src

In [None]:
class EncoderLayer(nn.Module):
    """One encoder block: self-attention followed by a position-wise
    feed-forward network, each wrapped in dropout + residual + layer norm."""

    def __init__(self, hid_dim, n_heads, pf_dim, dropout, device):
        super().__init__()

        # Creation order is kept as-is: it fixes parameter order and the
        # random numbers consumed during initialisation.
        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)

    def _add_and_norm(self, residual, sublayer_out, norm):
        """Apply dropout to the sub-layer output, add the residual, then layer-normalise."""
        return norm(residual + self.dropout(sublayer_out))

    def forward(self, src, src_mask):
        """
        Args:
            src: [batch size, src len, hid dim]
            src_mask: mask passed to the self-attention layer.

        Returns:
            Tensor of shape [batch size, src len, hid dim].
        """
        # Self-attention sub-layer (attention weights are not needed here).
        attended, _ = self.self_attention(src, src, src, src_mask)
        src = self._add_and_norm(src, attended, self.self_attn_layer_norm)

        # Position-wise feed-forward sub-layer.
        transformed = self.positionwise_feedforward(src)
        return self._add_and_norm(src, transformed, self.ff_layer_norm)

In [None]:
class MultiHeadAttentionLayer(nn.Module):
    """Multi-head scaled dot-product attention.

    hid_dim is split evenly across n_heads; queries, keys and values are
    linearly projected, attended per head and projected back to hid_dim.
    """

    def __init__(self, hid_dim, n_heads, dropout, device):
        super().__init__()

        assert hid_dim % n_heads == 0

        self.hid_dim = hid_dim
        self.n_heads = n_heads
        self.head_dim = hid_dim // n_heads

        # Projections are created in the order q, k, v, o (fixes parameter
        # order and initialisation randomness).
        self.fc_q = nn.Linear(hid_dim, hid_dim)
        self.fc_k = nn.Linear(hid_dim, hid_dim)
        self.fc_v = nn.Linear(hid_dim, hid_dim)

        self.fc_o = nn.Linear(hid_dim, hid_dim)

        self.dropout = nn.Dropout(dropout)

        # sqrt(head_dim): divisor applied to the raw attention energies.
        self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device)

    def _split_heads(self, projected, batch_size):
        """[batch, len, hid dim] -> [batch, n heads, len, head dim]."""
        per_head = projected.view(batch_size, -1, self.n_heads, self.head_dim)
        return per_head.permute(0, 2, 1, 3)

    def forward(self, query, key, value, mask = None):
        """
        Args:
            query: [batch size, query len, hid dim]
            key:   [batch size, key len, hid dim]
            value: [batch size, value len, hid dim]
            mask: optional tensor broadcastable to
                [batch size, n heads, query len, key len]; positions where it
                equals 0 are excluded from attention.

        Returns:
            (x, attention): x is [batch size, query len, hid dim] and
            attention is [batch size, n heads, query len, key len].
        """
        batch_size = query.shape[0]

        heads_q = self._split_heads(self.fc_q(query), batch_size)
        heads_k = self._split_heads(self.fc_k(key), batch_size)
        heads_v = self._split_heads(self.fc_v(value), batch_size)

        # energy = [batch size, n heads, query len, key len]
        energy = torch.matmul(heads_q, heads_k.permute(0, 1, 3, 2)) / self.scale

        if mask is not None:
            # A very negative energy becomes (numerically) zero after softmax.
            energy = energy.masked_fill(mask == 0, -1e10)

        attention = torch.softmax(energy, dim = -1)

        # Weighted sum of values; dropout is applied to the weights only,
        # the returned `attention` is the un-dropped distribution.
        context = torch.matmul(self.dropout(attention), heads_v)

        # [batch, n heads, query len, head dim] -> [batch, query len, hid dim]
        context = context.permute(0, 2, 1, 3).contiguous()
        context = context.view(batch_size, -1, self.hid_dim)

        return self.fc_o(context), attention

In [None]:
class PositionwiseFeedforwardLayer(nn.Module):
    """Two-layer feed-forward network applied independently at every position:
    hid_dim -> pf_dim (ReLU, dropout) -> hid_dim."""

    def __init__(self, hid_dim, pf_dim, dropout):
        super().__init__()

        self.fc_1 = nn.Linear(hid_dim, pf_dim)
        self.fc_2 = nn.Linear(pf_dim, hid_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """
        Args:
            x: [batch size, seq len, hid dim]

        Returns:
            Tensor of shape [batch size, seq len, hid dim].
        """
        # Expand to the inner dimension: [batch size, seq len, pf dim]
        hidden = self.fc_1(x)
        hidden = torch.relu(hidden)
        hidden = self.dropout(hidden)

        # Project back to the model dimension.
        return self.fc_2(hidden)

In [None]:
class Decoder(nn.Module):
    """Transformer decoder: embeds the target tokens, runs them through a stack
    of DecoderLayers (which attend to both encoder outputs) and projects the
    result to vocabulary logits."""

    def __init__(self, output_dim, hid_dim, n_layers, n_heads, pf_dim, dropout, device,
                 max_length = max_length):
        super().__init__()

        self.device = device

        # Creation order is kept as-is: it fixes parameter order and the
        # random numbers consumed during initialisation.
        self.tok_embedding = nn.Embedding(output_dim, hid_dim)
        self.pos_embedding = nn.Embedding(max_length, hid_dim)

        decoder_blocks = [DecoderLayer(hid_dim, n_heads, pf_dim, dropout, device)
                          for _ in range(n_layers)]
        self.layers = nn.ModuleList(decoder_blocks)

        self.fc_out = nn.Linear(hid_dim, output_dim)

        self.dropout = nn.Dropout(dropout)

        # sqrt(hid_dim): multiplier for the token embeddings.
        self.scale = torch.sqrt(torch.FloatTensor([hid_dim])).to(device)

    def forward(self, trg, enc_src, trg_mask, src_mask, vs102, vs103, enc_vis_src):
        """
        Args:
            trg: target token ids, [batch size, trg len].
            enc_src: text encoding, [batch size, src len, hid dim].
            trg_mask: mask for the decoder self-attention.
            src_mask: mask for both encoder attentions.
            vs102, vs103: visual features, passed through to every layer unchanged.
            enc_vis_src: fused text+visual encoding from the encoder.

        Returns:
            (output, attention): output is [batch size, trg len, output dim];
            attention is whatever the last DecoderLayer returned.
        """
        n_samples, n_steps = trg.shape[0], trg.shape[1]

        # Position indices 0..trg len-1, one row per sample.
        positions = torch.arange(0, n_steps).unsqueeze(0).repeat(n_samples, 1).to(self.device)

        scaled_tokens = self.tok_embedding(trg) * self.scale
        trg = self.dropout(scaled_tokens + self.pos_embedding(positions))

        # trg = [batch size, trg len, hid dim]
        for block in self.layers:
            trg, attention = block(trg, enc_src, trg_mask, src_mask, vs102, vs103, enc_vis_src)

        output = self.fc_out(trg)

        return output, attention

In [None]:
class DecoderLayer(nn.Module):
    """One decoder block with four sub-layers, each followed by dropout,
    a residual connection and layer norm:

    1. masked self-attention over the target,
    2. attention over the text encoding,
    3. attention over the fused text+visual encoding,
    4. position-wise feed-forward.
    """

    def __init__(self, hid_dim, n_heads, pf_dim, dropout, device):
        super().__init__()

        # NOTE: vis_fc is not used in forward(); it is kept so that the set of
        # parameters (and therefore saved checkpoints) stays the same.
        self.vis_fc=nn.Linear(NF, hid_dim)

        self.self_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.enc_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.enc_vis_attn_layer_norm = nn.LayerNorm(hid_dim)
        self.ff_layer_norm = nn.LayerNorm(hid_dim)
        self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.encoder_vis_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device)
        self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout)
        self.dropout = nn.Dropout(dropout)

    def _add_and_norm(self, residual, sublayer_out, norm):
        """Apply dropout to the sub-layer output, add the residual, then layer-normalise."""
        return norm(residual + self.dropout(sublayer_out))

    def forward(self, trg, enc_src, trg_mask, src_mask, vs102, vs103, enc_vis_src):
        """
        Args:
            trg: [batch size, trg len, hid dim]
            enc_src: text encoding, [batch size, src len, hid dim]
            trg_mask: mask for the self-attention.
            src_mask: mask for both encoder attentions.
            vs102, vs103: accepted for interface compatibility; not used here.
            enc_vis_src: fused text+visual encoding, attended with src_mask.

        Returns:
            (trg, attention): updated target states and the attention weights
            of the visual encoder attention (the text encoder attention
            weights are overwritten by it).
        """
        # 1. masked self-attention
        self_out, _ = self.self_attention(trg, trg, trg, trg_mask)
        trg = self._add_and_norm(trg, self_out, self.self_attn_layer_norm)

        # 2. attention over the text encoding
        text_out, attention = self.encoder_attention(trg, enc_src, enc_src, src_mask)
        trg = self._add_and_norm(trg, text_out, self.enc_attn_layer_norm)

        # 3. attention over the fused text+visual encoding
        vis_out, attention = self.encoder_vis_attention(trg, enc_vis_src, enc_vis_src, src_mask)
        trg = self._add_and_norm(trg, vis_out, self.enc_vis_attn_layer_norm)

        # 4. position-wise feed-forward
        ff_out = self.positionwise_feedforward(trg)
        trg = self._add_and_norm(trg, ff_out, self.ff_layer_norm)

        return trg, attention

In [None]:
class Seq2Seq(nn.Module):
    """Wires an encoder and a decoder together and builds their attention masks."""

    def __init__(self, encoder, decoder, src_pad_idx, trg_pad_idx, device):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):
        """Boolean mask that is True at non-padding source positions.

        src is [batch size, src len]; the result is [batch size, 1, 1, src len].
        """
        is_token = src != self.src_pad_idx
        return is_token[:, None, None]

    def make_trg_mask(self, trg):
        """Boolean mask combining target padding with a causal (lower-triangular) mask.

        trg is [batch size, trg len]; the result is
        [batch size, 1, trg len, trg len].
        """
        # [batch size, 1, 1, trg len]: True where the key position is a real token.
        is_token = (trg != self.trg_pad_idx)[:, None, None]

        steps = trg.shape[1]

        # [trg len, trg len]: position i may only look at positions <= i.
        causal = torch.ones((steps, steps), device = self.device).tril().bool()

        return is_token & causal

    def forward(self, src,vs102,vs103, trg):
        """
        Args:
            src: [batch size, src len] source token ids.
            vs102, vs103: visual features handed to the encoder and decoder.
            trg: [batch size, trg len] target token ids.

        Returns:
            The decoder's (output, attention) pair.
        """
        src_mask = self.make_src_mask(src)
        trg_mask = self.make_trg_mask(trg)

        # The encoder yields the text encoding and the fused text+visual encoding.
        enc_src, vis_enc_src = self.encoder(src,vs102,vs103, src_mask)

        output, attention = self.decoder(trg, enc_src, trg_mask, src_mask, vs102, vs103, vis_enc_src)

        return output, attention

In [None]:
# Model hyper-parameters.
INPUT_DIM = len(SRC.vocab)    # source vocabulary size
OUTPUT_DIM = len(TRG.vocab)   # target vocabulary size
HID_DIM = 64                  # model dimension; must be divisible by the number of heads
ENC_LAYERS = 3
DEC_LAYERS = 3
ENC_HEADS = 8
DEC_HEADS = 8
ENC_PF_DIM = 128              # inner size of the position-wise feed-forward layers
DEC_PF_DIM = 128
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1

# max_length is left at its default (the module-level max_length) for both.
enc = Encoder(INPUT_DIM, 
              HID_DIM, 
              ENC_LAYERS, 
              ENC_HEADS, 
              ENC_PF_DIM, 
              ENC_DROPOUT, 
              device)

dec = Decoder(OUTPUT_DIM, 
              HID_DIM, 
              DEC_LAYERS, 
              DEC_HEADS, 
              DEC_PF_DIM, 
              DEC_DROPOUT, 
              device)

In [None]:
# Vocabulary indices of the padding token; Seq2Seq uses them to build the
# attention masks.
SRC_PAD_IDX = SRC.vocab.stoi[SRC.pad_token]
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

# Assemble the full model and move its parameters to the selected device.
model = Seq2Seq(enc, dec, SRC_PAD_IDX, TRG_PAD_IDX, device).to(device)

In [None]:
def count_parameters(model):
    """Return the total number of scalar values in the model's trainable
    parameters (those with requires_grad set)."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the model size with thousands separators.
print(f'\nThe model has {count_parameters(model):,} trainable parameters')


The model has 12,521,365 trainable parameters


In [None]:
def initialize_weights(m):
    """Xavier-uniform initialise the `weight` of module `m` in place.

    Modules without a `weight` attribute, and weights with a single
    dimension (e.g. LayerNorm), are left untouched. Intended for use with
    `model.apply(...)`.
    """
    if not hasattr(m, 'weight'):
        return
    if m.weight.dim() <= 1:
        return
    nn.init.xavier_uniform_(m.weight.data)

In [None]:
# Run initialize_weights on every sub-module; the trailing ';' suppresses the cell output.
model.apply(initialize_weights);

In [None]:
LEARNING_RATE = 0.0005
# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)
# Token-level cross entropy; target positions equal to the padding index are ignored.
criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

In [None]:
def train(model, iterator, optimizer, criterion, clip):
    """Train `model` for one pass over `iterator` and return the mean batch loss.

    For every batch the visual feature vector of each sample is looked up by
    image id in visual_features_rcnn.csv (one column per image id), repeated
    over `max_length` positions and fed to the model together with the text.

    Args:
        model: network called as model(src, vs102, vs103, trg) and returning
            (output, attention).
        iterator: yields batches exposing .src, .trg and .img_id.
        optimizer: optimizer over the model's parameters.
        criterion: loss taking (logits [N, vocab], targets [N]).
        clip: maximum gradient norm used for gradient clipping.

    Returns:
        Sum of the per-batch losses divided by len(iterator).
    """
    
    model.train()
    
    # Loop-invariant: read the feature table once per call instead of once
    # per batch.
    visual_df = pd.read_csv(r"visual_features_rcnn.csv")
    
    epoch_loss = 0
    
    for batch in iterator:
        
        src = batch.src
        trg = batch.trg
        img_ids = batch.img_id.cpu().numpy()

        # One feature vector per sample: [batch size, n features].
        features = np.stack([visual_df[str(img_id)].to_numpy() for img_id in img_ids])
        features = torch.from_numpy(features).float().to(src.device)

        # The same vector is used at every position: [batch size, max_length, n features].
        # Built directly on the batch's device; the model only reads it, so
        # one tensor serves as both visual inputs.
        visual_features = features.unsqueeze(1).repeat(1, max_length, 1)
        
        optimizer.zero_grad()
        
        # Teacher forcing: the decoder sees the target without its last token...
        output, _ = model(src, visual_features, visual_features, trg[:,:-1])
                
        #output = [batch size, trg len - 1, output dim]
        #trg = [batch size, trg len]
            
        output_dim = output.shape[-1]
            
        # ...and is scored against the target shifted left by one.
        output = output.contiguous().view(-1, output_dim)
        trg = trg[:,1:].contiguous().view(-1)
                
        #output = [batch size * trg len - 1, output dim]
        #trg = [batch size * trg len - 1]
            
        loss = criterion(output, trg)
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [None]:
def evaluate(model, iterator, criterion):
    """Compute the mean batch loss of `model` over `iterator` without training.

    Mirrors train(): the visual feature vector of each sample is looked up by
    image id in visual_features_rcnn.csv (one column per image id) and
    repeated over `max_length` positions. Runs in eval mode with gradients
    disabled.

    Args:
        model: network called as model(src, vs102, vs103, trg) and returning
            (output, attention).
        iterator: yields batches exposing .src, .trg and .img_id.
        criterion: loss taking (logits [N, vocab], targets [N]).

    Returns:
        Sum of the per-batch losses divided by len(iterator).
    """
    
    model.eval()
    
    # Loop-invariant: read the feature table once per call instead of once
    # per batch.
    visual_df = pd.read_csv(r"visual_features_rcnn.csv")
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for batch in iterator:

            src = batch.src
            trg = batch.trg
            img_ids = batch.img_id.cpu().numpy()

            # One feature vector per sample: [batch size, n features].
            features = np.stack([visual_df[str(img_id)].to_numpy() for img_id in img_ids])
            features = torch.from_numpy(features).float().to(src.device)

            # The same vector is used at every position: [batch size, max_length, n features].
            # Built directly on the batch's device; the model only reads it,
            # so one tensor serves as both visual inputs.
            visual_features = features.unsqueeze(1).repeat(1, max_length, 1)

            # The decoder sees the target without its last token...
            output, _ = model(src, visual_features, visual_features, trg[:,:-1])
            
            #output = [batch size, trg len - 1, output dim]
            #trg = [batch size, trg len]
            
            output_dim = output.shape[-1]
            
            # ...and is scored against the target shifted left by one.
            output = output.contiguous().view(-1, output_dim)
            trg = trg[:,1:].contiguous().view(-1)
            
            #output = [batch size * trg len - 1, output dim]
            #trg = [batch size * trg len - 1]
            
            loss = criterion(output, trg)

            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [None]:
def epoch_time(start_time, end_time):
    """Split the span between two timestamps (in seconds) into whole minutes and seconds.

    Returns:
        ``(minutes, seconds)`` as ints, both truncated towards zero.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [None]:
def load_checkpt(model, optimizer, chpt_file):
    """Restore `model` and `optimizer` from `chpt_file` when that file exists.

    The checkpoint is expected to be a dict with the keys ``'state_dict'``,
    ``'optimizer'`` and ``'epoch'``.

    Returns:
        ``(model, optimizer, start_epoch)`` where ``start_epoch`` is the stored
        ``'epoch'`` value, or 0 when no checkpoint file is found.
    """
    if not os.path.exists(chpt_file):
        # Nothing to restore: hand the objects back untouched.
        print("=> Checkpoint NOT found '{}'".format(chpt_file))
        return model, optimizer, 0

    print("=> loading checkpoint '{}'".format(chpt_file))
    saved = torch.load(chpt_file)
    model.load_state_dict(saved['state_dict'])
    optimizer.load_state_dict(saved['optimizer'])
    resume_epoch = saved['epoch']

    print("=> loaded checkpoint '{}' (epoch {})".format(chpt_file, saved['epoch']))

    return model, optimizer, resume_epoch

In [None]:
def translate_sentence(sentence, src_field, trg_field,img_id,path, model, device, max_len = 50):
    """Greedily decode one output sequence for `sentence` and the image `img_id`.

    Args:
        sentence: a raw string (tokenised with the notebook-level spaCy pipeline
            `nlp`) or an iterable of string tokens.
        src_field, trg_field: fields providing ``init_token``, ``eos_token`` and
            ``vocab.stoi`` / ``vocab.itos``.
        img_id: image identifier; ``str(img_id)`` must be a column of the CSV at `path`.
        path: CSV file with one column of visual features per image id.
        model: exposes ``make_src_mask``, ``make_trg_mask``, ``encoder`` and ``decoder``.
        device: device on which every tensor created here is placed.
        max_len: maximum number of decoding steps.

    Returns:
        ``(tokens, attention)``: the decoded tokens without the leading init token
        (the end-of-sequence token is included when it was produced), and the
        attention returned by the last decoder call (``None`` if no step ran).
    """
    model.eval()

    if isinstance(sentence, str):
        # Reuse the pipeline loaded once at the top of the notebook; reloading the
        # spaCy model on every call is slow and yields the same tokens.
        tokens = [token.text.lower() for token in nlp(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    tokens = [src_field.init_token] + tokens + [src_field.eos_token]

    src_indexes = [src_field.vocab.stoi[token] for token in tokens]

    src_tensor = torch.LongTensor(src_indexes).unsqueeze(0).to(device)

    src_mask = model.make_src_mask(src_tensor)

    # Only this image's column is needed, so skip materialising the others.
    column = str(img_id)
    feats = pd.read_csv(path, usecols=[column])[column].to_numpy()

    # Broadcasting (max_length, 1) + feats repeats the image vector at each of
    # the max_length positions (assumes the column holds NF values).
    tiled = torch.from_numpy(np.zeros((max_length, 1), dtype=feats.dtype) + feats).float()

    # Allocate on `device` (not unconditionally on CUDA) so these tensors live
    # with src_tensor. Two separate buffers are kept because the model takes
    # them as two distinct arguments.
    visual_features_102 = torch.empty(1, max_length, NF, device=device)
    visual_features_103 = torch.empty(1, max_length, NF, device=device)
    visual_features_102[0] = tiled
    visual_features_103[0] = tiled

    with torch.no_grad():
        enc_src, enc_vis_src = model.encoder(src_tensor,visual_features_102,visual_features_103, src_mask)

    trg_indexes = [trg_field.vocab.stoi[trg_field.init_token]]
    eos_index = trg_field.vocab.stoi[trg_field.eos_token]

    # Defined up front so the return below is valid even when max_len <= 0.
    attention = None

    for _ in range(max_len):

        trg_tensor = torch.LongTensor(trg_indexes).unsqueeze(0).to(device)

        trg_mask = model.make_trg_mask(trg_tensor)

        with torch.no_grad():
            output, attention = model.decoder(trg_tensor, enc_src, trg_mask, src_mask, visual_features_102, visual_features_103, enc_vis_src)

        # Greedy choice: highest-scoring token at the last position.
        pred_token = output.argmax(2)[:,-1].item()

        trg_indexes.append(pred_token)

        if pred_token == eos_index:
            break

    trg_tokens = [trg_field.vocab.itos[i] for i in trg_indexes]

    return trg_tokens[1:], attention

In [None]:
# Disabled cell: the attention heat-map helper below is wrapped in a string
# literal, so running this cell defines nothing and only echoes the text.
'''def display_attention(sentence, translation, attention, n_heads = 8, n_rows = 4, n_cols = 2):
    
    assert n_rows * n_cols == n_heads
    
    fig = plt.figure(figsize=(15,25))
    
    for i in range(n_heads):
        
        ax = fig.add_subplot(n_rows, n_cols, i+1)
        
        _attention = attention.squeeze(0)[i].cpu().detach().numpy()

        cax = ax.matshow(_attention, cmap='bone')

        ax.tick_params(labelsize=12)
        ax.set_xticklabels(['']+['<sos>']+[t.lower() for t in sentence]+['<eos>'], 
                           rotation=45)
        ax.set_yticklabels(['']+translation)

        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))

    plt.show()
    plt.close()
'''

"def display_attention(sentence, translation, attention, n_heads = 8, n_rows = 4, n_cols = 2):\n    \n    assert n_rows * n_cols == n_heads\n    \n    fig = plt.figure(figsize=(15,25))\n    \n    for i in range(n_heads):\n        \n        ax = fig.add_subplot(n_rows, n_cols, i+1)\n        \n        _attention = attention.squeeze(0)[i].cpu().detach().numpy()\n\n        cax = ax.matshow(_attention, cmap='bone')\n\n        ax.tick_params(labelsize=12)\n        ax.set_xticklabels(['']+['<sos>']+[t.lower() for t in sentence]+['<eos>'], \n                           rotation=45)\n        ax.set_yticklabels(['']+translation)\n\n        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))\n        ax.yaxis.set_major_locator(ticker.MultipleLocator(1))\n\n    plt.show()\n    plt.close()\n"

In [None]:
# Disabled cell: a single-example translation demo wrapped in a string literal,
# so none of the statements below execute; the cell only echoes the text.
'''text_idx = 25                #renamed from 'example_idx'
image_idx="9"

path=r"visual_features_rcnn.csv"

src = vars(train_data.examples[text_idx])['src']
trg = vars(train_data.examples[text_idx])['trg']
#img_path = vars(train_data.examples[text_idx])['img_path']


translation, attention = translate_sentence(src, SRC, TRG, image_idx, path, model, device)

#print(f'src = {src}\n')
print(f'Ground-truth Comment')
print( '--------------------')
print(f'trg = {trg}')

print(f'\n\nPredicted Feedback (Feedback)')
print( '-----------------------------')
    
print(f'{translation}')
'''

'text_idx = 25                #renamed from \'example_idx\'\nimage_idx="9"\n\npath=r"visual_features_rcnn.csv"\n\nsrc = vars(train_data.examples[text_idx])[\'src\']\ntrg = vars(train_data.examples[text_idx])[\'trg\']\n#img_path = vars(train_data.examples[text_idx])[\'img_path\']\n\n\ntranslation, attention = translate_sentence(src, SRC, TRG, image_idx, path, model, device)\n\n#print(f\'src = {src}\n\')\nprint(f\'Ground-truth Comment\')\nprint( \'--------------------\')\nprint(f\'trg = {trg}\')\n\nprint(f\'\n\nPredicted Feedback (Feedback)\')\nprint( \'-----------------------------\')\n    \nprint(f\'{translation}\')\n'

In [None]:
#display_attention(src, translation, attention)

In [None]:
# Load the raw test split and show its (rows, columns) shape.
dataset = pd.read_csv("test_data.csv")
dataset.shape

(100, 4)

In [None]:
# Keep only test rows whose source text has at least N_SRC_WORDS whitespace-separated
# words, and cut the kept texts down to exactly N_SRC_WORDS words.
# NOTE(review): 100 presumably leaves room for <sos>/<eos> within the 102-token
# source length checked further down - confirm.
N_SRC_WORDS = 100

src_words = dataset['src'].str.split()
# Missing or non-string sources give NaN here, which compares False and is dropped.
long_enough = src_words.str.len() >= N_SRC_WORDS

# Build the filtered frame in one step and assign the column on that frame itself.
# The previous per-row `dataset['src'][i] = ...` was chained indexing, which pandas
# warns may write to a temporary copy instead of `dataset`.
dataset = dataset.loc[long_enough].copy()
dataset['src'] = src_words[long_enough].str[:N_SRC_WORDS].str.join(" ")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['src'][i] = " ".join(dataset['src'][i].split()[0:100]) #102?


In [None]:
# Persist the filtered rows and load them back; writing without the index and
# re-reading gives the frame a fresh 0..n-1 index for the label-based loops below.
filtered_test_csv = 'temp/test_data2.csv'
dataset.to_csv(filtered_test_csv, index=False)
dataset = pd.read_csv(filtered_test_csv)

In [None]:
# Drop every test row whose source, once tokenised and wrapped in the init/eos
# tokens, is not exactly EXPECTED_SRC_TOKENS long.
# NOTE(review): 102 presumably mirrors `max_length` from the config cell - confirm.
EXPECTED_SRC_TOKENS = 102

rows_to_drop = []
for i in range(dataset.shape[0]):
    sentence = dataset['src'][i]

    if isinstance(sentence, str):
        # Reuse the spaCy pipeline loaded once at the top of the notebook rather
        # than reloading the model for every row.
        tokens = [token.text.lower() for token in nlp(sentence)]
    else:
        tokens = [token.lower() for token in sentence]

    tokens = [SRC.init_token] + tokens + [SRC.eos_token]

    # The token count equals the width of the index tensor that used to be built
    # here, so no vocabulary lookup or device transfer is needed to measure it.
    if len(tokens) != EXPECTED_SRC_TOKENS:
        rows_to_drop.append(i)

# One drop over all collected labels instead of re-copying the frame per row.
dataset = dataset.drop(rows_to_drop)

In [None]:
# Save the final test set (this is the file qual_eval reads) and reload it so the
# index is contiguous again; then preview everything except the last row.
final_test_csv = 'temp/test_data1.csv'
dataset.to_csv(final_test_csv, index=False)
dataset = pd.read_csv(final_test_csv)
dataset.head(-1)

Unnamed: 0,src,trg,img_path,img_id
0,there can be few people who have not at some s...,most people want to be fooled,/home/puneet/code/Multimodal Feedback/data/133...,133
1,federal and local law enforcement sources told...,a police officer has died too,/home/puneet/code/Multimodal Feedback/data/174...,174
2,item one on the agenda according to the kremli...,from a german erspective the longing for sputn...,/home/puneet/code/Multimodal Feedback/data/227...,227
3,new delhi union health and family welfare mini...,need to learn to take care of your countrymen ...,/home/puneet/code/Multimodal Feedback/data/714...,714
4,the tone of joe bidens first official press co...,yesterdays press conference was a complete emb...,/home/puneet/code/Multimodal Feedback/data/820...,820
...,...,...,...,...
77,listen and subscribe to the daily apple podcas...,you have learned absolutely nothing about cent...,/home/puneet/code/Multimodal Feedback/data/894...,8948
78,file photo feb bradenton florida usa brooks ko...,georgia all who play will be remembered as tra...,/home/puneet/code/Multimodal Feedback/data/916...,9161
79,former presidents barack obama and donald trum...,mlb forgot to take the team with them the brav...,/home/puneet/code/Multimodal Feedback/data/932...,9327
80,for new yorkers a pandemic years fight for the...,for football betting fans search for the chann...,/home/puneet/code/Multimodal Feedback/data/935...,9353


In [None]:
# Sanity check: report rows whose source has fewer than 100 regex word tokens.
# Printed fields: row label, word count (printed twice), img_id.
word_pattern = re.compile(r'\w+')
for row in range(len(dataset)):
    n_words = len(word_pattern.findall(dataset["src"][row]))
    if n_words < 100:
        print(row, " ", n_words, " ", n_words, " ", dataset["img_id"][row])

57   98   98   8005
77   98   98   8948


### Spice & Meteor
#### (These hit an error in py3 and need to run in py2. Tried switching the kernel to py2 and running them here, but T_EPOCHS gets cleared, so use the steps below instead.)
#### 1. Go to '/home/puneet/code/EvalMetrics_py2'
#### 2. Copy 'nEp_test_comments.csv' & 'nEp_test_feedbacks.csv'
#### 3. Rename as 'test_comments.csv' & 'test_feedbacks.csv'
#### 4. Run eval.py with 'py2' env
#### 5. Rename the generated 'scores.txt' file  

In [None]:
def quant_eval(model, optimizer, start_epoch, chpt_file):
    """Score the saved test predictions with BLEU-1..4, CIDEr and ROUGE.

    Reloads the checkpoint to recover the epoch counter, reads
    ``temp/<T_EPOCHS>Ep_test_comments.csv`` (references) and
    ``temp/<T_EPOCHS>Ep_test_feedbacks.csv`` (predictions), prints the scores,
    writes them to ``scores.txt`` and logs them as text to ``log_writer``.

    Args:
        model, optimizer: passed to ``load_checkpt``.
        start_epoch: ignored; replaced by the value returned by ``load_checkpt``.
        chpt_file: checkpoint path.

    Returns:
        None.
    """
    from pycocoevalcap.bleu.bleu import Bleu
    from pycocoevalcap.cider.cider import Cider
    from pycocoevalcap.meteor.meteor import Meteor
    from pycocoevalcap.rouge.rouge import Rouge
    from pycocoevalcap.spice.spice import Spice
    import os, csv

    model, optimizer, start_epoch = load_checkpt(model, optimizer, chpt_file)
    # NOTE(review): the checkpoint written by interval_train already stores the
    # total trained epochs, so adding EP_INT makes this label run EP_INT ahead.
    # It is left as is because qual_eval names the files it writes the same way.
    T_EPOCHS = start_epoch + EP_INT

    # Fetches the Stanford models; the original comment says this only needs to run once.
    os.system("sh get_stanford_models.sh")

    def read_pairs(csv_path):
        """Read an ``id,text`` CSV into ``{id: [text]}``, skipping the header row."""
        with open(csv_path, "r", newline='') as handle:
            rows = [row for row in csv.reader(handle) if row]
        # qual_eval writes an 'id,...' header line; scoring it would add a bogus
        # sample ('comment' vs. 'feedback') to every metric.
        if rows and rows[0][0] == 'id':
            rows = rows[1:]
        return {row[0]: row[1:] for row in rows}

    gts = read_pairs('temp/'+str(T_EPOCHS)+'Ep_test_comments.csv')
    res = read_pairs('temp/'+str(T_EPOCHS)+'Ep_test_feedbacks.csv')

    def bleu():
        scorer = Bleu(n=4)
        score, scores = scorer.compute_score(gts, res)
        return score

    def cider():
        scorer = Cider()
        (score, scores) = scorer.compute_score(gts, res)
        return score

    def rouge():
        scorer = Rouge()
        score, scores = scorer.compute_score(gts, res)
        return score

    # spice() and meteor() are kept for convenience but are not called below.
    def spice():
        scorer = Spice()
        score, scores = scorer.compute_score(gts, res)
        return score

    def meteor():
        scorer = Meteor()
        # Previously referenced the undefined names bgts/bres.
        score, scores = scorer.compute_score(gts, res)
        return score

    s_cider=cider()
    s_rouge=rouge()
    s_bleu=bleu()

    print('\n----------------------\nbleu = %s' %s_bleu )
    print('cider = %s' %s_cider )
    print('rouge = %s' %s_rouge )

    b=" ".join(str(x) for x in s_bleu)
    print('\n----------------------')
    # Context manager so the file is closed even if a write fails.
    with open('scores.txt', 'w') as f:
        f.write("\ncider: %f" % s_cider)
        f.write("\nrouge: %f" % s_rouge)
        f.write("\nbleu :")
        f.write(b)

    #Log with Tensorboard: Eval metrics
    tag_prefix = str(T_EPOCHS)+'Ep=>Metrics/'
    log_writer.add_text(tag_prefix+'cider', str(s_cider))
    log_writer.add_text(tag_prefix+'rouge', str(s_rouge))
    for order in range(1, 5):
        log_writer.add_text(tag_prefix+'bleu-'+str(order), str(s_bleu[order-1]))

In [None]:
def qual_eval(model, optimizer, start_epoch, chpt_file):
    """Generate feedback for every test row, save it to CSV and log samples to TensorBoard.

    Reloads the checkpoint, runs ``translate_sentence`` on each row of
    ``temp/test_data1.csv`` and writes three files under ``temp/``:
    ``<T_EPOCHS>Ep_test_results.csv`` (all columns plus ``pred``),
    ``<T_EPOCHS>Ep_test_comments.csv`` (``id,comment``) and
    ``<T_EPOCHS>Ep_test_feedbacks.csv`` (``id,feedback``).
    Every 10th sample is also logged (text, target, prediction, image) to
    ``log_writer``.

    Args:
        model, optimizer: passed to ``load_checkpt``.
        start_epoch: ignored; replaced by the value returned by ``load_checkpt``.
        chpt_file: checkpoint path.

    Returns:
        None.
    """
    model, optimizer, start_epoch = load_checkpt(model, optimizer, chpt_file)
    # NOTE(review): the checkpoint written by interval_train already stores the
    # total trained epochs, so adding EP_INT makes this label run EP_INT ahead.
    # It is left as is because quant_eval looks the files up under the same name.
    T_EPOCHS = start_epoch + EP_INT

    test_df=pd.read_csv("temp/test_data1.csv")
    path=r"visual_features_rcnn.csv"
    detokenizer = TreebankWordDetokenizer()
    test_pred=[]

    for i in range(test_df.shape[0]):
        src=test_df['src'][i]
        trg=test_df['trg'][i]
        image_idx=test_df['img_id'][i]
        translation, attention = translate_sentence(src, SRC, TRG, image_idx, path, model, device)

        if translation:
            # Strip the trailing end-of-sequence token only when it is really
            # there: decoding that stops at max_len ends on a genuine word.
            if translation[-1] == TRG.eos_token:
                words = translation[:-1]
            else:
                words = translation
            test_pred.append(str(detokenizer.detokenize(words)))
        else:
            # Placeholder stored as-is rather than being sliced and
            # detokenised character by character.
            translation="*empty*"
            test_pred.append(translation)

        if (i%10==0):
            #Log with Tensorboard: Text, Comment, Image and Feedback
            log_writer.add_text(str(T_EPOCHS)+'Ep=>Ground-truth Comment of Sample/'+str(i+1), str(trg))
            log_writer.add_text(str(T_EPOCHS)+'Ep=>News Text of Sample/'+str(i+1), str(src))
            log_writer.add_text(str(T_EPOCHS)+'Ep=>Predicted Feedback of Sample/'+str(i+1), str(translation))
            # Open the image only for logged samples, and close the file afterwards.
            with Image.open(test_df['img_path'][i]) as image_file:
                image = ToTensor()(image_file)
            log_writer.add_image(str(T_EPOCHS)+'Ep:Image of Sample/'+str(i+1), image)

    out_prefix = 'temp/'+str(T_EPOCHS)+'Ep_test_'

    test_df["pred"] = test_pred
    test_df.to_csv(out_prefix+'results.csv', index= False)
    # Write the final 'id,<name>' layout directly. The earlier write / re-read /
    # rename round trip let read_csv reinterpret values (e.g. a prediction of
    # "null" or "NA" came back as missing and was saved as an empty cell).
    test_df.to_csv(out_prefix+'comments.csv', columns=["trg"], header=["comment"], index_label='id')
    test_df.to_csv(out_prefix+'feedbacks.csv', columns=["pred"], header=["feedback"], index_label='id')

    print('tensorboard --logdir "/home/puneet/code/Multimodal Feedback/TBlogs"\n---')

In [None]:
def interval_train(model, optimizer, start_epoch, chpt_file):
    """Resume from `chpt_file` and train for ``EP_INT`` further epochs.

    After every epoch the train/validation loss and perplexity are logged to
    ``log_writer``, printed, and a checkpoint is written to `chpt_file`.

    Args:
        model, optimizer: restored in place by ``load_checkpt`` and then trained.
        start_epoch: ignored; replaced by the value returned by ``load_checkpt``.
        chpt_file: checkpoint path, read first and overwritten after each epoch.

    Returns:
        None.
    """
    model, optimizer, start_epoch = load_checkpt(model, optimizer, chpt_file)
    T_EPOCHS = start_epoch + EP_INT

    print('Already trained for',start_epoch, 'epochs. Training now for', EP_INT, 'more')

    for epoch in range(start_epoch, T_EPOCHS):
        start_time = time.time()

        train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
        valid_loss = evaluate(model, valid_iterator, criterion)

        #Log with Tensorboard: Loss & PPL for train & val
        log_writer.add_scalar('Train/Loss',float(train_loss), epoch+1)
        log_writer.add_scalar('Train/PPL', float(math.exp(train_loss)), epoch+1)
        log_writer.add_scalar('Val/Loss',float(valid_loss), epoch+1)
        log_writer.add_scalar('Val/PPL', float(math.exp(valid_loss)), epoch+1)

        end_time = time.time()

        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # Record the epochs actually completed. Stamping every checkpoint with
        # T_EPOCHS made a run interrupted mid-interval resume as if the whole
        # interval had been trained. The last checkpoint of a finished interval
        # still stores T_EPOCHS.
        state = {'epoch': epoch + 1, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'loss': train_loss}
        torch.save(state, chpt_file)

        print(f'\nEpoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

    torch.cuda.empty_cache()

In [None]:
# Driver: restore any existing checkpoint, then alternate one training interval
# (EP_INT epochs) with qualitative and quantitative evaluation.
chpt_file = '/home/puneet/code/Multimodal Feedback/checkpoints/baseline4.pt'
model, optimizer, start_epoch = load_checkpt(model, optimizer, chpt_file)
T_EPOCHS = start_epoch + EP_INT

for i in range(EPOCHS):
    # Only every EP_INT-th iteration starts an interval; each stage reloads the
    # checkpoint itself, so the start_epoch passed in is only a placeholder.
    if i % EP_INT != 0:
        continue
    interval_train(model, optimizer, start_epoch, chpt_file)
    print('\nEvaluation\n----------------------')
    qual_eval(model, optimizer, start_epoch, chpt_file)
    quant_eval(model, optimizer, start_epoch, chpt_file)

=> Checkpoint NOT found '/home/puneet/code/Multimodal Feedback/checkpoints/baseline4.pt'
=> Checkpoint NOT found '/home/puneet/code/Multimodal Feedback/checkpoints/baseline4.pt'
Already trained for 0 epochs. Training now for 1 more

Epoch: 01 | Time: 8210m 4s
	Train Loss: 6.794 | Train PPL: 892.376
	 Val. Loss: 6.318 |  Val. PPL: 554.620

Evaluation
----------------------
=> loading checkpoint '/home/puneet/code/Multimodal Feedback/checkpoints/baseline4.pt'
=> loaded checkpoint '/home/puneet/code/Multimodal Feedback/checkpoints/baseline4.pt' (epoch 1)
tensorboard --logdir "/home/puneet/code/Multimodal Feedback/TBlogs"
---
=> loading checkpoint '/home/puneet/code/Multimodal Feedback/checkpoints/baseline4.pt'
=> loaded checkpoint '/home/puneet/code/Multimodal Feedback/checkpoints/baseline4.pt' (epoch 1)
{'testlen': 416, 'reflen': 1418, 'guess': [416, 332, 249, 166], 'correct': [40, 2, 0, 0]}
ratio: 0.2933709449927409

----------------------
bleu = [0.008647719345295986, 0.002164533003836