In [None]:
!pip3 install torch

In [None]:
import pickle
import json
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import gensim
import gensim.downloader as api
import torchtext
import torch
from torch import nn

In [None]:
# Base directory of the dataset files. Later cells build file names by plain
# string concatenation (e.g. path + "train.csv"), so the trailing slash is required.
path = '../input/minivqaiust/'

In [None]:
# Image features are stored as a pickle and the image -> questions mapping as JSON.
# NOTE(review): pickle.load executes arbitrary code embedded in the file; only
# load this pickle if the dataset source is trusted.
with open(path + "image_features.pickle", 'rb') as features_file:
    img = pickle.load(features_file)

with open(path + "image_question.json") as questions_file:
    img_q = json.load(questions_file)

In [None]:
# Each split CSV supplies the question ids; train and val also carry the labels.
df = pd.read_csv(path + "train.csv")
q_train_idx, label_train = list(df["question_id"]), list(df["label"])

df = pd.read_csv(path + "val.csv")
q_val_idx, label_val = list(df["question_id"]), list(df["label"])

# The test split only provides question ids.
df = pd.read_csv(path + "test.csv")
q_test_idx = list(df["question_id"])

In [None]:
questions_train = []
image_features_train = []

# Flatten the image -> [question entries] mapping into a lookup keyed by the
# first field of each entry, so a question id resolves directly to its text
# (second field) and the id of the image it belongs to.
all_qs = {
    entry[0]: {'question': entry[1], 'image_id': str(image_key)}
    for image_key, entries in img_q.items()
    for entry in entries
}
all_qs[131087000]

In [None]:
# Build both lists from scratch inside this cell (like the val/test cells do),
# so re-running the cell does not append a second copy of the training data.
questions_train = [all_qs[idx]['question'] for idx in q_train_idx]
image_features_train = [img[all_qs[idx]['image_id']] for idx in q_train_idx]

In [None]:
# Question text and image features for every validation question id, in CSV order.
questions_val = [all_qs[qid]['question'] for qid in q_val_idx]
image_features_val = [img[all_qs[qid]['image_id']] for qid in q_val_idx]

In [None]:
# Question text and image features for every test question id, in CSV order.
questions_test = [all_qs[qid]['question'] for qid in q_test_idx]
image_features_test = [img[all_qs[qid]['image_id']] for qid in q_test_idx]

In [None]:
class TextEmbedding(nn.Module):
    """Thin wrapper around a single ``nn.Embedding`` lookup table.

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: width of each embedding vector.
    """

    def __init__(self, vocab_size, embed_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)

    def forward(self, q):
        """Return the embedding vectors for the integer index tensor ``q``."""
        return self.embedding(q)

In [None]:
# Load the 'word2vec-google-news-300' model through gensim's downloader API.
# NOTE(review): presumably a large download on first use — confirm it is cached
# in this environment before relying on Restart & Run All.
pre_model = api.load('word2vec-google-news-300')

In [None]:
# torchtext's 'basic_english' tokenizer; encode() calls it to split a question into tokens.
tokenizer = torchtext.data.utils.get_tokenizer('basic_english')

In [None]:
# gensim >= 4 exposes the word -> index mapping as `key_to_index`; the `vocab`
# attribute used by gensim 3.x was removed there. Support both releases.
_word_index = getattr(pre_model, 'key_to_index', None)
if _word_index is None:
    _word_index = pre_model.vocab
vocab = list(_word_index.keys())

# Embedding width, measured on the vector of a word known to be in the model.
embed_size = len(pre_model.get_vector('me'))

In [None]:
# One embedding row per vocabulary word plus one extra row; encode() maps
# tokens that are missing from word_dict to that extra index (len(vocab)).
# NOTE(review): nn.Embedding initialises its weights randomly and nothing visible
# in this notebook copies pre_model's vectors into it — confirm whether the
# pretrained word2vec embeddings were meant to be used here.
process_text = TextEmbedding(len(vocab) + 1, embed_size)

In [None]:
# Map every vocabulary word to its position in `vocab`.
word_dict = {word: position for position, word in enumerate(vocab)}
word_dict['me']

In [None]:
def encode(seq):
  """Convert a question string into a list of vocabulary indices.

  The string is split with the module-level ``tokenizer``; each token is looked
  up in ``word_dict``. Tokens that are not in ``word_dict`` map to
  ``len(vocab)``, the extra out-of-vocabulary index.

  Args:
    seq: the question text.

  Returns:
    A list of ints, one per token (empty if the tokenizer yields no tokens).
  """
  unknown_index = len(vocab)
  # A lookup with a default replaces the former bare `except:`, which also hid
  # unrelated failures (and KeyboardInterrupt) behind the OOV index.
  return [word_dict.get(tok, unknown_index) for tok in tokenizer(seq)]

In [None]:
def padify(b):
  """Encode a batch of questions and right-pad them to a common length.

  Args:
    b: iterable of question strings.

  Returns:
    A ``torch.long`` tensor of shape (len(b), longest_question) where shorter
    rows are filled with 0 on the right. An empty batch yields a (0, 0) tensor.
  """
  encoded = [encode(x) for x in b]
  if not encoded:
    # max() over an empty batch would raise; return an empty index tensor instead.
    return torch.empty((0, 0), dtype=torch.long)

  width = max(len(ids) for ids in encoded)
  # NOTE(review): the fill value 0 is also the index word_dict assigns to the
  # first vocabulary word — confirm that sharing this index with padding is intended.
  padded = torch.zeros((len(encoded), width), dtype=torch.long)
  for row, ids in enumerate(encoded):
    if ids:
      # Explicit dtype: a question with no tokens used to become a float tensor,
      # which is not a valid embedding index.
      padded[row, :len(ids)] = torch.tensor(ids, dtype=torch.long)
  return padded

In [None]:
# Encode the training and validation questions into padded index tensors
# (one row per question; each split is padded to its own longest question).
train_question_pad = padify(questions_train)
val_question_pad = padify(questions_val)

In [None]:
# Encode the test questions into a padded index tensor (one row per question).
test_question_pad = padify(questions_test)

In [None]:
# Embed the padded train/val question indices once, up front. Autograd is
# disabled, so the resulting tensors carry no graph and the embedding table
# is not trained through them.
with torch.no_grad():
    question_features_train = process_text(train_question_pad)
    question_features_val = process_text(val_question_pad)

In [None]:
# Embed the padded test question indices with autograd disabled.
with torch.no_grad():
  question_features_test = process_text(test_question_pad)

In [None]:
# Bundle (question embeddings, image features, labels) so one index yields one sample.
# NOTE(review): if the image features are numpy arrays, building a tensor from a
# Python list of them is slow — consider np.array(...) first; confirm the type.
train_dataset = torch.utils.data.TensorDataset(
    question_features_train,
    torch.tensor(image_features_train),
    torch.tensor(label_train),
)
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=64,
)

In [None]:
# Validation samples, bundled the same way as the training set.
# NOTE(review): shuffling is kept as in the original, though evaluation
# normally does not need it — confirm whether it is intentional.
val_dataset = torch.utils.data.TensorDataset(
    question_features_val,
    torch.tensor(image_features_val),
    torch.tensor(label_val),
)
val_dataloader = torch.utils.data.DataLoader(
    val_dataset,
    shuffle=True,
    batch_size=32,
)

In [None]:
class VQA(nn.Module):
    """Question + image classifier.

    The embedded question is run through a 2-layer LSTM and averaged over the
    sequence axis; the result is concatenated with the image feature vector and
    passed through a BatchNorm/Linear/ReLU head that produces class scores.

    Args:
        features_size: width of the concatenated [text, image] vector, i.e.
            ``hidden_size`` plus the image feature width.
        embed_dim: width of each token embedding fed to the LSTM.
        hidden_size: LSTM hidden width (and width of the pooled text feature).
        num_classes: number of output classes.
    """

    def __init__(self, features_size, embed_dim=300, hidden_size=512, num_classes=10):
        # Zero-argument super(): super(type(self), self) recurses forever as
        # soon as this class is subclassed.
        super().__init__()
        self.lstm = nn.LSTM(embed_dim, hidden_size, num_layers=2, dropout=0.1, batch_first=True)
        self.linear = nn.Sequential(
            nn.BatchNorm1d(features_size),
            nn.Linear(features_size, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Linear(128, num_classes),
            nn.BatchNorm1d(num_classes),
        )

    def forward(self, text, image):
        """Return unnormalised class scores of shape (batch, num_classes).

        Args:
            text: float tensor (batch, seq_len, embed_dim) of token embeddings.
            image: float tensor (batch, features_size - hidden_size).

        The scores are raw logits: ``nn.CrossEntropyLoss`` applies log-softmax
        itself, so applying softmax here as well flattens the loss and its
        gradients. ``argmax`` over the logits gives the same class as over
        probabilities; call ``softmax(dim=1)`` on the result if probabilities
        are needed.
        """
        text_feature, _ = self.lstm(text)
        # Mean over the sequence axis; every time step contributes, including
        # any positions that were padding in the input.
        text_feature = text_feature.mean(dim=1)
        x = torch.cat([text_feature, image], dim=1)
        return self.linear(x)

In [None]:
# features_size=1024 is the width of the concatenated [text, image] vector fed
# to the classifier head. VQA.forward concatenates the 512-wide LSTM output with
# the image features, so this assumes 512-wide image features — TODO confirm.
final_model = VQA(1024)

In [None]:
# Optimisation set-up: plain SGD on all model parameters with cross-entropy loss.
# NOTE(review): 0.3 is a large starting learning rate for SGD — confirm it was tuned.
learning_rate = 0.3
epochs = 30
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(final_model.parameters(), lr=learning_rate)
# Multiplies the learning rate by 0.9 on every scheduler.step() call.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, log_every=2):
    """Run one training epoch over ``dataloader``.

    Args:
        dataloader: yields ``(text, image, y)`` batches.
        model: module called as ``model(text, image)``.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: optimizer stepping the model's parameters.
        log_every: print the running loss every this many batches
            (0 or None disables logging). Defaults to 2.
    """
    size = len(dataloader.dataset)
    # Make sure dropout / batch-norm are in training mode even if the model
    # was switched to eval() by an earlier evaluation or inference step.
    model.train()

    for batch, (text, image, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(text, image)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if log_every and batch % log_every == 0:
            # Separate name so the loss tensor is not shadowed by a float.
            loss_value, current = loss.item(), batch * len(text)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for text, image, y in dataloader:
            pred = model(text, image)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
            
    test_loss /= size
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# One pass of training followed by validation per epoch; the learning rate is
# decayed once at the end of every epoch.
for epoch_number in range(1, epochs + 1):
    print(f"Epoch {epoch_number}\n-------------------------------")
    train_loop(train_dataloader, final_model, loss_fn, optimizer)
    test_loop(val_dataloader, final_model, loss_fn)
    scheduler.step()
print("Done!")

In [None]:
# Inference must run in eval mode: in train mode dropout is active and
# batch-norm normalises with the statistics of the test batch itself, which
# makes the predictions noisy and batch-dependent. The model is left in eval
# mode afterwards. Autograd is disabled because no gradients are needed and
# the whole test set goes through in a single forward pass.
final_model.eval()
with torch.no_grad():
    y = final_model(question_features_test, torch.tensor(image_features_test))

In [None]:
# Predicted class per test question = index of the largest score in each row.
results = y.argmax(dim=1).tolist()

# Column layout of the submission: question ids alongside their predicted labels.
labeldict = {
    'question_id': q_test_idx,
    'label': list(results),
}

In [None]:
# Submission table with one row per test question: 'question_id' and 'label' columns.
dfl = pd.DataFrame(labeldict)

In [None]:
# Display the submission table as the cell output.
dfl

In [None]:
# Write the predictions next to the input data, without the DataFrame index.
# NOTE(review): this targets the dataset input directory; if that location is
# read-only in the runtime environment the write will fail — confirm, and
# consider a dedicated output directory.
dfl.to_csv(f"{path}testvqa1_last.csv", index=False)