In [1]:
! pip install pytorch-nlp



In [0]:
import torch
import numpy as np
import torch.nn as nn
import torchtext

from torchtext.data import TabularDataset, Field, Iterator
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import Vocab
from torch import optim 
from torch.optim import lr_scheduler
import matplotlib.pyplot as plt
from torch.nn import functional as F
from torchnlp.nn import Attention

import copy
import time
from collections import namedtuple
from tqdm import tqdm, tqdm_notebook

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
%cd drive/"My Drive"/NN4NLP/topicclass

/content/drive/My Drive/NN4NLP/topicclass


In [6]:
! ls

bestMobile_v2_NetModel.pt
Classifier.ipynb
ConvNetClassificationTesting_10.txt
ConvNetClassificationTesting_11.txt
ConvNetClassificationTesting_12.txt
ConvNetClassificationTesting_13.txt
ConvNetClassificationTesting_2.txt
ConvNetClassificationTesting_3.txt
ConvNetClassificationTesting_4.txt
ConvNetClassificationTesting_5.txt
ConvNetClassificationTesting_6.txt
ConvNetClassificationTesting_7.txt
ConvNetClassificationTesting_8.txt
ConvNetClassificationTesting_9.txt
ConvNetClassificationTesting_real_10.txt
ConvNetClassificationTesting_real_11.txt
ConvNetClassificationTesting_real_12_redo.txt
ConvNetClassificationTesting_real_12.txt
ConvNetClassificationTesting_real_13_redo.txt
ConvNetClassificationTesting_real_13.txt
ConvNetClassificationTesting_real_14_redo.txt
ConvNetClassificationTesting_real_14.txt
ConvNetClassificationTesting_real_15_redo.txt
ConvNetClassificationTesting_real_15.txt
ConvNetClassificationTesting_real_16_redo.txt
ConvNetClassificationTesting_real_16.txt
ConvNetClassific

In [0]:
# ! wget "https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip"

In [0]:
#! wget "https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M-subword.zip"

In [0]:
# ! unzip crawl-300d-2M-subword.zip

In [0]:
# ! unzip crawl-300d-2M.vec.zip

In [0]:
int2Label = \
{0: 'Miscellaneous',
 1: 'Video games',
 2: 'Language and literature',
 3: 'Music',
 4: 'Social sciences and society',
 5: 'Sports and recreation',
 6: 'Natural sciences',
 7: 'Art and architecture',
 8: 'History',
 9: 'Warfare',
 10: 'Engineering and technology',
 11: 'Philosophy and religion',
 12: 'Agriculture, food and drink',
 13: 'Geography and places',
 14: 'Mathematics',
 15: 'Media and drama'}

In [0]:
np.random.seed(11747)
LOWER = False
LEARNING_RATE = 3e-4
N_CLASS = len(int2Label.keys())
MODELNO = "ensemble_out"
LOG_FILE = "ConvNetClassificationTesting_" + str(MODELNO) + ".txt"
VERBOSE_LOG_FILE = "VerboseConvNetClassificationTesting_" + str(MODELNO) + ".txt"
STEP_SIZE = 5
GAMMA = 1.0
NUMBER_EPOCHS = 10
EMBEDDING_DIM = 300

In [0]:
f = open(LOG_FILE,"w+")
v = open(VERBOSE_LOG_FILE, "w+")

In [0]:
tokenizer = get_tokenizer("spacy")

TEXT = Field(sequential=True, tokenize=tokenizer, lower=LOWER, batch_first=True )

LABEL = Field(sequential=False, use_vocab=False, batch_first=True)

In [0]:
train, val, test = TabularDataset.splits(".", 
                                            train = "topicclass_train.csv", 
                                            validation = "topicclass_valid_fixed.csv", 
                                            test = "topicclass_test.csv", 
                                            format = "csv", 
                                            skip_header = True,
                                            fields = [('label', LABEL), ('text', TEXT)])

In [0]:
TEXT.build_vocab(train, val, test)

In [0]:
embedding_mtx = torch.load("embedding_mtx.pt")

In [0]:
# defined_vocab = set()
# num_lines = 2000000

# with open("crawl-300d-2M.vec") as vec_file: 
#   for i, line in enumerate(tqdm_notebook(vec_file, total=num_lines)): 
#     if i == 0: 
#       pass
#     else: 
#       word, vector = line.split(" ", 1)
#       if word in TEXT.vocab.stoi: 
#         embedding_mtx[TEXT.vocab.stoi.get(word)] =  torch.from_numpy(np.fromstring(vector, sep = " "))
#         defined_vocab.add(word)



In [12]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [13]:
print("device is: {}".format(device))
f.write("device is: {}\n".format(device))
v.write("device is: {}\n".format(device))

device is: cuda:0


18

In [0]:
train_loader = Iterator.splits(   train, 
                                  batch_size = 64, 
                                  shuffle = True, 
                                  sort_key = lambda x: len(x.text), 
                                  device = device, 
                                  )

val_loader = Iterator(val, shuffle= False, batch_size=64, device = device )
test_loader = Iterator(test, shuffle= False, batch_size=64, device = device)

In [0]:
dataloaders = {'train': train_loader, 'val': val_loader}
dataset_sizes = {'train': len(train), 'val': len(val)}

Metric = namedtuple('Metric', ['loss', 'train_error', 'val_error'])

In [0]:
class ConvClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, channels_first = 64, channels_second = 64, kernels = (2,3,4,5), dropout = 0.1): 
        super().__init__()
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)
        
        # self.embedding_delta = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        
        self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        self.bn1 = nn.BatchNorm1d(channels_first)
        
        self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        self.bn2 = nn.BatchNorm1d(channels_first)

        self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        self.bn3 = nn.BatchNorm1d(channels_first)

        self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        self.bn4 = nn.BatchNorm1d(channels_first)
    
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(channels_first, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            embeddings = self.embedding(texts)
        #embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)
        conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2,)))

        conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out), dim = 1)
        conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        pool = nn.MaxPool1d(kernel_size = conv_out.shape[2])
#         print(f"conv2 shape is {conv2_out.shape}")
        
        pooled_out = pool(conv_out).squeeze(2)
        #print(f"embeddings shape {embeddings.shape} and conv out is {conv_out.shape} and pooled_out is {pooled_out.shape}")
        
#         print(f"pooled out shape is {pooled_out.shape}")
        pooled_out = self.dropout(pooled_out)
        out = self.fc(pooled_out)
        
        return out
        
        
        
class LSTMClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, hidden_size = 256, num_layers = 2, dropout = 0.1): 
        super().__init__()
        self.n_layers = num_layers
        self.hidden_size = hidden_size
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)

        self.rnn = nn.LSTM(input_size = self.embedding_size, hidden_size = hidden_size, num_layers = num_layers, batch_first = True, bidirectional = True)
        
        # self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        # self.bn1 = nn.BatchNorm1d(channels_first)
        
        # self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        # self.bn2 = nn.BatchNorm1d(channels_first)

        # self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        # self.bn3 = nn.BatchNorm1d(channels_first)

        # self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        # self.bn4 = nn.BatchNorm1d(channels_first)
        
    
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size*2, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            embeddings = self.embedding(texts)
        #embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)
        # conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        # conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        # conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        # conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2)))
        # conv5_out = self.conv5(F.pad(embeddings.transpose(1,2), (3,2)))
        # conv6_out = self.conv6(F.pad(embeddings.transpose(1,2), (3,3)))

        # conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out, conv5_out, conv6_out), dim = 1)
        # conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        outputs, hiddens = self.rnn(embeddings)
        # hiddens[0] -> (num_layers * num_directions, batch, hidden_size)
        h_n, _ = hiddens
        #print(h_n.shape)
        #print(h_n.transpose(0,1).shape)
        
        # num_layers * num_directions, batch, hidden_size
        #hidden_out = h_n.view(embeddings.shape[0], self.n_layers, 2, self.hidden_size)[:, self.n_layers-1].view(embeddings.shape[0], 2*self.hidden_size)
        
        hidden_out = h_n.reshape(self.n_layers, 2, -1, self.hidden_size)
        #print(f"hidden_out shape right after reshape is {hidden_out.shape}")
        hidden_out = hidden_out[self.n_layers-1]
        #print(f"hidden_out after indexing is {hidden_out.shape}")
        hidden_out = hidden_out.transpose(0,1).reshape(-1, 2*self.hidden_size)
        
        #hidden_out = h_n.transpose(0,1).reshape(-1, self.hidden_size * 2)
        #print(f"hidden_out shape is {hidden_out.shape}, h_n is {h_n.shape}")

#         pool = nn.MaxPool1d(kernel_size = conv_out.shape[2])
# #         print(f"conv2 shape is {conv2_out.shape}")
        
#         pooled_out = pool(conv_out).squeeze(2)
        #print(f"embeddings shape {embeddings.shape} and conv out is {conv_out.shape} and pooled_out is {pooled_out.shape}")
        
#         print(f"pooled out shape is {pooled_out.shape}")

        hidden_out = self.dropout(hidden_out)

        out = self.fc(hidden_out)
        
        return out
        
        
        
        
        
class AttnClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, channels_first = 64, channels_second = 64, kernels = (2,3,4,5), dropout = 0.1): 
        super().__init__()
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)
        
        # self.embedding_delta = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        
        # self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        # self.bn1 = nn.BatchNorm1d(channels_first)
        
        # self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        # self.bn2 = nn.BatchNorm1d(channels_first)

        # self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        # self.bn3 = nn.BatchNorm1d(channels_first)

        # self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        # self.bn4 = nn.BatchNorm1d(channels_first)

        self.self_attention1 = Attention(self.embedding_size)
        self.self_attention2 = Attention(self.embedding_size)
    
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(self.embedding_size, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            embeddings = self.embedding(texts)
        #embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)

        attn_out = self.self_attention1(embeddings, embeddings)[0]
        attn_out = self.self_attention2(attn_out, attn_out)[0] + attn_out



        # conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        # conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        # conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        # conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2,)))

        # conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out), dim = 1)
        # conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        pool = nn.MaxPool1d(kernel_size = attn_out.shape[1])
#         print(f"conv2 shape is {conv2_out.shape}")
        
        pooled_out = pool(attn_out.transpose(1,2)).squeeze(2)
        # print(f"embeddings shape {embeddings.shape} and attn out is {attn_out.shape} and pooled_out is {pooled_out.shape}")
        
#         print(f"pooled out shape is {pooled_out.shape}")
        pooled_out = self.dropout(pooled_out)
        out = self.fc(pooled_out)
        
        return out
        
        
        
        
        

In [16]:
model1 = torch.load("Model_real_22.pt", map_location=torch.device('cpu'))

model2 = torch.load("Model_real_29.pt", map_location=torch.device('cpu'))

model3 = torch.load("Model_real_31.pt", map_location=torch.device('cpu'))



In [40]:
# pip install torch==1.2.0

Collecting torch==1.2.0
[?25l  Downloading https://files.pythonhosted.org/packages/30/57/d5cceb0799c06733eefce80c395459f28970ebb9e896846ce96ab579a3f1/torch-1.2.0-cp36-cp36m-manylinux1_x86_64.whl (748.8MB)
[K     |████████████████████████████████| 748.9MB 22kB/s 
[31mERROR: torchvision 0.5.0 has requirement torch==1.4.0, but you'll have torch 1.2.0 which is incompatible.[0m
Installing collected packages: torch
  Found existing installation: torch 1.4.0
    Uninstalling torch-1.4.0:
      Successfully uninstalled torch-1.4.0
Successfully installed torch-1.2.0


In [17]:

model1.to(device)
model2.to(device)
model3.to(device)
model1.eval()
model2.eval()
model3.eval()

AttnClassifier(
  (embedding): Embedding(137968, 300)
  (self_attention1): Attention(
    (linear_in): Linear(in_features=300, out_features=300, bias=False)
    (linear_out): Linear(in_features=600, out_features=300, bias=False)
    (softmax): Softmax(dim=-1)
    (tanh): Tanh()
  )
  (self_attention2): Attention(
    (linear_in): Linear(in_features=300, out_features=300, bias=False)
    (linear_out): Linear(in_features=600, out_features=300, bias=False)
    (softmax): Softmax(dim=-1)
    (tanh): Tanh()
  )
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
  (fc): Linear(in_features=300, out_features=16, bias=True)
)

In [18]:
class ConvClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, channels_first = 128, channels_second = 128, kernel_first = 2, kernel_second = 2): 
        super().__init__()
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)
        
        #self.embedding_delta = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        
        self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first, dilation = 1, kernel_size = kernel_first, padding = kernel_first - 1)
        self.bn1 = nn.BatchNorm1d(channels_first)
        
        self.conv2 = nn.Conv1d(in_channels=channels_first, out_channels=channels_second, dilation = 1, kernel_size = kernel_second, padding = kernel_second - 1)
        self.bn2 = nn.BatchNorm1d(channels_second)
    
        self.relu = nn.ReLU()

        self.dropout = nn.Dropout(0.2)
        
        self.fc = nn.Linear(channels_first, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            static_embeddings = self.embedding(texts)
        embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)
        conv1_out = self.conv1(embeddings.transpose(1,2))
        conv1_out = self.bn1(conv1_out)
        conv1_out = self.relu(conv1_out)
        
        # conv2_out = self.conv2(conv1_out)
        # conv2_out = self.bn2(conv2_out)
        # conv2_out = self.relu(conv2_out) 
        
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        pool = nn.MaxPool1d(kernel_size = conv1_out.shape[2])
#         print(f"conv2 shape is {conv2_out.shape}")
        
        pooled_out = pool(conv1_out).squeeze(2)
        
#         print(f"pooled out shape is {pooled_out.shape}")

        #pooled_out = self.dropout(pooled_out)
        
        out = self.fc(pooled_out)
        
        return out
        
model4 = torch.load("Model_real_2.pt", map_location=torch.device('cpu'))
model4.to(device)       
model4.eval()

class ConvClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, channels_first = 64, channels_second = 64, kernels = (2,3,4,5,6,7), dropout = 0.1): 
        super().__init__()
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)
        
        # self.embedding_delta = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        
        self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        self.bn1 = nn.BatchNorm1d(channels_first)
        
        self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        self.bn2 = nn.BatchNorm1d(channels_first)

        self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        self.bn3 = nn.BatchNorm1d(channels_first)

        self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        self.bn4 = nn.BatchNorm1d(channels_first)

        self.conv5 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[4])
        self.bn4 = nn.BatchNorm1d(channels_first)

        self.conv6 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[5])
        self.bn4 = nn.BatchNorm1d(channels_first)
    
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(channels_first, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            embeddings = self.embedding(texts)
        #embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)
        conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2)))
        conv5_out = self.conv5(F.pad(embeddings.transpose(1,2), (3,2)))
        conv6_out = self.conv6(F.pad(embeddings.transpose(1,2), (3,3)))

        conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out, conv5_out, conv6_out), dim = 1)
        conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        pool = nn.MaxPool1d(kernel_size = conv_out.shape[2])
#         print(f"conv2 shape is {conv2_out.shape}")
        
        pooled_out = pool(conv_out).squeeze(2)
        #print(f"embeddings shape {embeddings.shape} and conv out is {conv_out.shape} and pooled_out is {pooled_out.shape}")
        
#         print(f"pooled out shape is {pooled_out.shape}")
        pooled_out = self.dropout(pooled_out)
        out = self.fc(pooled_out)
        
        return out
        
        
model5 = torch.load("Model_real_24.pt", map_location=torch.device('cpu'))
model5.to(device)       
model5.eval()
        


class LSTMClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, hidden_size = 256, num_layers = 2, dropout = 0.1): 
        super().__init__()
        self.n_layers = num_layers
        self.hidden_size = hidden_size
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)

        self.rnn = nn.LSTM(input_size = self.embedding_size, hidden_size = hidden_size, num_layers = num_layers, batch_first = True, bidirectional = True)
        
        # self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        # self.bn1 = nn.BatchNorm1d(channels_first)
        
        # self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        # self.bn2 = nn.BatchNorm1d(channels_first)

        # self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        # self.bn3 = nn.BatchNorm1d(channels_first)

        # self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        # self.bn4 = nn.BatchNorm1d(channels_first)
        
    
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size*2, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            embeddings = self.embedding(texts)
        #embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)
        # conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        # conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        # conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        # conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2)))
        # conv5_out = self.conv5(F.pad(embeddings.transpose(1,2), (3,2)))
        # conv6_out = self.conv6(F.pad(embeddings.transpose(1,2), (3,3)))

        # conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out, conv5_out, conv6_out), dim = 1)
        # conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        outputs, hiddens = self.rnn(embeddings)
        # hiddens[0] -> (num_layers * num_directions, batch, hidden_size)
        h_n, _ = hiddens
        #print(h_n.shape)
        #print(h_n.transpose(0,1).shape)
        
        # num_layers * num_directions, batch, hidden_size
        #hidden_out = h_n.view(embeddings.shape[0], self.n_layers, 2, self.hidden_size)[:, self.n_layers-1].view(embeddings.shape[0], 2*self.hidden_size)
        
        hidden_out = h_n.reshape(self.n_layers, 2, -1, self.hidden_size)
        #print(f"hidden_out shape right after reshape is {hidden_out.shape}")
        hidden_out = hidden_out[self.n_layers-1]
        #print(f"hidden_out after indexing is {hidden_out.shape}")
        hidden_out = hidden_out.transpose(0,1).reshape(-1, 2*self.hidden_size)
        
        #hidden_out = h_n.transpose(0,1).reshape(-1, self.hidden_size * 2)
        #print(f"hidden_out shape is {hidden_out.shape}, h_n is {h_n.shape}")

#         pool = nn.MaxPool1d(kernel_size = conv_out.shape[2])
# #         print(f"conv2 shape is {conv2_out.shape}")
        
#         pooled_out = pool(conv_out).squeeze(2)
        #print(f"embeddings shape {embeddings.shape} and conv out is {conv_out.shape} and pooled_out is {pooled_out.shape}")
        
#         print(f"pooled out shape is {pooled_out.shape}")

        hidden_out = self.dropout(hidden_out)

        out = self.fc(hidden_out)
        
        return out
        
model6 = torch.load("Model_real_28.pt", map_location=torch.device('cpu'))
model6.to(device)       
model6.eval()      

class LSTMClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, hidden_size = 256, num_layers = 2, dropout = 0.1): 
        super().__init__()
        self.n_layers = num_layers
        self.hidden_size = hidden_size
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)

        self.rnn = nn.LSTM(input_size = self.embedding_size, hidden_size = hidden_size, num_layers = num_layers, batch_first = True, bidirectional = True)
        
        # self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        # self.bn1 = nn.BatchNorm1d(channels_first)
        
        # self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        # self.bn2 = nn.BatchNorm1d(channels_first)

        # self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        # self.bn3 = nn.BatchNorm1d(channels_first)

        # self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        # self.bn4 = nn.BatchNorm1d(channels_first)
        
    
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size*2, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            embeddings = self.embedding(texts)
        #embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)
        # conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        # conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        # conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        # conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2)))
        # conv5_out = self.conv5(F.pad(embeddings.transpose(1,2), (3,2)))
        # conv6_out = self.conv6(F.pad(embeddings.transpose(1,2), (3,3)))

        # conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out, conv5_out, conv6_out), dim = 1)
        # conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        outputs, hiddens = self.rnn(embeddings)
        # hiddens[0] -> (num_layers * num_directions, batch, hidden_size)
        h_n, _ = hiddens
        #print(h_n.shape)
        #print(h_n.transpose(0,1).shape)
        
        # num_layers * num_directions, batch, hidden_size
        #hidden_out = h_n.view(embeddings.shape[0], self.n_layers, 2, self.hidden_size)[:, self.n_layers-1].view(embeddings.shape[0], 2*self.hidden_size)
        
        hidden_out = h_n.reshape(self.n_layers, 2, -1, self.hidden_size)
        #print(f"hidden_out shape right after reshape is {hidden_out.shape}")
        hidden_out = hidden_out[self.n_layers-1]
        #print(f"hidden_out after indexing is {hidden_out.shape}")
        hidden_out = hidden_out.transpose(0,1).reshape(-1, 2*self.hidden_size)
        
        #hidden_out = h_n.transpose(0,1).reshape(-1, self.hidden_size * 2)
        #print(f"hidden_out shape is {hidden_out.shape}, h_n is {h_n.shape}")

#         pool = nn.MaxPool1d(kernel_size = conv_out.shape[2])
# #         print(f"conv2 shape is {conv2_out.shape}")
        
#         pooled_out = pool(conv_out).squeeze(2)
        #print(f"embeddings shape {embeddings.shape} and conv out is {conv_out.shape} and pooled_out is {pooled_out.shape}")
        
#         print(f"pooled out shape is {pooled_out.shape}")

        hidden_out = self.dropout(hidden_out)

        out = self.fc(hidden_out)
        
        return out
        
model7 = torch.load("Model_real_29.pt", map_location=torch.device('cpu'))
model7.to(device)       
model7.eval()      
                

class AttnClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, channels_first = 64, channels_second = 64, kernels = (2,3,4,5), dropout = 0.1): 
        super().__init__()
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)
        
        # self.embedding_delta = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        
        # self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        # self.bn1 = nn.BatchNorm1d(channels_first)
        
        # self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        # self.bn2 = nn.BatchNorm1d(channels_first)

        # self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        # self.bn3 = nn.BatchNorm1d(channels_first)

        # self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        # self.bn4 = nn.BatchNorm1d(channels_first)

        self.self_attention = Attention(self.embedding_size)
    
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(self.embedding_size, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            embeddings = self.embedding(texts)
        #embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)

        attn_out, _ = self.self_attention(embeddings, embeddings)



        # conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        # conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        # conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        # conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2,)))

        # conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out), dim = 1)
        # conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        pool = nn.MaxPool1d(kernel_size = attn_out.shape[1])
#         print(f"conv2 shape is {conv2_out.shape}")
        
        pooled_out = pool(attn_out.transpose(1,2)).squeeze(2)
        # print(f"embeddings shape {embeddings.shape} and attn out is {attn_out.shape} and pooled_out is {pooled_out.shape}")
        
#         print(f"pooled out shape is {pooled_out.shape}")
        pooled_out = self.dropout(pooled_out)
        out = self.fc(pooled_out)
        
        return out

        
model8 = torch.load("Model_real_32.pt", map_location=torch.device('cpu'))
model8.to(device)       
model8.eval()                     

class AttnClassifier(nn.Module): 
    def __init__(self, embeddings, n_class, channels_first = 64, channels_second = 64, kernels = (2,3,4,5), dropout = 0.1): 
        super().__init__()
        self.vocab_size, self.embedding_size = embeddings.shape
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        self.embedding.weight.data.copy_(embeddings)
        
        # self.embedding_delta = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        
        # self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        # self.bn1 = nn.BatchNorm1d(channels_first)
        
        # self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        # self.bn2 = nn.BatchNorm1d(channels_first)

        # self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        # self.bn3 = nn.BatchNorm1d(channels_first)

        # self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        # self.bn4 = nn.BatchNorm1d(channels_first)

        self.self_attention1 = Attention(self.embedding_size)
        self.self_attention2 = Attention(self.embedding_size)
        self.self_attention_query = Attention(self.embedding_size)


        self.softmax = nn.Softmax(dim = 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(self.embedding_size, n_class)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            embeddings = self.embedding(texts)
        #embeddings = static_embeddings
        #embeddings = static_embeddings + self.embedding_delta(texts)

        attn_out = self.self_attention1(embeddings, embeddings)[0]
        attn_out = self.self_attention2(attn_out, attn_out)[0] + attn_out

        attn_query = self.self_attention_query(attn_out, attn_out)[0]
        
        raw_energy = torch.mul(attn_query, attn_out).sum(axis = 2)
        normalized_energy = self.softmax(raw_energy).reshape(embeddings.shape[0], 1, -1)
        
        attn_contexts = torch.bmm(normalized_energy, attn_out).reshape(embeddings.shape[0], self.embedding_size)
    



        # conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        # conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        # conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        # conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2,)))

        # conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out), dim = 1)
        # conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
        # pool = nn.MaxPool1d(kernel_size = attn_out.shape[1])

#         print(f"conv2 shape is {conv2_out.shape}")
        
        # pooled_out = pool(attn_out.transpose(1,2)).squeeze(2)
        # print(f"embeddings shape {embeddings.shape} and attn out is {attn_out.shape} and pooled_out is {pooled_out.shape}")
        
#         print(f"pooled out shape is {pooled_out.shape}")
        pooled_out = self.dropout(attn_contexts)
        out = self.fc(pooled_out)
        
        return out

        
model9 = torch.load("Model_real_38.pt", map_location=torch.device('cpu'))
model9.to(device)       
model9.eval()             
        



AttnClassifier(
  (embedding): Embedding(137968, 300)
  (self_attention1): Attention(
    (linear_in): Linear(in_features=300, out_features=300, bias=False)
    (linear_out): Linear(in_features=600, out_features=300, bias=False)
    (softmax): Softmax(dim=-1)
    (tanh): Tanh()
  )
  (self_attention2): Attention(
    (linear_in): Linear(in_features=300, out_features=300, bias=False)
    (linear_out): Linear(in_features=600, out_features=300, bias=False)
    (softmax): Softmax(dim=-1)
    (tanh): Tanh()
  )
  (self_attention_query): Attention(
    (linear_in): Linear(in_features=300, out_features=300, bias=False)
    (linear_out): Linear(in_features=600, out_features=300, bias=False)
    (softmax): Softmax(dim=-1)
    (tanh): Tanh()
  )
  (softmax): Softmax(dim=1)
  (relu): ReLU()
  (dropout): Dropout(p=0.3, inplace=False)
  (fc): Linear(in_features=300, out_features=16, bias=True)
)

In [0]:
device

In [26]:
print(model5)

NameError: ignored

In [0]:
class EnsembleClassifier(nn.Module): 
    def __init__(self, models, n_class, dropout = 0.5): 
        super().__init__()
        # self.vocab_size, self.embedding_size = embeddings.shape
        # self.embedding = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        # self.embedding.weight.data.copy_(embeddings)
        
        # # self.embedding_delta = nn.Embedding(self.vocab_size, self.embedding_size, sparse = False)
        
        # # self.conv1 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[0])
        # # self.bn1 = nn.BatchNorm1d(channels_first)
        
        # # self.conv2 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[1])
        # # self.bn2 = nn.BatchNorm1d(channels_first)

        # # self.conv3 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[2])
        # # self.bn3 = nn.BatchNorm1d(channels_first)

        # # self.conv4 = nn.Conv1d(in_channels=self.embedding_size, out_channels=channels_first//len(kernels), dilation = 1, kernel_size = kernels[3])
        # # self.bn4 = nn.BatchNorm1d(channels_first)

        # self.self_attention1 = Attention(self.embedding_size)
        # self.self_attention2 = Attention(self.embedding_size)
        # self.self_attention_query = Attention(self.embedding_size)


        # self.softmax = nn.Softmax(dim = 1)
        # self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        self.tanh = nn.Tanh()

        self.model1 = models[0]
        self.model2 = models[1]
        self.model3 = models[2]
        self.model4 = models[3]
        self.model5 = models[4]
        self.model6 = models[5]
        self.model7 = models[6]
        self.model8 = models[7]
        self.model9 = models[8]

        
        self.fc = nn.Linear(n_class*3, n_class)
        self.softmax = nn.Softmax(dim = 1)
        
    def forward(self, texts): 
        with torch.no_grad(): 
            # model1_out = 1/9 * self.softmax(self.model1(texts))
            # model2_out = 1/9 * self.softmax(self.model2(texts))
            # model3_out = 1/9 * self.softmax(self.model3(texts))
            # model4_out = 1/9 * self.softmax(self.model4(texts))
            # model5_out = 1/9 * self.softmax(self.model5(texts))
            # model6_out = 1/9 * self.softmax(self.model6(texts))
            # model7_out = 1/9 * self.softmax(self.model7(texts))
            # model8_out = 1/9 * self.softmax(self.model8(texts))
            # model9_out = 1/9 * self.softmax(self.model9(texts))

            model1_out = 1/9 * torch.log((self.softmax(self.model1(texts))))
            model2_out = 1/9 * torch.log(self.softmax(self.model2(texts)))
            model3_out = 1/9 * torch.log(self.softmax(self.model3(texts)))
            model4_out = 1/9 * torch.log(self.softmax(self.model4(texts)))
            model5_out = 1/9 * torch.log(self.softmax(self.model5(texts)))
            model6_out = 1/9 * torch.log(self.softmax(self.model6(texts)))
            model7_out = 1/9 * torch.log(self.softmax(self.model7(texts)))
            model8_out = 1/9 * torch.log(self.softmax(self.model8(texts)))
            model9_out = 1/9 * torch.log(self.softmax(self.model9(texts)))
            
            out = model1_out+model2_out+model3_out+model4_out+model5_out+model6_out+model7_out+model8_out+model9_out
            out = self.softmax(out)


            # stacked_out = torch.stack([model1_out, model2_out, model3_out])

            
            # out = torch.mean(stacked_out, dim=0)
            #stacked_out = self.dropout(self.tanh(stacked_out))
            
        #out = self.fc(stacked_out)

        # attn_query = self.self_attention_query(attn_out, attn_out)[0]

        
        # raw_energy = torch.mul(attn_query, attn_out).sum(2)
        # normalized_energy = self.softmax(raw_energy).reshape(embeddings.shape[0], 1, -1)
        
        # attn_contexts = torch.bmm(normalized_energy, attn_out).reshape(embeddings.shape[0], self.embedding_size)
    
        # conv1_out = self.conv1(F.pad(embeddings.transpose(1,2), (1,0)))
        # conv2_out = self.conv2(F.pad(embeddings.transpose(1,2), (1,1)))
        # conv3_out = self.conv3(F.pad(embeddings.transpose(1,2), (2,1)))
        # conv4_out = self.conv4(F.pad(embeddings.transpose(1,2), (2,2,)))

        # conv_out = torch.cat((conv1_out, conv2_out, conv3_out, conv4_out), dim = 1)
        # conv_out = self.relu(conv_out)
      
        # if conv1_out.shape == conv2_out.shape: 
        #     conv2_out += conv1_out
        
#         pool = nn.MaxPool1d(kernel_size = attn_out.shape[1])

# #         print(f"conv2 shape is {conv2_out.shape}")
        
#         pooled_out = pool(attn_out.transpose(1,2)).squeeze(2)
#         # print(f"embeddings shape {embeddings.shape} and attn out is {attn_out.shape} and pooled_out is {pooled_out.shape}")
        
# #         print(f"pooled out shape is {pooled_out.shape}")
        
        return out
        

In [0]:
def init_weights(m):
    if type(m) == nn.Conv1d or type(m) == nn.Linear:
        nn.init.xavier_normal_(m.weight.data)

In [0]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, lambda_reg = .0001):

    since = time.time()
    
    best_acc = 0.0

    for epoch in range(num_epochs):
        f.write('Epoch {}/{}\n'.format(epoch+1, num_epochs))
        v.write('Epoch {}/{}\n'.format(epoch+1, num_epochs))
        f.write('-' *10)
        v.write('-' * 10)

        print('Epoch {}/{}'.format(epoch+1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0

            # Iterate over data.
            f.write("\nstarting epoch {} for {} phase\n".format(epoch+1, phase))
            v.write("\nstarting epoch {} for {} phase\n".format(epoch+1, phase))
            print("starting epoch {} for {} phase".format(epoch+1, phase))

            for i, data in enumerate(tqdm_notebook(dataloaders[phase])):
                #pdb.set_trace()
                inputs = data.text.to(device)
                labels = data.label.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)

                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels) 
                    # for i, param in enumerate(model.parameters()): 
                    #   if i == 1: 
                    #     loss += lambda_reg * torch.norm(param)


                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                if ((i%300) == 0):
                    #print("inside")
                    v.write("inputs size: {}\n".format(inputs.size(0)))
                    v.write("epoch {}, batch {},  loss : {}\n".format(epoch+1, i, loss.item()))
                    v.write("percent correct: {}\n".format((torch.sum(preds == labels.data)/inputs.size(0))))
            
            
            epoch_loss = running_loss / dataset_sizes[phase]

            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            if phase == 'train':

                train_loss = epoch_loss
                train_error = 1 - epoch_acc
                scheduler.step()

            elif phase == 'val': 

                val_error = 1 - epoch_acc
                metrics.append(Metric(loss=train_loss, train_error=train_error,val_error=val_error))
            
            f.write('{} Loss: {:.4f} Acc: {:.4f}\n'.format(phase, epoch_loss, epoch_acc))
            v.write('{} Loss: {:.4f} Acc: {:.4f}\n'.format(phase, epoch_loss, epoch_acc))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                PATH = 'MobileNetModel_' + str(epoch+2) + '.pt'
                torch.save(model, PATH)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    f.write('Training complete in {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    v.write('Training complete in {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))


    print('Best val Acc: {:4f}'.format(best_acc))

    f.write('Best val Acc: {:4f}'.format(best_acc))
    v.write('Best val Acc: {:4f}'.format(best_acc))


    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [0]:
# for named_param in model.named_parameters(): 
#   print(named_param)

In [0]:
model = EnsembleClassifier((model1, model2, model3, model4, model5, model6, model7, model8, model9), N_CLASS, dropout=0)
# model = EnsembleClassifier((model1, model2, model3), N_CLASS, dropout=0)
model = model.to(device)
#model.apply(init_weights)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)


In [0]:
metrics = []
model = train_model(model, criterion, optimizer, exp_lr_scheduler, num_epochs=NUMBER_EPOCHS, lambda_reg = 0)

f.close()
v.close()
torch.save(model, "Model_" + str(MODELNO) + ".pt")

def training_plot(metrics):
    plt.figure(1)
    plt.plot([m.val_error for m in metrics], 'b')
    plt.plot([m.train_error for m in metrics], 'r')
    plt.title('Train Error (red), Val Error (blue)')
    plt.savefig('Model_' + str(MODELNO) + '.png')
 
training_plot(metrics)


NameError: ignored

In [22]:
running_loss = 0.0
running_corrects = 0
model.eval()
model.cuda()
pred_list = []
for i, data in enumerate(tqdm_notebook(val_loader)):
  #pdb.set_trace()
  inputs = data.text.to(device)
  labels = data.label.to(device)

  # zero the parameter gradients
  optimizer.zero_grad()

  # forward
  # track history if only in train
  
  with torch.no_grad():
      outputs = model(inputs)
      #print(outputs.shape)

      _, preds = torch.max(outputs, 1)
      pred_list.extend(preds.cpu().tolist())
      loss = criterion(outputs, labels) 
      # for i, param in enumerate(model.parameters()): 
      #   if i == 1: 
      #     loss += lambda_reg * torch.norm(param)


  # statistics
  running_loss += loss.item() * inputs.size(0)
  running_corrects += torch.sum(preds == labels.data)


epoch_loss = running_loss / len(val)

epoch_acc = running_corrects.double() / len(val)
print('Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))


Loss: 2.0697 Acc: 0.8849


In [23]:
val_preds = [int2Label.get(pred) for pred in pred_list]
val_preds[:10]

['Sports and recreation',
 'Sports and recreation',
 'Sports and recreation',
 'Media and drama',
 'Social sciences and society',
 'Music',
 'Music',
 'Media and drama',
 'Social sciences and society',
 'History']

In [0]:
import pandas as pd
valid = pd.read_csv("topicclass_valid_fixed.csv")


In [25]:
pred_list[:10]

[5, 5, 5, 15, 4, 3, 3, 15, 4, 8]

In [0]:
answers = []
with open("topicclass_valid_fixed.txt") as f: 
  for line in f: 
    answers.append(line.split(" ||| ")[0])

In [33]:
answers[:10]

['Sports and recreation',
 'Sports and recreation',
 'Sports and recreation',
 'Media and drama',
 'Music',
 'Music',
 'Music',
 'Media and drama',
 'Social sciences and society',
 'History']

In [35]:
total_number = len(answers)
total_correct = 0
for pair in zip(val_preds, answers): 
  if pair[0] == pair[1]: 
    total_correct+=1
total_correct / total_number

0.8849144634525661

In [0]:
with open('dev_results.txt', 'w') as f:
    for pred in val_preds:
        f.write("%s\n" % pred)

In [37]:
! head dev_results.txt

Sports and recreation
Sports and recreation
Sports and recreation
Media and drama
Social sciences and society
Music
Music
Media and drama
Social sciences and society
History


In [38]:
pred_list = []
for i, data in enumerate(tqdm_notebook(test_loader)):
  #pdb.set_trace()
  inputs = data.text.to(device)
  
  with torch.no_grad():
      outputs = model(inputs)
      #print(outputs.shape)

      _, preds = torch.max(outputs, 1)
      pred_list.extend(preds.cpu().tolist())
      # for i, param in enumerate(model.parameters()): 
      #   if i == 1: 
      #     loss += lambda_reg * torch.norm(param)


  # statistics

HBox(children=(IntProgress(value=0, max=11), HTML(value='')))




In [42]:
test_preds = [int2Label.get(pred) for pred in pred_list]
test_preds[:15]

['Engineering and technology',
 'Media and drama',
 'Sports and recreation',
 'Sports and recreation',
 'History',
 'History',
 'Music',
 'Music',
 'Warfare',
 'Geography and places',
 'History',
 'Media and drama',
 'Philosophy and religion',
 'Art and architecture',
 'Media and drama']

In [43]:
! head -n 15 topicclass_test.txt

UNK ||| NY 93 was moved onto NY 104 and Junction Road in Cambria in the 1940s , and altered to bypass Lockport to the south on a new highway and Robinson and Dysinger roads in 1991 .
UNK ||| It was also staged in Hartford , Connecticut in the United States in 1983 and starred John Cullum as Hitler .
UNK ||| In 2008 , Dodd was the Australian national Grade IV para @-@ equestrian champion .
UNK ||| He has headlined numerous pay @-@ per @-@ view events for both the WWE and UFC , including WrestleMania XIX , WrestleMania 31 , UFC 100 , and UFC 116 .
UNK ||| Nerva became Emperor at the age of sixty @-@ five , after a lifetime of imperial service under Nero and the rulers of the Flavian dynasty .
UNK ||| Maynilà had been Indianized since the sixth century CE and earlier .
UNK ||| The single peaked at number 46 on the US Billboard Hot 100 and has been certified gold by the Recording Industry Association of America ( RIAA ) for shipments of 500 @,@ 000 copies .
UNK ||| For Independiente , Arjo

In [0]:
with open('test_results.txt', 'w') as f:
    for pred in test_preds:
        f.write("%s\n" % pred)

In [46]:
! pwd

/content/drive/My Drive/NN4NLP/topicclass


In [0]:
int2Label = \
{0: 'Miscellaneous',
 1: 'Video games',
 2: 'Language and literature',
 3: 'Music',
 4: 'Social sciences and society',
 5: 'Sports and recreation',
 6: 'Natural sciences',
 7: 'Art and architecture',
 8: 'History',
 9: 'Warfare',
 10: 'Engineering and technology',
 11: 'Philosophy and religion',
 12: 'Agriculture, food and drink',
 13: 'Geography and places',
 14: 'Mathematics',
 15: 'Media and drama'}