In [2]:
import csv
from torch.utils.data import Dataset
import torch
from sklearn.model_selection import train_test_split
import numpy as np
from bs4 import BeautifulSoup
import string
import spacy
import jsonlines
import json
import re
import torch.nn as nn
from torch.nn.utils.rnn import pad_packed_sequence,pack_padded_sequence,pad_sequence
import torch.nn.functional as F
import torch.optim as optim
import fasttext
from torch.utils.data import SubsetRandomSampler,DataLoader,Subset
from torchtext.vocab import GloVe
from tqdm import tqdm
import io



#   Which Dataset to use
#   1 = Assignment 1 Training Data
#   2 = Assignment 2 Training Data

# SENTENCE_SPLITTING_USED; whether to use the splitting of reviews into sentences.

DATASET_TO_USE = 2
SENTENCE_SPLITTING_USED = True

EMBED_DIM = 300
HIDDEN_DIM = 128
NUM_FILTERS = 86
FILTER_SIZES = [3,4,5,7]
CNN_DIM = NUM_FILTERS*len(FILTER_SIZES)

PATIENCE_PARAMETER = 7
VALIDATION_LOSS_COMPUTE_STEP = 1

device_cpu = torch.device('cpu')
device_fast = torch.device('cpu')



if torch.has_mps:
    device_fast = torch.device('mps')
elif torch.has_cuda:
    device_fast = torch.device('cuda')

#torch.manual_seed(0)
#np.random.seed(0)
nlp = spacy.load('en_core_web_sm')
glove = GloVe()


torch.cuda.empty_cache()

print(torch.cuda.is_available())

False


In [3]:
def preprocess_text(text):    
    text = re.sub(r'<br /><br />',".",text)
    text = BeautifulSoup(text,'lxml').get_text().strip()
    text = text.lower()
    text = re.sub(r"http\S+", "", text)
    text = ' '.join(re.findall(r"[\w']+|[.,!;/\"]", text))
    
    new_text = []
    for word in text.split():
        if word == '':
            continue
        new_text.append(word)
    
    text = ' '.join(new_text)
    words = nlp(text)
    text =  " ".join([token.text for token in words if not token.is_punct or token.text=='/' or token.text=="\"" or token.text=="."]).strip()
    new_words = []
    for word in text.split(" "):
        if word == 'n\'t':
            if len(new_words) > 1:
                new_words[-1] = new_words[-1] + word
            else:
                new_words.append(word)
        else:
            new_words.append(word)
    
    text = " ".join(new_words)
    return text

In [None]:
'''# preprocess the training data which was given for Assignment 1
def process_assignment1_training_data():
    negative_reviews = io.open('./Train.neg',encoding='latin-1').readlines()
    positive_reviews = io.open('./Train.pos',encoding='latin-1').readlines()
    with jsonlines.open('train.jsonl',mode='w') as writer:

        for review in positive_reviews:
            processed_text = preprocess_text(review)
            d = {'text': processed_text , 'label': 1.0}
            writer.write(d)
        for review in negative_reviews:
            processed_text = preprocess_text(review)
            d = {'text': processed_text , 'label': 0.0}
            writer.write(d)

# preprocess the training data which was given for Assignment 2
def process_assignment2_training_data():
    preprocessed_dataset = []
    train_dataset_labels = []
    with open("./Train dataset.csv") as csvfile:
        csvFile = csv.reader(csvfile)
        next(csvFile)
        json_writer = jsonlines.open('processed_dataset.jsonl','w')

        for line in csvFile:
            processed_text = preprocess_text(line[0])
            label = 1.0 if line[1] == 'positive' else 0.0
            train_dataset_labels.append(label)
            json_writer.write({"text":processed_text,"label":label})
            preprocessed_dataset.append({"text":processed_text,"label":label})
    
        json_writer.close()

if DATASET_TO_USE == 1:
    process_assignment1_training_data()
else:
    process_assignment2_training_data()'''

In [4]:
preprocessed_dataset = []
train_dataset_labels = []


TRAIN_FILE_NAME = './train.jsonl' if DATASET_TO_USE==1 else './processed_dataset.jsonl'

with open(TRAIN_FILE_NAME ,encoding='utf-8') as f:
#with open('processed_dataset.jsonl',encoding='utf-8') as f:
    for line in f:
        sample = json.loads(line)
        train_dataset_labels.append(sample['label'])
        preprocessed_dataset.append(sample)
      
train_dataset_labels = np.array(train_dataset_labels)

In [5]:
def getWordEmbeddingforText(text,glove=glove):
    length = 0
    words = []
    text = text.strip()
    for word in text.split(' '):
        length+=1
        word_embedding = glove[word]
        words.append(word_embedding)

    # If the number of words in a sentence is below 3
    if length < max(FILTER_SIZES):
        words.append(torch.zeros(EMBED_DIM))
        length+=1
    
    return torch.stack(words),length

In [6]:
def review_to_embed(review,glove=glove): 
    sentences = review.split(".")
    sentence_lengths = []
    review_embeddings = []
    num_sentences = 0
    for sentence in sentences:
        if sentence == '':
            continue
        s= sentence.strip()
        num_sentences += 1
        sentence_word_embeddings,sentence_length = getWordEmbeddingforText(s,glove)
        sentence_lengths.append(sentence_length)
        review_embeddings.append(sentence_word_embeddings)

    return torch.nn.utils.rnn.pad_sequence(review_embeddings,batch_first=True),sentence_lengths,num_sentences

In [7]:
class ReviewDataSet(Dataset):
    
    def __init__(self,reviews):
        super().__init__()
        self.reviews = reviews
        
    def __len__(self):
        return len(self.reviews)

    def __getitem__(self, index):
        return self.reviews[index]

In [8]:
processed_dataset = []
for review in preprocessed_dataset:
    if SENTENCE_SPLITTING_USED:
        embeddings, sent_length ,n_sents = review_to_embed(review['text'])
        processed_dataset.append({'review': embeddings,'sent_lengths': sent_length,'length' : n_sents,'label' : review['label']})
    else:
        embedding,length = getWordEmbeddingforText(review['text'])
        processed_dataset.append({'review': embedding,'length': length,'label' : review['label']})

In [9]:
torch.cuda.is_available()

False

In [10]:
dataset = ReviewDataSet(processed_dataset)

In [11]:
def collate_fn_no_sentence_split(batch_data):
    inputs = [b['review'] for b in batch_data]
    lengths = [b['length'] for b in batch_data]
    labels = torch.tensor([b['label'] for b in batch_data])

    labels = labels.unsqueeze(1)
    inputs = pad_sequence(inputs,batch_first=True)
    return  {'input' : inputs , 'lengths': lengths , 'labels' : labels }



def collate_function(batch_data):   
    inputs = [b['review'] for b in batch_data]
    sent_lengths = [ b['sent_lengths'] for b in batch_data ]
    n_sentences = [ b['length'] for b in batch_data ]
    
    labels = torch.tensor([b['label'] for b in batch_data])

    labels = labels.unsqueeze(1)
    
    max_n_sentences = max([i.shape[0] for i in inputs] )
    max_n_words = max([i.shape[1] for i in inputs])

 
    processed_inputs = []
    for inp in inputs:

        t1 = torch.permute(inp,(2,1,0))
        t1 = torch.nn.functional.pad(t1,(0,max_n_sentences-inp.shape[0],0,max_n_words-inp.shape[1]))
        t1 = torch.permute(t1,(2,1,0))
        processed_inputs.append(t1)

    final_inp = torch.stack(processed_inputs)
    #inputs = pad_sequence(inputs,batch_first=True)
    return  {'input' : final_inp , 'sent_lengths': sent_lengths , 'lengths' : n_sentences ,'labels' : labels }

In [12]:
train_idx,valid_idx = train_test_split(np.arange(train_dataset_labels.shape[0]), 
    test_size=0.2,
    shuffle= True,
    stratify= train_dataset_labels,
    random_state=0
)

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)


if SENTENCE_SPLITTING_USED:
    train_dataloader = DataLoader(dataset,16,sampler=train_sampler,collate_fn=collate_function)
    valid_dataloader = DataLoader(dataset,16,sampler=valid_sampler,collate_fn=collate_function)
else:
    train_dataloader = DataLoader(dataset,64,sampler=train_sampler,collate_fn=collate_fn_no_sentence_split)
    valid_dataloader = DataLoader(dataset,64,sampler=valid_sampler,collate_fn=collate_fn_no_sentence_split)

In [None]:
class DAN(nn.Module):
    def __init__(self,embed_dim=EMBED_DIM,hidden_dim = HIDDEN_DIM, droput_prob = 0.3, train_device = device_cpu):
        super().__init__()
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.fc1 = nn.Linear(self.embed_dim,self.hidden_dim)
        #self.fc = nn.Linear(self.hidden_dim,self.hidden_dim)
        self.fc2 = nn.Linear(self.hidden_dim,1)
        self.sigmoid  = nn.Sigmoid()
        self.word_dropout_prob = droput_prob

        self.train_device = train_device


    def forward(self,inp,inp_len):    
        inp_mask = torch.ones((inp.shape[0],inp.shape[1]))
        for i  in range(inp.shape[0]):
            inp_mask[i,inp_len[i]:] = 0.0

        inp_mask = inp_mask.to(self.train_device)
        inp_lengths = torch.sum(inp_mask,-1,keepdim=True).int()
        inp_lengths = inp_lengths.to(self.train_device)
        total = torch.sum(inp*(inp_mask.unsqueeze(2)),axis=1)
        vector_average = total / inp_lengths
        ans = F.relu(self.fc1(vector_average))
        #ans = F.relu(self.fc(ans))
        ans = self.sigmoid(self.fc2(ans))
        return ans

In [None]:
class RNNModel(nn.Module):

    def __init__(self,
            embed_dim=EMBED_DIM,hidden_dim =HIDDEN_DIM,bidirectional=False,
            rnn_type = 'gru',num_layers=1,rnn_dropout = 0.4,fc_dropout = 0.3):
        
        super().__init__()
        
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.dropout = rnn_dropout
        self.fc_dropout = nn.Dropout(fc_dropout)

        self.rnn = None
        
        if rnn_type == 'gru':
            self.rnn  = nn.GRU(input_size = self.embed_dim,
                hidden_size = self.hidden_dim,
                num_layers = self.num_layers,
                batch_first = True,
                dropout = self.dropout,
                bidirectional = self.bidirectional
            )
        elif rnn_type == 'rnn':
            self.rnn = nn.RNN(input_size = self.embed_dim,
                hidden_size = self.hidden_dim,
                num_layers = self.num_layers,
                batch_first = True,
                dropout = self.dropout,
                bidirectional = self.bidirectional)
        

        self.bidirectional_factor = 2 if self.bidirectional else 1
        self.dnn_input_size= self.bidirectional_factor * self.num_layers * self.hidden_dim
        self.fc_list = []

        log_base_2 = np.log2(self.dnn_input_size)
        nearest_power_2 = int(log_base_2)

        if(float(nearest_power_2) != log_base_2):
            self.fc_list.append(nn.Linear(self.dnn_input_size,2**nearest_power_2))

        while nearest_power_2 > 7 :
            self.fc_list.append(nn.Linear(2**(nearest_power_2),2**(nearest_power_2-1)))
            nearest_power_2-=1
        
        self.fc_list.append(nn.Linear(128,1))
        self.fc = nn.ModuleList(self.fc_list)

        #self.fc1 = nn.Linear(self.dnn_input_size,128)
        #self.fc2 = nn.Linear(128,1)
        self.sigmoid = nn.Sigmoid()

    def forward(self,x,x_len):

        packed_input = pack_padded_sequence(x,x_len,batch_first=True,enforce_sorted=False)
        packed_output,hidden = self.rnn(packed_input)
        output,output_lengths = pad_packed_sequence(packed_output)
        #hidden = hidden.squeeze()
        hidden = torch.permute(hidden,(1,0,2))
        hidden = hidden.contiguous().view((hidden.shape[0],-1))
        out = hidden
        for i,l in enumerate(self.fc):
            if i!=(len(self.fc_list)-1):
                out = self.fc_dropout(l(out))
            
        #out = self.fc_dropout(self.fc1(hidden))
        ans = self.sigmoid(self.fc_list[len(self.fc_list)-1](out))
        return ans

In [None]:
# ADD Dropout Term

class CNNModel(nn.Module):

    def __init__(self, embed_dim=EMBED_DIM,hidden_dim = HIDDEN_DIM,filter_sizes = FILTER_SIZES, n_filters = NUM_FILTERS,dropout = 0.2):
        super().__init__()
        self.embed_dim = embed_dim
        self.hidden_dim = hidden_dim
        self.filter_sizes = filter_sizes
        self.num_filters = n_filters
        self.dropout = nn.Dropout(p=dropout)
        
        self.modulelist = nn.ModuleList([
            nn.Conv1d(in_channels=self.embed_dim,
            out_channels=self.num_filters,
            kernel_size= self.filter_sizes[i]
            )
    
        for i in range(len(self.filter_sizes))])
        
        self.fc1 = nn.Linear(self.num_filters*len(self.filter_sizes),hidden_dim)
        self.fc2 = nn.Linear(self.hidden_dim,1)
        self.sigmoid = nn.Sigmoid()

    def forward(self,x : torch.Tensor,xlen = None):
        
        permuted_x = x.permute(0,2,1)
        x_conv_list = [F.relu(conv(permuted_x)) for conv in self.modulelist]
        x_max_pool_list = [F.max_pool1d(x_conv,kernel_size=x_conv.shape[2]) for x_conv in x_conv_list]
        
        feature = torch.cat([x_pool.squeeze(dim=2) for x_pool in x_max_pool_list],dim=1)
        
        out = self.dropout(F.relu(self.fc1(feature)))
        out = self.fc2(out)
        return self.sigmoid(out)



cnnmodel = CNNModel(10,2,FILTER_SIZES,NUM_FILTERS)    

In [None]:
import torch.nn.functional as F
class EnsembleModel(nn.Module):
    
    def __init__(self,EMBED_DIM,CNN_DIM,HIDDEN_DIM):
        super().__init__()
        self.rnn = nn.GRU(input_size = CNN_DIM,hidden_size = HIDDEN_DIM, batch_first = True)
        self.cnn = nn.Conv1d(in_channels=EMBED_DIM,out_channels=CNN_DIM,kernel_size=3)
        self.fc = nn.Linear(HIDDEN_DIM,1)


    def forward(self,inp : torch.Tensor,n_sents=None):

        ## inp  = (batch_size,max_sent_length,max_word_length,embed_dim)

        outputs = []
      
        
        for i in range(inp.shape[1]):
            current_inp = inp[:,i,:,:]
            current_inp = torch.permute(current_inp,(0,2,1))
            current_output = self.cnn(current_inp)
            current_output = F.max_pool1d(current_output,kernel_size = current_output.shape[2]).squeeze(dim=2)
            outputs.append(current_output)
        
        #print(len(outputs))
        #print(outputs[0].shape)
        lstm_in = torch.stack(outputs,dim=1)
   
        packed_input = pack_padded_sequence(lstm_in,n_sents,batch_first=True,enforce_sorted=False)
        packed_output,hidden = self.rnn(packed_input)
        output,output_lengths = pad_packed_sequence(packed_output,batch_first=True)

        hidden = torch.permute(hidden,(1,0,2))
        hidden = hidden.contiguous().view((hidden.shape[0],-1))

        out = self.fc(hidden)
        return nn.Sigmoid()(out)
        #out = self.cnn(inp)
        #return out

batch_data = next(iter(train_dataloader))
j = EnsembleModel(EMBED_DIM,CNN_DIM,HIDDEN_DIM)
j(batch_data['input'],batch_data['n_sent'])

KeyError: 'n_sent'

In [13]:
import torch.nn.functional as F
class CNNLSTMAttention(nn.Module):
    
    def __init__(self,EMBED_DIM,CNN_DIM,HIDDEN_DIM,bidirectional_factor = 2,fc_dropout=0.3):
        super().__init__()

        bidirectional = False

        if bidirectional_factor==2:
            bidirectional = True

        self.rnn = nn.GRU(input_size = CNN_DIM,hidden_size = HIDDEN_DIM, bidirectional=bidirectional,batch_first = True)
        
        cnn_layers = [  
            nn.Conv1d(in_channels=EMBED_DIM,out_channels=NUM_FILTERS,kernel_size=FILTER_SIZES[i]) for i in range(len(FILTER_SIZES))
        ]
        self.cnn_list = nn.ModuleList(cnn_layers)
    
        #self.cnn = nn.Conv1d(in_channels=EMBED_DIM,out_channels=CNN_DIM,kernel_size=3)
        self.attention_layer = nn.Linear(bidirectional_factor* HIDDEN_DIM,1)
        self.fc = nn.Linear(bidirectional_factor*HIDDEN_DIM,HIDDEN_DIM)
        self.fc_dropout = nn.Dropout(fc_dropout)
        self.out_fc = nn.Linear(HIDDEN_DIM,1)
        self.batchnorm1d = nn.BatchNorm1d(CNN_DIM)

    def forward(self,inp : torch.Tensor,n_sents=None):

        ## inp  = (batch_size,max_sent_length,max_word_length,embed_dim)

        outputs = []
      
        
        for i in range(inp.shape[1]):
            current_inp = inp[:,i,:,:]
            current_inp = torch.permute(current_inp,(0,2,1))
            
            current_output = None
            for cnn in self.cnn_list:
                current_out = cnn(current_inp)
                current_out = F.max_pool1d(current_out,kernel_size=current_out.shape[2]).squeeze(dim=2)

                if current_output is None:
                    current_output = current_out
                else:
                    current_output = torch.cat([current_output,current_out],dim=1)
                #current_output = self.cnn(current_inp)
                #current_output = F.max_pool1d(current_output,kernel_size = current_output.shape[2]).squeeze(dim=2)

            outputs.append(current_output)
        
        #print(len(outputs))
        #print(outputs[0].shape)
        lstm_in = torch.stack(outputs,dim=2)
        lstm_in = self.batchnorm1d(lstm_in)
        lstm_in = torch.permute(lstm_in,(0,2,1))

        packed_input = pack_padded_sequence(lstm_in,n_sents,batch_first=True,enforce_sorted=False)
        packed_output,hidden = self.rnn(packed_input)
        output,output_lengths = pad_packed_sequence(packed_output,batch_first=True)
        attention_logs = self.attention_layer(output).squeeze(dim=2)
        attention_score = F.softmax(attention_logs,dim=1).unsqueeze(2)

        final_out = attention_score*output

        averaged_vector = torch.sum(final_out,dim=1,keepdim=False)

        #hidden = torch.permute(hidden,(1,0,2))
        #hidden = hidden.contiguous().view((hidden.shape[0],-1))
        out = self.fc_dropout(F.leaky_relu(self.fc(averaged_vector)))
        out = self.out_fc(out)
        return nn.Sigmoid()(out)
        #out = self.cnn(inp)
        #return out


In [14]:
batch_data = next(iter(train_dataloader))

In [19]:
print(batch_data['input'].shape)
print(batch_data['lengths'])

torch.Size([16, 58, 65, 300])
[14, 30, 36, 29, 8, 11, 55, 14, 5, 10, 58, 13, 14, 8, 15, 20]


In [20]:
cnn = CNNLSTMAttention(EMBED_DIM,CNN_DIM,HIDDEN_DIM)
cnn(batch_data['input'],batch_data['lengths'])

tensor([[0.4688],
        [0.4460],
        [0.4683],
        [0.4364],
        [0.4712],
        [0.4773],
        [0.4318],
        [0.4664],
        [0.4781],
        [0.4691],
        [0.5031],
        [0.4612],
        [0.4626],
        [0.4737],
        [0.4584],
        [0.4693]], grad_fn=<SigmoidBackward0>)

: 

In [None]:
import os
from torch.utils.tensorboard import SummaryWriter
from datetime import  datetime

def train(model,train_dataloader,valid_dataloader,num_epochs,criterion,optimizer,
    checkpoint_name='best_model.pt',
    device_train = device_fast,use_rnn = False,log=True):

    tensorboard_name='Ensemble'
    if log == True:
        current_datetime = datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
        tensorboard_name = tensorboard_name + "_" + current_datetime
        writer = SummaryWriter('runs/' + tensorboard_name)
    
    
    model = model.to(device_train)
    clip = 0
    if use_rnn:
        clip = 5

    best_validation_loss = 1000.0
    valdiation_loss_not_decreased_steps = 0
    
    model.train()
    for e in range(num_epochs):
        
        training_set_size = 0
        training_loss = 0.0
        model.train()

        for data in tqdm(train_dataloader):
            
            optimizer.zero_grad()
            if SENTENCE_SPLITTING_USED:
                input_reviews,sent_lengths,n_sents,output_labels = data['input'], data['sent_lengths'],data['lengths'],data['labels']
            else:
                input_reviews,n_sents,output_labels = data['input'],data['lengths'],data['labels']

            input_reviews = input_reviews.to(device_train)
            training_set_size += input_reviews.shape[0]
            output = model(input_reviews,n_sents)
            output = output.to(device_cpu)
            loss = criterion(output,output_labels.float())
            training_loss += loss.item()
            loss.backward()
            if use_rnn:
                nn.utils.clip_grad_norm_(model.parameters(),clip)
            optimizer.step()
        
        current_training_loss = training_loss
        if log==True:
            print("Epoch " + str(e) + " Average Training Loss = " +  str(current_training_loss))
            writer.add_scalars(tensorboard_name + 'Training Loss vs Epoch',{'train' : current_training_loss},e)

        
        model.eval()
        
        if valid_dataloader is None:
            continue
        
        validation_set_size  = 0 
        if e% VALIDATION_LOSS_COMPUTE_STEP==0:
            correct_count = 0
            validation_loss = 0

            for i,data in enumerate(valid_dataloader,0):
                if SENTENCE_SPLITTING_USED:
                    input_reviews,sent_lengths,n_sents,output_labels = data['input'], data['sent_lengths'],data['lengths'],data['labels']
                else:
                    input_reviews,n_sents,output_labels = data['input'],data['lengths'],data['labels']
                
                input_reviews = input_reviews.to(device_train)
                validation_set_size += input_reviews.shape[0]
                output = model(input_reviews,n_sents)
                output = output.to(device_cpu)
                loss = criterion(output,output_labels.float())
                validation_loss += loss.item()
                nearest_class = torch.round(output)

                correct = (nearest_class == output_labels.float()).float()
                correct_count += correct.sum()
            correct_count = int(correct_count)
            current_validation_accuracy = (correct_count/validation_set_size)*100
            current_validation_loss = (1.0* validation_loss)
            if log == True:
                print("Epoch " + str(e) + " " +  "Validation Loss = " + str(current_validation_loss) )
                print("Validation Set Accuracy = " + str((correct_count/validation_set_size)*100) )
                writer.add_scalar(tensorboard_name + ' Validation Accuracy vs Epoch ',(correct_count/validation_set_size*100),e)
                writer.add_scalars(tensorboard_name + 'Validation Loss vs Epoch',{'valid' : current_validation_loss},e)

            
            if log==True:
                if current_validation_loss < best_validation_loss:
                    valdiation_loss_not_decreased_steps = 0
                    torch.save(model.state_dict(),checkpoint_name)
                    best_validation_loss = current_validation_loss
                else:
                    valdiation_loss_not_decreased_steps +=1
        if log == True:
            if valdiation_loss_not_decreased_steps >= PATIENCE_PARAMETER:
                break

In [26]:
torch.cuda.empty_cache()

net = CNNLSTMAttention(EMBED_DIM,CNN_DIM,HIDDEN_DIM)
optimizer= optim.SGD(net.parameters(),lr=0.0054,momentum=0.9,nesterov=True)
scheduler = torch.optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0054, max_lr=0.0072,step_size_up=10000)
train(net,train_dataloader,valid_dataloader,100,nn.BCELoss(),optimizer,'test_cnn_rnn_att_adam_batch_nrom_cyclelr_bidir_0.0054.pt',device_fast,True,True)

100%|██████████| 2000/2000 [09:00<00:00,  3.70it/s]


Epoch 0 Average Training Loss = 730.8767990171909
Epoch 0 Validation Loss = 135.62008828297257
Validation Set Accuracy = 89.1


100%|██████████| 2000/2000 [08:35<00:00,  3.88it/s]


Epoch 1 Average Training Loss = 480.47654472664
Epoch 1 Validation Loss = 126.14620087854564
Validation Set Accuracy = 89.6875


100%|██████████| 2000/2000 [08:41<00:00,  3.84it/s]


Epoch 2 Average Training Loss = 402.75145566184074
Epoch 2 Validation Loss = 125.95687280595303
Validation Set Accuracy = 90.14999999999999


100%|██████████| 2000/2000 [08:36<00:00,  3.87it/s]


Epoch 3 Average Training Loss = 320.07007088838145
Epoch 3 Validation Loss = 195.673126203008
Validation Set Accuracy = 84.95


  6%|▌         | 111/2000 [00:33<09:28,  3.33it/s]


RuntimeError: CUDA out of memory. Tried to allocate 1.64 GiB (GPU 0; 2.00 GiB total capacity; 157.35 MiB already allocated; 536.18 MiB free; 444.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
torch.cuda.empty_cache()
net = EnsembleModel(EMBED_DIM,CNN_DIM,HIDDEN_DIM)
train(net,train_dataloader,valid_dataloader,50,nn.BCELoss(),optim.Adam(net.parameters(),0.001),'cnn_rnn_dnn_adam.pt',device_fast,True,True)

  0%|          | 0/2000 [00:00<?, ?it/s]


RuntimeError: CUDA out of memory. Tried to allocate 46.00 MiB (GPU 0; 2.00 GiB total capacity; 1.64 GiB already allocated; 0 bytes free; 1.70 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [27]:
def test(model_name,test_data,test_lengths,test_labels):
    model = CNNLSTMAttention(EMBED_DIM,CNN_DIM,HIDDEN_DIM)
    model.load_state_dict(torch.load(model_name,map_location=device_cpu))
    model.eval()
    count = 0
    for i in range(len(test_data)):
        ans = model(test_data[i],[test_lengths[i]])
        ans = torch.round(ans)
        if ans[0][0] == test_labels[i]:
            count+=1
    
    print("Accuracy = " + str((count/len(test_data)*100)))


In [28]:
test_word_embeddings = [] 
test_sentence_lengths = []
test_dataset_labels = []  

def getAssignment2TestData():
    test_processed_text = []
    with open("./E0334 Assignment2 Test Dataset.csv",encoding='utf-8') as csvfile:
        csvFile = csv.reader(csvfile)
        next(csvFile)
        for line in csvFile:
            processed_text = preprocess_text(line[0])
            label = 1.0 if line[1] == 'positive' else 0.0
            test_dataset_labels.append(label)
            test_processed_text.append(processed_text)

    for i in range(len(test_processed_text)):
        if SENTENCE_SPLITTING_USED:
            current_embeddings,current_sent_lengths,current_n_sent = review_to_embed(test_processed_text[i]) 
        else:
            current_embeddings,current_n_sent = getWordEmbeddingforText(test_processed_text[i])

        test_word_embeddings.append(current_embeddings.clone().detach().unsqueeze(0))
        test_sentence_lengths.append(current_n_sent)


def getAssignment1TestData():
    
    correct_count = 0

    reviews = open('./TestData','r',encoding='latin-1').readlines()
    for i in range(len(reviews)):
        r = reviews[i]
        reviews[i] = preprocess_text(r)

        if SENTENCE_SPLITTING_USED:
            current_embeddings,current_sent_lengths,current_n_sent = review_to_embed(reviews[i]) 
        else:
            current_embeddings,current_n_sent = getWordEmbeddingforText(reviews[i])
      
        if (i<331):
            test_dataset_labels.append(1.0)
        else:
            test_dataset_labels.append(0.0)
        test_word_embeddings.append(current_embeddings.clone().detach().unsqueeze(0))
        test_sentence_lengths.append(current_n_sent)

getAssignment2TestData()



In [29]:
test('./test_cnn_rnn_att_adam_batch_nrom_cyclelr_bidir_0.0054.pt',test_word_embeddings,test_sentence_lengths,test_dataset_labels)

Accuracy = 89.6989698969897


In [None]:
test('./first_cnn_rnn_att_adam_batch_nrom_cyclelr_bidir_0.0054.pt',test_word_embeddings,test_sentence_lengths,test_dataset_labels)

Accuracy = 80.76923076923077
