In [14]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
cuda = torch.cuda.is_available()

BATCH_SIZE = 64

In [15]:
# from google.colab import drive
# drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
#TODO - replace all the following parameters
train_X_path = "/content/drive/My Drive/train_X.npy"
train_Y_path = '/content/drive/My Drive/train_Y.npy'
test_X_path = '/content/drive/My Drive/test_X.npy'
train_results_path = "/content/drive/My Drive/train_results"

#loss_functions: XEntropy, MSE
criterion = nn.CrossEntropyLoss()

#optimizer: Adam, SGD, SGD+momentum ; lr, weight_decay
optimizer = optim.Adam(model.parameters(), lr = 3e-4, weight_decay=5e-6)


In [17]:
# len(train_X) = 154684
train_X = np.load(train_X_path, allow_pickle = True)[:110000]
train_Y = np.load(train_Y_path, allow_pickle = True)[:110000]
val_X = np.load(train_X_path, allow_pickle = True)[110000:]
val_Y = np.load(train_Y_path, allow_pickle = True)[110000:]
test_X = np.load(test_X_path, allow_pickle = True)

In [18]:
# custom Dataset class
class MovieReviewsData(Dataset):
    def __init__(self, X, Y = None):
        self.maxlen = max(len(x) for x in X)
        self.X = [F.pad(torch.FloatTensor(x), (0, 0, 0, self.maxlen - len(x))) for x in X]
        if Y is not None:
            self.Y = torch.LongTensor(Y)
        else:
            self.Y = None
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        if self.Y is not None:
            return self.X[idx], self.Y[idx]
        else:
            return self.X[idx]

In [19]:
train_dataset = MovieReviewsData(train_X, train_Y)
val_dataset = MovieReviewsData(val_X, val_Y)
test_dataset = MovieReviewsData(test_X)
train_loader = DataLoader(train_dataset, shuffle = True, batch_size = BATCH_SIZE)
val_loader = DataLoader(val_dataset, shuffle = False, batch_size = BATCH_SIZE)
test_loader = DataLoader(test_dataset, shuffle = False, batch_size = BATCH_SIZE)

In [20]:
class Network(nn.Module):
    def __init__(self, input_channel, out_channel, kernel_sizes, output_dim):
        super().__init__()
        self.convs = nn.ModuleList([
                                    nn.Conv1d(in_channels = input_channel, 
                                              out_channels = out_channel, 
                                              kernel_size = ks)
                                    for ks in kernel_sizes
                                    ])
        
        self.linear = nn.Linear(len(kernel_sizes) * out_channel, output_dim)
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, embedded):     
        embedded = embedded.permute(0, 2, 1)       
        conved = [F.relu(conv(embedded)) for conv in self.convs]
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        cat = self.dropout(torch.cat(pooled, dim = 1))
        return self.linear(cat)

In [21]:
input_channel = 300
out_channel = 100
#[3,4,5]
kernel_sizes = [3,4,5]
output_dim = 5
model = Network(input_channel, out_channel, kernel_sizes, output_dim)

In [22]:
device = torch.device("cuda" if cuda else "cpu")
model.to(device)

Network(
  (convs): ModuleList(
    (0): Conv1d(300, 100, kernel_size=(3,), stride=(1,))
    (1): Conv1d(300, 100, kernel_size=(4,), stride=(1,))
    (2): Conv1d(300, 100, kernel_size=(5,), stride=(1,))
  )
  (linear): Linear(in_features=300, out_features=5, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [23]:
def train_epoch(model, train_loader, criterion, optimizer):
    model.train()

    running_loss = 0.0
    
    start_time = time.time()
    for batch_idx, (X, Y) in enumerate(train_loader):   
        optimizer.zero_grad()   
        X = X.to(device)
        Y = Y.to(device) 

        outputs = model(X)
        loss = criterion(outputs, Y)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()
    
    end_time = time.time()
    
    running_loss /= len(train_loader)
    print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
    return running_loss

def test_model(model, test_loader, criterion):
    with torch.no_grad():
        model.eval()

        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for batch_idx, (X, Y) in enumerate(test_loader):   
            X = X.to(device)
            Y = Y.to(device) 

            outputs = model(X)
            _, predicted = torch.max(outputs.data, 1)
            total_predictions += Y.size(0)
            correct_predictions += (predicted == Y).sum().item()

            loss = criterion(outputs, Y)
            running_loss += loss.item()


        running_loss /= len(test_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Testing Loss: ', running_loss)
        print('Testing Accuracy: ', acc, '%')
        return running_loss, acc

#TODO: predict requires tokenize input sentence and embeddings

# def predict(model, test_loader, criterion):
#     with torch.no_grad():
#         model.eval()

#         for batch_idx, (X, Y) in enumerate(test_loader):   
#             X = X.to(device)

#             outputs = model(X)
#             _, predicted = torch.max(outputs.data, 1)

#             loss = criterion(outputs, Y)
#             running_loss += loss.item()


#         running_loss /= len(test_loader)
#         acc = (correct_predictions/total_predictions)*100.0
#         print('Testing Loss: ', running_loss)
#         print('Testing Accuracy: ', acc, '%')
#         return running_loss, acc

In [31]:
n_epochs = 40
Train_loss = []
Train_acc = []
Test_loss = []
Test_acc = []

for i in range(n_epochs):
    print("epoch" + str(i+1))
    train_epoch(model, train_loader, criterion, optimizer)
    train_loss, train_acc = test_model(model, train_loader, criterion)
    test_loss, test_acc = test_model(model, val_loader, criterion)
    Train_loss.append(train_loss)
    Train_acc.append(train_acc)
    Test_loss.append(test_loss)
    Test_acc.append(test_acc)
    print('='*20)

#save intermediate results
torch.save({
        'train_loss': Train_loss,
        'train_acc': Train_acc,
        'test_loss' : Test_loss,
        'test_acc' : Test_acc,
      }, train_results_path)

#read intermediate results
train_results = torch.load(train_results_path)
train_acc =  train_results["train_acc"]
print(train_acc)

[1.6413330933325092, 1.6413362220389125, 1.641331154846604, 1.6413342099747317, 1.6413343142048435]
