In [1]:
import argparse
import sys
sys.path.append("..")

In [2]:
import torch 
#print(torch.__version__)

In [3]:
# Experiment configuration, declared as command-line style options.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--embedding',
    default='tt',
    choices=['tt', 'tr', 'full'],
    type=str)
parser.add_argument('--ranks', type=int, default=8)
parser.add_argument('--d', type=int, default=3)
parser.add_argument('--embed_dim', type=int, default=64)
parser.add_argument('--voc_dim', default=250, type=int)
# type=float so a value supplied as text is converted to a number instead of staying a str.
parser.add_argument('--lr', default=5e-4, type=float)
parser.add_argument('--gpu', default='', type=str)
parser.add_argument('--hidden_dim', default=128, type=int)
parser.add_argument('--n_epochs',  default=10, type=int)
parser.add_argument('--fout',  default="logdir/", type=str)
parser.add_argument('--dropout', default=0.5, type=float)
# The loss-selection cell further down accepts only 'imdb' or names starting
# with 'sst' and raises NotImplementedError otherwise; the notebook loads the
# IMDB splits, so 'imdb' is the default that lets a full run complete.
parser.add_argument(
    '--dataset',
    default='imdb',
    type=str)
# An explicit empty argument list is parsed (not the kernel's sys.argv), so
# every option takes its default value.
args = parser.parse_args('')

In [4]:
# Short tag for the embedding variant: 'tt' and 'tr' are kept as they are,
# any other value is reported as "full".
tt = args.embedding if args.embedding in ('tt', 'tr') else "full"

In [5]:
# Identifier for this configuration, assembled from the dataset name, the
# embed_dim / d / ranks options and the embedding tag, joined with dashes.
model_name = "-".join([
    f"{args.dataset}",
    f"dim_{args.embed_dim}",
    f"d_{args.d}",
    f"ranks_{args.ranks}",
    f"{tt}",
])

In [6]:
import os
# Export both CUDA-related environment variables in one call; the list of
# visible devices is taken from the --gpu option.
os.environ.update({
    'CUDA_DEVICE_ORDER': 'PCI_BUS_ID',
    'CUDA_VISIBLE_DEVICES': args.gpu,
})


import utils
import torch
import numpy as np
import torch.nn.functional as F
import torch.nn as nn
import t3nsor as t3
from torchtext import data
from torchtext import datasets
import torch.optim as optim
from models import LSTM_Classifier
import pickle
import random
import spacy
from spacy.cli.download import download
#download(model="en_core_web_sm")
# Seed every random number generator this notebook relies on (Python, NumPy
# and PyTorch), not only Python's, so that runs are as repeatable as possible.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the "en_core_web_sm" spaCy pipeline; it has to be installed already
# (see the commented-out download call above).
#spacy.blank("en")
nlp = spacy.load("en_core_web_sm")

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [7]:
# Field definitions used by every split: text is tokenised with the 'spacy'
# tokenizer and uses fix_length=1000; labels are declared with a float dtype.
TEXT = data.Field(
    tokenize='spacy',
    fix_length=1000,
)
LABEL = data.LabelField(
    dtype=torch.float,
)

In [None]:
OUTPUT_DIM = 1

# IMDB ships with train and test splits only.
train_data, test_ = datasets.IMDB.splits(TEXT, LABEL)

# Shuffle the test examples, then use the first 12500 for testing and the
# remainder for validation.
test_list = list(test_)
random.shuffle(test_list)
test_data_, val_data_ = test_list[:12500], test_list[12500:]

# Wrap both example lists back into Dataset objects with the same two fields.
valid_data = data.dataset.Dataset(
    val_data_,
    fields=[('text', TEXT), ('label', LABEL)],
)
test_data = data.dataset.Dataset(
    test_data_,
    fields=[('text', TEXT), ('label', LABEL)],
)

In [None]:
def sort_key(ex):
    """Return the length of ``ex.text``; used as the sort key for the iterators."""
    text_length = len(ex.text)
    return text_length

In [None]:
BATCH_SIZE = 64

# Both vocabularies are built from the training split only. The text
# vocabulary is limited with max_size = voc_dim - 2 (presumably leaving room
# for two special tokens -- TODO confirm).
text_vocab_limit = args.voc_dim - 2
TEXT.build_vocab(train_data, max_size=text_vocab_limit)
LABEL.build_vocab(train_data)

In [None]:
# One bucket iterator per split, all with the same batch size and device.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    device=device,
    batch_size=BATCH_SIZE,
)

# The validation and test iterators get the length-based sort key explicitly.
valid_iterator.sort_key = test_iterator.sort_key = sort_key

In [None]:
# Model hyper-parameters: the input size follows the built vocabulary, the
# embedding / hidden sizes and dropout come from the parsed options, and the
# LSTM depth and direction are fixed here.
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM, HIDDEN_DIM, DROPOUT = args.embed_dim, args.hidden_dim, args.dropout
N_LAYERS, BIDIRECTIONAL = 2, True

In [None]:
# Number of entries in the vocabulary's string-to-index mapping.
actual_vocab_size = len(TEXT.vocab.stoi)

In [None]:
# Recurrent classifier; it is chained after the embedding layer further down.
lstm_model = LSTM_Classifier(
    embedding_dim=EMBEDDING_DIM,
    hidden_dim=HIDDEN_DIM,
    output_dim=OUTPUT_DIM,
    n_layers=N_LAYERS,
    bidirectional=BIDIRECTIONAL,
    dropout=DROPOUT,
)

In [None]:
# Build the embedding layer chosen by --embedding and record its compression
# rate: the dense table size (INPUT_DIM * EMBEDDING_DIM) divided by the `dof`
# reported by the factorised layer, or 1.0 for the plain dense embedding.
# Note that padding_idx=1 is passed to the TT / TR layers only.
if args.embedding not in ('tt', 'tr'):
    embed_model = nn.Embedding(
        num_embeddings=INPUT_DIM,
        embedding_dim=EMBEDDING_DIM
    )
    compression_rate = 1.0
elif args.embedding == 'tt':
    embed_model = t3.TTEmbedding(
        voc_size=INPUT_DIM,
        emb_size=EMBEDDING_DIM,
        auto_shapes=True,
        auto_shape_mode='mixed',
        d=args.d,
        tt_rank=args.ranks,
        padding_idx=1
    )
    dense_size = INPUT_DIM * EMBEDDING_DIM
    compression_rate = dense_size / embed_model.tt_matrix.dof
else:
    embed_model = t3.TREmbedding(
        voc_size=INPUT_DIM,
        emb_size=EMBEDDING_DIM,
        auto_shapes=True,
        auto_shape_mode='mixed',
        d=args.d,
        tr_rank=args.ranks,
        padding_idx=1
    )
    dense_size = INPUT_DIM * EMBEDDING_DIM
    compression_rate = dense_size / embed_model.tr_matrix.dof


def cross_entropy_loss(logits, target):
    """
    Cross-entropy between ``logits`` and ``target``.

    ``target`` is first converted to a LongTensor and moved to the device of
    ``logits``; the loss uses ``nn.CrossEntropyLoss`` with its default settings.
    """
    class_ids = target.type(torch.LongTensor)
    class_ids = class_ids.to(logits.device)
    loss_fn = nn.CrossEntropyLoss()
    return loss_fn(logits, class_ids)

In [None]:
# Chain the embedding layer and the LSTM classifier; nn.Sequential feeds the
# output of the first module into the second.
model = nn.Sequential(embed_model, lstm_model)

In [None]:
# Pick the loss from the dataset name: BCE-with-logits for 'imdb',
# cross-entropy for any name starting with 'sst'.
if args.dataset == 'imdb':
    criterion = nn.BCEWithLogitsLoss()
elif args.dataset[:3] == 'sst':
    criterion = nn.CrossEntropyLoss()
else:
    # Name the rejected value and the accepted ones so the failure is actionable.
    raise NotImplementedError(
        f"No loss is configured for dataset {args.dataset!r}; "
        "expected 'imdb' or a name starting with 'sst'.")

In [None]:
model = model.to(device)
# Take the learning rate from the --lr option instead of a hard-coded value;
# float() also copes with a value that reached args as text.
optimizer = optim.Adam(model.parameters(), lr=float(args.lr))
print(model)
N_EPOCHS = args.n_epochs

In [None]:
# Per-epoch loss / accuracy history for each split, plus the compression rate
# of the chosen embedding.
log = dict(
    compression_rate=compression_rate,
    train_loss=[], test_loss=[], valid_loss=[],
    train_acc=[], test_acc=[], valid_acc=[],
)
# Metrics of the epoch with the best validation accuracy seen so far.
# All three accuracies start at 0 so every key read later exists from the start.
best_result = {
    "epoch": 0, "train_acc": 0, "valid_acc": 0, "test_acc": 0}

In [None]:
import torch
import torch.nn as nn
import subprocess
import pandas as pd
import pickle

def binary_accuracy(preds, y):
    """
    Fraction of correct predictions in a batch (8 right out of 10 gives 0.8, not 8).

    A 1-D ``preds`` is passed through a sigmoid and rounded to obtain the
    predicted label; otherwise the label is the argmax along dimension 1.
    """
    if preds.dim() == 1:
        predicted = preds.sigmoid().round()
    else:
        predicted = preds.argmax(1)

    # Cast the boolean matches to float so they can be summed and divided.
    hits = (predicted == y).float()
    return hits.sum() / len(hits)

In [None]:
def train(model, iterator, optimizer, criterion):
    """
    Run one training pass over ``iterator`` and return the mean loss and accuracy.

    Args:
        model: module called as ``model(batch.text)``; put into train mode here.
        iterator: sized iterable of batches exposing ``.text`` and ``.label``.
        optimizer: optimizer that is zeroed and stepped once per batch.
        criterion: an ``nn.CrossEntropyLoss`` or ``nn.BCEWithLogitsLoss`` instance.

    Returns:
        ``(loss, accuracy)``, each averaged over examples (every batch is
        weighted by its number of labels).

    Raises:
        TypeError: if ``criterion`` is neither of the two supported losses.
        ZeroDivisionError: if the iterator yields no examples.
    """
    # Labels are cast to integers for cross-entropy and to floats for BCE.
    if isinstance(criterion, nn.CrossEntropyLoss):
        dtype = torch.LongTensor
    elif isinstance(criterion, nn.BCEWithLogitsLoss):
        dtype = torch.FloatTensor
    else:
        raise TypeError(
            "criterion must be nn.CrossEntropyLoss or nn.BCEWithLogitsLoss, "
            f"got {type(criterion).__name__}")

    epoch_loss = 0
    epoch_acc = 0
    total_len = 0

    model.train()
    n_batches = len(iterator)

    for i, batch in enumerate(iterator):

        optimizer.zero_grad()
        device = batch.text.device
        labels = batch.label.type(dtype).to(device)
        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, labels)
        acc = binary_accuracy(predictions, labels)
        loss.backward()
        optimizer.step()

        B = batch.label.shape[0]

        epoch_loss += B * loss.item()
        epoch_acc += B * acc.item()

        total_len += B

        # Stop after exactly len(iterator) batches, so an iterator that keeps
        # yielding past one epoch cannot lengthen (or never finish) the pass.
        if i + 1 >= n_batches:
            break

    return epoch_loss / total_len, epoch_acc / total_len

In [None]:
def evaluate(model, iterator, criterion):
    """
    Evaluate ``model`` over ``iterator`` without gradients; return mean loss and accuracy.

    Args:
        model: module called as ``model(batch.text)``; put into eval mode here.
        iterator: sized iterable of batches exposing ``.text`` and ``.label``.
        criterion: an ``nn.CrossEntropyLoss`` or ``nn.BCEWithLogitsLoss`` instance.

    Returns:
        ``(loss, accuracy)``, each averaged over examples (every batch is
        weighted by its number of labels).

    Raises:
        TypeError: if ``criterion`` is neither of the two supported losses.
        ZeroDivisionError: if the iterator yields no examples.
    """
    # Labels are cast to integers for cross-entropy and to floats for BCE.
    if isinstance(criterion, nn.CrossEntropyLoss):
        dtype = torch.LongTensor
    elif isinstance(criterion, nn.BCEWithLogitsLoss):
        dtype = torch.FloatTensor
    else:
        raise TypeError(
            "criterion must be nn.CrossEntropyLoss or nn.BCEWithLogitsLoss, "
            f"got {type(criterion).__name__}")

    epoch_loss = 0
    epoch_acc = 0
    total_len = 0

    model.eval()
    n_batches = len(iterator)

    with torch.no_grad():

        for i, batch in enumerate(iterator):

            device = batch.text.device
            labels = batch.label.type(dtype).to(device)
            predictions = model(batch.text).squeeze(1)

            loss = criterion(predictions, labels)

            acc = binary_accuracy(predictions, labels)
            B = batch.label.shape[0]

            epoch_loss += B * loss.item()
            epoch_acc += B * acc.item()
            total_len += B

            # Stop after exactly len(iterator) batches, so an iterator that
            # keeps yielding past one epoch cannot lengthen the evaluation.
            if i + 1 >= n_batches:
                break

    return epoch_loss / total_len, epoch_acc / total_len

In [None]:
# Main loop: train for one epoch, then score the test and validation splits.
for epoch in range(N_EPOCHS):

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    test_loss, test_acc = evaluate(model, test_iterator, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    log['train_loss'].append(train_loss)
    log['test_loss'].append(test_loss)
    log['train_acc'].append(train_acc)
    log['test_acc'].append(test_acc)
    log['valid_acc'].append(valid_acc)
    log['valid_loss'].append(valid_loss)

    # The best epoch is chosen by validation accuracy only; the train and test
    # accuracies of that same epoch are stored alongside it.
    if best_result["valid_acc"] < valid_acc:
        best_result["epoch"] = epoch
        best_result["train_acc"] = train_acc
        best_result["valid_acc"] = valid_acc
        best_result["test_acc"] = test_acc

    #if args.fout is not None:
    #    with open(args.fout+f"{model_name}-best.pkl", 'wb') as f:
    #        pickle.dump(best_result, f)

    # `:02` zero-pads the epoch number to two digits; a precision such as `.2`
    # is not allowed when formatting an int and raises ValueError.
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% | Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}% |')
    print ("TEST ACCURACY:", np.round(best_result["test_acc"] * 100, 2))