In [1]:
%load_ext autoreload 
%autoreload 2

In [2]:
import os
# Number GPUs by PCI bus id and expose only device "0" to this process.
# These are set in a cell that runs before torch is imported.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [3]:
import torch 
import numpy as np

In [4]:
import torch.nn as nn

In [5]:
import tttorch as t3

In [18]:
class TTEmbedding(nn.Module):
    """Embedding layer whose lookup table is stored as a TT-matrix (tttorch).

    Args:
        num_embeddings: list of factors for the row (vocabulary) dimension,
            passed to ``t3.random_matrix`` and ``t3.ind2sub``.
        embedding_dim: list of factors for the column (embedding) dimension.
        tt_rank: TT-rank passed to ``t3.random_matrix``.
    """

    def __init__(self, num_embeddings, embedding_dim, tt_rank):
        super(TTEmbedding, self).__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.tt_matrix = t3.random_matrix([num_embeddings, embedding_dim], tt_rank=tt_rank, is_params=True)
        # Register the TT cores so .to(device) and optimizers see them.
        # The attribute is deliberately NOT called `parameters`: that name
        # belongs to nn.Module.parameters(), and reusing it leaves the list
        # reachable only through the module's internal registry.
        self.cores = nn.ParameterList(self.tt_matrix.tt_cores)

    def forward(self, x):
        """Look up embeddings for an integer index tensor of any shape.

        Args:
            x: tensor of row indices (e.g. ``[batch size, sent len]``).

        Returns:
            Tensor of shape ``x.shape + (emb,)`` where ``emb`` is inferred
            from the gathered rows (presumably prod(embedding_dim)).
        """
        index_shape = x.shape

        # Flatten so every index becomes one row lookup.
        flat_indices = x.contiguous().view(-1)

        # Convert flat row indices into per-core sub-indices.
        x_ind = t3.ind2sub(self.num_embeddings, flat_indices).long()

        # NOTE(review): the recorded notebook run hits a CUDA device-side
        # assert in this layer. If ind2sub already returns the sub-indices in
        # core order, this flip would misalign them with cores of different
        # sizes (5 vs 2) -- verify against tttorch before relying on it.
        x_ind = x_ind.flip(1)

        rows = t3.gather_rows(self.tt_matrix, x_ind).full()

        # Restore the caller's index shape instead of assuming a 2-D input.
        return rows.view(*index_shape, -1)
    
#     def to(self, device):
#         super(TTEmbedding, self).to(device)
#         self.tt_matrix = self.tt_matrix.to(device)
        

In [7]:
from torchtext import data

# Single seed reused for torch's CPU and CUDA RNGs (and for the data split later on).
SEED = 1234

torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
# Request deterministic cuDNN kernels.
torch.backends.cudnn.deterministic = True

# torchtext fields: TEXT is tokenized with spaCy, LABEL is a float-typed label field.
TEXT = data.Field(tokenize='spacy')
LABEL = data.LabelField(dtype=torch.float)

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


In [8]:

from torchtext import datasets

# Load the IMDB train/test splits, processed through the TEXT and LABEL fields.
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

In [9]:
import random

# random.seed() returns None, so `random_state=random.seed(SEED)` passed None
# and was reproducible only through the seeding side effect. Seed first, then
# hand the resulting RNG state to split() explicitly.
random.seed(SEED)
train_data, valid_data = train_data.split(random_state=random.getstate())

In [10]:
# Build vocabularies from the training split only; the text vocabulary is capped at 24998 entries.
# NOTE(review): presumably torchtext adds its special tokens on top of max_size, giving the
# 25000 rows that the TT embedding shape [5, 5, 5, 5, 5, 2, 2, 2] multiplies out to -- confirm
# with len(TEXT.vocab).
TEXT.build_vocab(train_data, max_size=24998)
LABEL.build_vocab(train_data)

In [11]:
# Number of examples per batch for all three iterators.
BATCH_SIZE = 64

# Use the GPU when one is available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One BucketIterator per split; batches are placed on `device`.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size=BATCH_SIZE,
    device=device)

In [12]:
import torch.nn as nn

class RNN(nn.Module):
    """LSTM classifier that consumes already-embedded sequences.

    The embedding lookup is not part of this module; callers pass the
    embedded input. ``input_dim`` is accepted but not used.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in the same order (LSTM, then Linear) so seeded
        # initialisation is unaffected.
        self.rnn = nn.LSTM(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map embedded input to one output vector per batch element.

        Args:
            x: embedded input, ``[sent len, batch size, emb dim]``.

        Returns:
            ``[batch size, output_dim]`` tensor produced from the final
            hidden state.
        """
        # Only the final hidden state is needed; per-step outputs and the
        # cell state are discarded.
        _, (last_hidden, _) = self.rnn(x)

        # last_hidden: [1, batch size, hid dim] -> [batch size, hid dim]
        final_state = last_hidden.squeeze(0)
        return self.fc(final_state)

In [21]:
# Model hyper-parameters.
INPUT_DIM = len(TEXT.vocab)  # vocabulary size
EMBEDDING_DIM = 100          # feature size of the embedded input fed to the LSTM
HIDDEN_DIM = 256             # LSTM hidden size
OUTPUT_DIM = 1               # one logit per example

# Build the classifier and move it to the selected device.
model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM).to(device)

In [22]:
# Factorised shapes of the TT embedding table: rows (vocabulary) x columns (embedding).
TT_VOCAB_SHAPE = [5, 5, 5, 5, 5, 2, 2, 2]  # product: 25000 rows
TT_EMB_SHAPE = [2, 5, 2, 5, 1, 1, 1, 1]    # product: 100 columns
TT_RANK = 4

# Fail early with a readable message: an out-of-range token index otherwise
# only shows up as an opaque CUDA device-side assert inside the lookup.
assert int(np.prod(TT_VOCAB_SHAPE)) >= INPUT_DIM, (
    f"TT table has {int(np.prod(TT_VOCAB_SHAPE))} rows but the vocabulary has {INPUT_DIM} entries")
assert int(np.prod(TT_EMB_SHAPE)) == EMBEDDING_DIM, (
    f"TT embedding width {int(np.prod(TT_EMB_SHAPE))} does not match EMBEDDING_DIM={EMBEDDING_DIM}")

ttemb = TTEmbedding(TT_VOCAB_SHAPE, TT_EMB_SHAPE, tt_rank=TT_RANK).to(device)

In [23]:
import torch.optim as optim

# One Adam optimizer over both the classifier's and the TT embedding's parameters.
optimizer = optim.Adam([*model.parameters(), *ttemb.parameters()])

In [24]:
# Binary cross-entropy on raw logits (the sigmoid is applied inside the loss).
criterion = nn.BCEWithLogitsLoss()

In [25]:
#ttemb = TTEmbedding([5, 5, 5, 5, 5, 2, 2, 2], [2, 5, 2, 5, 1, 1, 1, 1], tt_rank=4).to(device)
#model = model.to(device)
# Move the loss module to the same device as the model.
criterion = criterion.to(device)

In [26]:
def binary_accuracy(preds, y):
    """
    Fraction of correct predictions in a batch (8 of 10 right gives 0.8, not 8).

    `preds` are raw logits: they are passed through a sigmoid and rounded to
    0/1 before being compared with `y`. Returns a 0-dim tensor.
    """
    hard_preds = torch.sigmoid(preds).round()
    # Cast the boolean matches to float so the division below is a float division.
    hits = (hard_preds == y).float()
    return hits.sum() / len(hits)

In [27]:
def train(model, iterator, optimizer, criterion, embedding=None):
    """Run one training epoch and return ``(mean loss, mean accuracy)``.

    Args:
        model: module producing one logit per example.
        iterator: sized iterable of batches exposing ``.text``
            (``[sent len, batch size]``) and ``.label``.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(predictions, batch.label)``.
        embedding: optional external embedding applied to ``batch.text``
            before the model. When omitted, a model with its own
            ``embedding`` attribute (e.g. ``TTRNN``) receives the raw token
            indices; any other model falls back to the notebook-level
            ``ttemb``, which is what this function always used before.

    Returns:
        Tuple of floats averaged over the batches actually processed
        (``nan`` for both if the iterator yields nothing).
    """
    from itertools import islice

    # Previously every model was fed through the global `ttemb`, so a model
    # that embeds internally received already-embedded floats.
    if embedding is None and not hasattr(model, 'embedding'):
        embedding = ttemb

    model.train()

    epoch_loss = 0.0
    epoch_acc = 0.0
    n_batches = 0

    # Cap the epoch at len(iterator) batches. The old `i > len(iterator)`
    # check let an iterator that keeps yielding run two batches too long
    # while the totals were still divided by len(iterator).
    for batch in islice(iterator, len(iterator)):

        optimizer.zero_grad()

        if embedding is None:
            inputs = batch.text
        else:
            # The embedding is called batch-first; the model gets sequence-first input.
            inputs = embedding(batch.text.permute(1, 0)).permute(1, 0, 2)

        predictions = model(inputs).squeeze(1)

        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()
        n_batches += 1

    if n_batches == 0:
        # No batches: the averages are undefined.
        return float('nan'), float('nan')

    return epoch_loss / n_batches, epoch_acc / n_batches

In [28]:
def evaluate(model, iterator, criterion, embedding=None):
    """Evaluate for one pass without gradients; return ``(mean loss, mean accuracy)``.

    Args:
        model: module producing one logit per example.
        iterator: sized iterable of batches exposing ``.text``
            (``[sent len, batch size]``) and ``.label``.
        criterion: loss called as ``criterion(predictions, batch.label)``.
        embedding: optional external embedding applied to ``batch.text``
            before the model. When omitted, a model with its own
            ``embedding`` attribute (e.g. ``TTRNN``) receives the raw token
            indices; any other model falls back to the notebook-level
            ``ttemb``, which is what this function always used before.

    Returns:
        Tuple of floats averaged over the batches actually processed
        (``nan`` for both if the iterator yields nothing).
    """
    from itertools import islice

    # Previously every model was fed through the global `ttemb`, so a model
    # that embeds internally received already-embedded floats.
    if embedding is None and not hasattr(model, 'embedding'):
        embedding = ttemb

    model.eval()

    epoch_loss = 0.0
    epoch_acc = 0.0
    n_batches = 0

    with torch.no_grad():

        # Cap the pass at len(iterator) batches; the old `i > len(iterator)`
        # check allowed two extra batches on an iterator that keeps yielding.
        for batch in islice(iterator, len(iterator)):

            if embedding is None:
                inputs = batch.text
            else:
                # The embedding is called batch-first; the model gets sequence-first input.
                inputs = embedding(batch.text.permute(1, 0)).permute(1, 0, 2)

            predictions = model(inputs).squeeze(1)

            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
            n_batches += 1

    if n_batches == 0:
        # No batches: the averages are undefined.
        return float('nan'), float('nan')

    return epoch_loss / n_batches, epoch_acc / n_batches

In [None]:
N_EPOCHS = 5

# Train `model` for N_EPOCHS epochs, evaluating on the validation iterator after each one.
for epoch in range(N_EPOCHS):

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    # One summary line per epoch; accuracies are shown as percentages.
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |')

| Epoch: 01 | Train Loss: 0.699 | Train Acc: 50.81% | Val. Loss: 0.693 | Val. Acc: 48.89% |


In [28]:
class TTRNN(nn.Module):
    """LSTM classifier with a built-in TT-matrix embedding layer.

    Args:
        input_dim: vocabulary size; must fit in the TT table
            (``prod(vocab_shape)`` rows).
        embedding_dim: LSTM input size; must equal ``prod(emb_shape)``.
        hidden_dim: LSTM hidden size.
        output_dim: number of outputs of the final linear layer.
        vocab_shape: factorisation of the row dimension of the TT table.
        emb_shape: factorisation of the column dimension of the TT table.
        tt_rank: TT-rank of the embedding.

    Raises:
        ValueError: if the TT shapes are inconsistent with ``input_dim`` or
            ``embedding_dim``.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim,
                 vocab_shape=(5, 5, 5, 5, 5, 2, 2, 2),
                 emb_shape=(2, 5, 2, 5, 1, 1, 1, 1),
                 tt_rank=4):
        super().__init__()

        # Validate up front: a token index beyond the table, or a width that
        # disagrees with the LSTM, otherwise fails only deep inside forward().
        table_rows = int(np.prod(vocab_shape))
        table_cols = int(np.prod(emb_shape))
        if table_rows < input_dim:
            raise ValueError(
                f"TT table has {table_rows} rows but input_dim is {input_dim}")
        if table_cols != embedding_dim:
            raise ValueError(
                f"TT embedding width {table_cols} does not match embedding_dim {embedding_dim}")

        # Lists are passed on, matching the form the shapes were originally written in.
        self.embedding = TTEmbedding(list(vocab_shape), list(emb_shape), tt_rank=tt_rank)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Classify a batch of token-index sequences.

        Args:
            x: token indices, ``[sent len, batch size]``.

        Returns:
            ``[batch size, output_dim]`` tensor of logits.
        """
        # The embedding layer is called batch-first; permute in and back out.
        # embedded = [sent len, batch size, emb dim]
        embedded = self.embedding(x.permute(1, 0)).permute(1, 0, 2)

        # output = [sent len, batch size, hid dim]
        # hidden = [1, batch size, hid dim]
        output, (hidden, cell) = self.rnn(embedded)

        return self.fc(hidden.squeeze(0))
    
#     def to(self, device):
#         super().to(device)
#         #self.embedding.to(device)

In [29]:
# Classifier variant that owns its TT embedding layer.
model_tt = TTRNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

In [30]:
# Move the model to the selected device; as the cell's last expression this also displays the module tree.
model_tt.to(device)

TTRNN(
  (embedding): TTEmbedding(
    (parameters): ParameterList(
        (0): Parameter containing: [torch.cuda.FloatTensor of size 1x5x2x4 (GPU 0)]
        (1): Parameter containing: [torch.cuda.FloatTensor of size 4x5x5x4 (GPU 0)]
        (2): Parameter containing: [torch.cuda.FloatTensor of size 4x5x2x4 (GPU 0)]
        (3): Parameter containing: [torch.cuda.FloatTensor of size 4x5x5x4 (GPU 0)]
        (4): Parameter containing: [torch.cuda.FloatTensor of size 4x5x1x4 (GPU 0)]
        (5): Parameter containing: [torch.cuda.FloatTensor of size 4x2x1x4 (GPU 0)]
        (6): Parameter containing: [torch.cuda.FloatTensor of size 4x2x1x4 (GPU 0)]
        (7): Parameter containing: [torch.cuda.FloatTensor of size 4x2x1x1 (GPU 0)]
    )
  )
  (rnn): LSTM(100, 256)
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

In [31]:
# Rebinds `optimizer` to a new Adam instance over model_tt's parameters.
optimizer = optim.Adam(model_tt.parameters())

In [41]:
#model_tt.embedding.to(device)

In [42]:
# Number of passes over the training data.
N_EPOCHS = 5

# Train `model_tt`, evaluating on the validation iterator after each epoch.
for epoch in range(N_EPOCHS):

    train_loss, train_acc = train(model_tt, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model_tt, valid_iterator, criterion)
    
    # One summary line per epoch; accuracies are shown as percentages.
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |')

RuntimeError: cuda runtime error (59) : device-side assert triggered at ../aten/src/THC/generic/THCTensorCopy.cpp:70

In [32]:
# Debug helper: leave the first training batch bound to `batch` for inspection.
for batch in train_iterator:
    break

In [20]:
# Debug: display the embedding layer's bound forward method.
model_tt.embedding.forward

SyntaxError: invalid syntax (<ipython-input-20-01b379a06da9>, line 1)

In [37]:
# Debug: swap the two axes of the batch's text tensor, as is done before every embedding call.
stuff = batch.text.permute(1, 0)

In [40]:
# Debug: run the embedding lookup on its own to isolate the failing step.
model_tt.embedding.forward(stuff)

RuntimeError: cuda runtime error (59) : device-side assert triggered at ../aten/src/THC/generic/THCTensorCopy.cpp:70