In [1]:
%load_ext autoreload 
%autoreload 2

In [2]:
import os
# GPU selection for this process; set here, ahead of the cell that imports
# torch, so the values are already in the environment when CUDA is first used.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [3]:
import torch 
import numpy as np

In [4]:
import torch.nn as nn

In [5]:
import t3nsor as t3

In [9]:
class TTEmbedding(nn.Module):
    """Embedding layer whose lookup table is a t3nsor Tensor-Train (TT) matrix.

    Args:
        num_embeddings: Factorised row shape passed to ``t3.random_matrix``
            (e.g. ``[5, 5, 5, 5, 5, 2, 2, 2]``).
        embedding_dim: Factorised column shape passed to ``t3.random_matrix``
            (e.g. ``[2, 2, 2, 2, 2, 1, 1, 1]``).
        tt_rank: TT rank passed to ``t3.random_matrix``.
    """

    def __init__(self, num_embeddings, embedding_dim, tt_rank):
        super(TTEmbedding, self).__init__()
        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.tt_matrix = t3.random_matrix([num_embeddings, embedding_dim], stddev=0.1, tt_rank=tt_rank, is_params=True)
        # Register the TT cores with the module so optimizers and .to(device)
        # see them. The attribute is deliberately NOT called `parameters`:
        # that name belongs to nn.Module.parameters(), and reusing it leaves
        # the list unreachable as an attribute and is easy to misread.
        self.tt_cores = nn.ParameterList(self.tt_matrix.tt_cores)

    def forward(self, x):
        """Look up embeddings for a 2-D tensor of token ids.

        Args:
            x: 2-D integer tensor; callers in this notebook pass
               [sent len, batch size].

        Returns:
            Tensor of shape [x.shape[0], x.shape[1], -1] holding one row of
            the TT matrix per token id.
        """
        # Work batch-major internally: [dim1, dim0].
        x = x.permute(1, 0)
        batch_size, sent_size = x.shape[0], x.shape[1]

        # Flatten to one id per row; reshape copies only when the permuted
        # tensor cannot be viewed directly.
        flat_ids = x.reshape(-1)

        # Convert each flat id into one sub-index per TT core, then reverse
        # the sub-index order before gathering (kept exactly as the original
        # code did it — presumably to match gather_rows' core ordering).
        x_ind = t3.ind2sub(self.num_embeddings, flat_ids).long()
        x_ind = x_ind.flip(1)

        rows = t3.gather_rows(self.tt_matrix, x_ind).full()
        rows = rows.reshape(batch_size, sent_size, -1)
        # Restore the caller's axis order: [dim0, dim1, emb].
        return rows.permute(1, 0, 2)
    
#     def to(self, device):
#         super(TTEmbedding, self).to(device)
#         self.tt_matrix = self.tt_matrix.to(device)
        

In [44]:
from torchtext import data

# Single seed reused for the PyTorch RNGs here and for the data split later.
SEED = 1234

# Seed the CPU and CUDA generators and ask cuDNN for deterministic kernels.
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Text field: spaCy tokenisation with fix_length=500 (presumably every example
# is padded/truncated to 500 tokens by torchtext — confirm).
TEXT = data.Field(tokenize='spacy', fix_length=500)
# Label field stored as float (presumably to match BCEWithLogitsLoss targets).
LABEL = data.LabelField(dtype=torch.float)

In [45]:

from torchtext import datasets

# Load the IMDB dataset through torchtext as (train, test), using the TEXT and LABEL fields.
train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)

In [46]:
import random

# Seed Python's global RNG, then hand its state to split() explicitly.
# random.seed() returns None, so passing its return value as `random_state`
# (as this cell used to) only stayed reproducible because torchtext falls back
# to the global RNG state when it receives None.
# NOTE: this rebinds `train_data`, so re-running the cell splits the already
# reduced training set again; re-run the dataset-loading cell first.
random.seed(SEED)
train_data, valid_data = train_data.split(random_state=random.getstate())

In [47]:
# Build both vocabularies from the training split only.
# max_size=24998: presumably chosen so that, together with torchtext's special
# tokens, the vocabulary has 25000 = 5^5 * 2^3 entries, matching the TT row
# factorisation used for the embedding — confirm.
TEXT.build_vocab(train_data, max_size=24998)
LABEL.build_vocab(train_data)

In [48]:
# Mini-batch size shared by the train/valid/test iterators.
BATCH_SIZE = 64

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# One BucketIterator per split; batches are created on `device`.
train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size=BATCH_SIZE,
    device=device)

In [68]:
# Factor lists for a TT matrix: the entries of n multiply to 25000 and the
# entries of m multiply to 32.
n = np.array([5] * 5 + [2] * 3)
m = np.array([2] * 5 + [1] * 3)

In [72]:
# Scratch arithmetic — presumably the entry count of a dense 25000 x 32 embedding table, for comparison with the TT layer.
25000 * 32 

800000

In [49]:
import torch.nn as nn

class RNN(nn.Module):
    """LSTM classifier whose embedding layer is a TT-factorised ``TTEmbedding``.

    Args:
        vocab_size: Number of token ids the embedding must cover; must not
            exceed the product of ``tt_row_shape``.
        embedding_dim: Input width of the LSTM; must equal the product of
            ``tt_col_shape``.
        hidden_dim: LSTM hidden size per direction.
        output_dim: Output width of the final linear layer.
        n_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM is bidirectional.
        dropout: Dropout probability (embeddings, between LSTM layers, and
            on the final hidden state).
        tt_row_shape: Row factorisation handed to ``TTEmbedding``
            (default product: 25000).
        tt_col_shape: Column factorisation handed to ``TTEmbedding``
            (default product: 32).
        tt_rank: TT rank handed to ``TTEmbedding``.

    Raises:
        ValueError: If ``vocab_size`` or ``embedding_dim`` is inconsistent
            with the TT factorisation.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, bidirectional, dropout,
                 tt_row_shape=(5, 5, 5, 5, 5, 2, 2, 2),
                 tt_col_shape=(2, 2, 2, 2, 2, 1, 1, 1),
                 tt_rank=8):
        super().__init__()

        tt_row_shape = list(tt_row_shape)
        tt_col_shape = list(tt_col_shape)
        n_rows = int(np.prod(tt_row_shape))
        n_cols = int(np.prod(tt_col_shape))
        # Fail early with a readable message instead of an out-of-range index
        # or an LSTM input-size mismatch in the middle of training.
        if vocab_size > n_rows:
            raise ValueError(
                "vocab_size=%d exceeds the %d rows of the TT embedding %r"
                % (vocab_size, n_rows, tt_row_shape))
        if embedding_dim != n_cols:
            raise ValueError(
                "embedding_dim=%d does not match the %d columns of the TT embedding %r"
                % (embedding_dim, n_cols, tt_col_shape))

        self.bidirectional = bidirectional
        num_directions = 2 if bidirectional else 1

        # NOTE: relies on the notebook-level `device` variable, as before.
        self.embedding = TTEmbedding(tt_row_shape, tt_col_shape, tt_rank=tt_rank).to(device)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional, dropout=dropout)
        self.fc = nn.Linear(hidden_dim * num_directions, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return logits of shape [batch size, output_dim] for token ids ``x``.

        Args:
            x: Token ids, [sent len, batch size].
        """
        # embedded = [sent len, batch size, emb dim]
        embedded = self.dropout(self.embedding(x))

        # hidden = [num layers * num directions, batch size, hid dim]
        _, (hidden, _) = self.rnn(embedded)

        if self.bidirectional:
            # Last layer's final forward (hidden[-2]) and backward (hidden[-1]) states.
            final_state = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        else:
            final_state = hidden[-1, :, :]

        # final_state = [batch size, hid dim * num directions]
        # No squeeze(0) here: it would drop the batch axis for a batch of one
        # and break callers that do `.squeeze(1)` on the result.
        return self.fc(self.dropout(final_state))

In [50]:
# Hyperparameters shared by both models in this notebook.
INPUT_DIM = len(TEXT.vocab)  # vocabulary size
# Presumably must stay equal to the product of the TT column factors used by
# the RNN's embedding (2*2*2*2*2*1*1*1 = 32) — confirm before changing.
EMBEDDING_DIM = 32
HIDDEN_DIM = 256
OUTPUT_DIM = 1  # a single logit per example
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.5

# Model with the Tensor-Train embedding.
model = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)

In [51]:
import torch.optim as optim

# Adam with its default settings over every parameter registered on the model.
optimizer = optim.Adam(model.parameters())

In [52]:
# Binary cross-entropy on raw logits; the sigmoid is applied inside the loss.
criterion = nn.BCEWithLogitsLoss()

In [53]:
# Move the model and the loss module to the selected device.
model = model.to(device)
criterion = criterion.to(device)

In [54]:
def binary_accuracy(preds, y):
    """
    Fraction of correct predictions in a batch (8 right out of 10 gives 0.8, not 8).

    `preds` are raw logits; they go through a sigmoid and are rounded to 0/1
    before being compared element-wise with the targets `y`.
    """
    probabilities = torch.sigmoid(preds)
    # 1.0 where the rounded probability equals the target, 0.0 elsewhere
    hits = torch.eq(torch.round(probabilities), y).float()
    return hits.sum() / len(hits)

In [55]:
def train(model, iterator, optimizer, criterion):
    """Train `model` for one epoch and return ``(mean_loss, mean_accuracy)``.

    Args:
        model: Module called as ``model(batch.text)``; its output is squeezed
            on dim 1 before being passed to the loss.
        iterator: Sized iterable of batches exposing ``.text`` and ``.label``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(predictions, batch.label)``.

    Returns:
        Tuple of floats: per-batch loss and accuracy averaged over
        ``len(iterator)`` batches.

    Raises:
        ZeroDivisionError: If ``len(iterator)`` is 0.
    """
    epoch_loss = 0
    epoch_acc = 0
    n_batches = len(iterator)

    model.train()

    for i, batch in enumerate(iterator):
        # Stop BEFORE processing batch number len(iterator). An iterator that
        # repeats forever would otherwise never end the epoch; the previous
        # check (`i > len(iterator)`, made after the update) let two extra
        # batches through while still averaging over len(iterator).
        if i >= n_batches:
            break

        optimizer.zero_grad()

        predictions = model(batch.text).squeeze(1)
        loss = criterion(predictions, batch.label)
        acc = binary_accuracy(predictions, batch.label)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / n_batches, epoch_acc / n_batches

In [56]:
def evaluate(model, iterator, criterion):
    """Evaluate `model` without gradient tracking; return ``(mean_loss, mean_accuracy)``.

    Args:
        model: Module called as ``model(batch.text)``; its output is squeezed
            on dim 1 before being passed to the loss.
        iterator: Sized iterable of batches exposing ``.text`` and ``.label``.
        criterion: Loss called as ``criterion(predictions, batch.label)``.

    Returns:
        Tuple of floats: per-batch loss and accuracy averaged over
        ``len(iterator)`` batches.

    Raises:
        ZeroDivisionError: If ``len(iterator)`` is 0.
    """
    epoch_loss = 0
    epoch_acc = 0
    n_batches = len(iterator)

    model.eval()

    with torch.no_grad():
        for i, batch in enumerate(iterator):
            # Stop BEFORE processing batch number len(iterator), so that a
            # repeating iterator contributes exactly len(iterator) batches to
            # the averages (the old post-hoc `i > len(iterator)` check let two
            # extra batches through).
            if i >= n_batches:
                break

            predictions = model(batch.text).squeeze(1)
            loss = criterion(predictions, batch.label)
            acc = binary_accuracy(predictions, batch.label)

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / n_batches, epoch_acc / n_batches

In [65]:
# Number of passes over the training iterator for the TT-embedding model.
N_EPOCHS = 10

for epoch in range(N_EPOCHS):

    # One optimisation pass over the training split, then a no-grad pass over the validation split.
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    # One summary row per epoch; accuracies are shown as percentages.
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |')

| Epoch: 01 | Train Loss: 0.454 | Train Acc: 80.16% | Val. Loss: 0.409 | Val. Acc: 81.59% |
| Epoch: 02 | Train Loss: 0.390 | Train Acc: 83.66% | Val. Loss: 0.357 | Val. Acc: 84.20% |
| Epoch: 03 | Train Loss: 0.355 | Train Acc: 85.95% | Val. Loss: 0.329 | Val. Acc: 85.69% |
| Epoch: 04 | Train Loss: 0.320 | Train Acc: 87.17% | Val. Loss: 0.332 | Val. Acc: 86.73% |
| Epoch: 05 | Train Loss: 0.301 | Train Acc: 88.38% | Val. Loss: 0.325 | Val. Acc: 86.30% |


KeyboardInterrupt: 

In [73]:
import torch.nn as nn

class plainRNN(nn.Module):
    """Baseline LSTM classifier using a dense ``nn.Embedding`` table.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Width of each embedding vector (LSTM input size).
        hidden_dim: LSTM hidden size per direction.
        output_dim: Output width of the final linear layer.
        n_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM is bidirectional.
        dropout: Dropout probability (embeddings, between LSTM layers, and
            on the final hidden state).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, bidirectional, dropout):
        super().__init__()

        self.bidirectional = bidirectional
        # The classifier head sees one final state per direction.
        num_directions = 2 if bidirectional else 1

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers, bidirectional=bidirectional, dropout=dropout)
        self.fc = nn.Linear(hidden_dim * num_directions, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return logits of shape [batch size, output_dim] for token ids ``x``.

        Args:
            x: Token ids, [sent len, batch size].
        """
        # embedded = [sent len, batch size, emb dim]
        embedded = self.dropout(self.embedding(x))

        # hidden = [num layers * num directions, batch size, hid dim]
        _, (hidden, _) = self.rnn(embedded)

        if self.bidirectional:
            # Last layer's final forward (hidden[-2]) and backward (hidden[-1]) states.
            final_state = torch.cat((hidden[-2, :, :], hidden[-1, :, :]), dim=1)
        else:
            final_state = hidden[-1, :, :]

        # final_state = [batch size, hid dim * num directions]
        # No squeeze(0) here: it would drop the batch axis for a batch of one
        # and break callers that do `.squeeze(1)` on the result.
        return self.fc(self.dropout(final_state))

In [74]:
# Rebind `model` to the dense-embedding baseline, built with the same hyperparameters as the TT model.
model = plainRNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, N_LAYERS, BIDIRECTIONAL, DROPOUT)

In [75]:
# Fresh Adam optimizer bound to the baseline model's parameters (replaces the earlier `optimizer`).
optimizer = optim.Adam(model.parameters())

In [76]:
# Move the baseline model to the selected device; `criterion` is reused as-is.
model = model.to(device)

In [77]:
#model_tt.embedding.to(device)

In [78]:
# The baseline is given 20 epochs (the TT model cell above used 10).
N_EPOCHS = 20

for epoch in range(N_EPOCHS):

    # Same train/evaluate helpers and the same iterators as for the TT model.
    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)
    
    # One summary row per epoch; accuracies are shown as percentages.
    print(f'| Epoch: {epoch+1:02} | Train Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |')

| Epoch: 01 | Train Loss: 0.696 | Train Acc: 53.85% | Val. Loss: 0.694 | Val. Acc: 49.55% |
| Epoch: 02 | Train Loss: 0.693 | Train Acc: 54.69% | Val. Loss: 0.689 | Val. Acc: 55.35% |
| Epoch: 03 | Train Loss: 0.687 | Train Acc: 56.66% | Val. Loss: 0.634 | Val. Acc: 64.59% |
| Epoch: 04 | Train Loss: 0.653 | Train Acc: 63.19% | Val. Loss: 0.643 | Val. Acc: 63.12% |
| Epoch: 05 | Train Loss: 0.637 | Train Acc: 64.38% | Val. Loss: 0.592 | Val. Acc: 68.40% |
| Epoch: 06 | Train Loss: 0.590 | Train Acc: 69.94% | Val. Loss: 0.612 | Val. Acc: 66.68% |
| Epoch: 07 | Train Loss: 0.605 | Train Acc: 68.62% | Val. Loss: 0.515 | Val. Acc: 75.90% |
| Epoch: 08 | Train Loss: 0.546 | Train Acc: 73.42% | Val. Loss: 0.483 | Val. Acc: 77.10% |
| Epoch: 09 | Train Loss: 0.493 | Train Acc: 77.95% | Val. Loss: 0.451 | Val. Acc: 80.05% |
| Epoch: 10 | Train Loss: 0.464 | Train Acc: 80.10% | Val. Loss: 0.416 | Val. Acc: 81.30% |
| Epoch: 11 | Train Loss: 0.433 | Train Acc: 81.14% | Val. Loss: 0.404 | Val. Ac

In [32]:
# Debugging aid: pull a single batch from the training iterator and keep it in `batch`.
for batch in train_iterator:
    break

In [20]:
# NOTE(review): `model_tt` is not defined in any visible cell — this scratch cell presumably relies on stale kernel state; confirm or remove.
model_tt.embedding.forward

SyntaxError: invalid syntax (<ipython-input-20-01b379a06da9>, line 1)

In [37]:
# Swap the two axes of the batch's token tensor (presumably [sent len, batch size] -> [batch size, sent len] — confirm).
stuff = batch.text.permute(1, 0)

In [40]:
# NOTE(review): `model_tt` is not defined in any visible cell. `stuff` has already had its axes swapped; if this
# embedding is a TTEmbedding, forward() swaps them again — confirm the intended input layout. Prefer calling the
# module itself rather than `.forward` directly.
model_tt.embedding.forward(stuff)

RuntimeError: cuda runtime error (59) : device-side assert triggered at ../aten/src/THC/generic/THCTensorCopy.cpp:70