In [1]:
import torch
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader, dataset
import numpy as np
from matplotlib import pyplot
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk import word_tokenize
import nltk
from data_loading_code import preprocess_pandas
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, classification_report

import math
import os
from tempfile import TemporaryDirectory
from typing import Tuple

import time


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device is", device)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


device is cpu


In [6]:
class TransformerModel(nn.Module):

    def __init__(self, ntoken: int, d_model: int, nhead: int, d_hid: int,
                 nlayers: int, dropout: float = 0.5):
        super().__init__()
        self.model_type = 'Transformer'
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(d_model, nhead, d_hid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.embedding = nn.Embedding(ntoken, d_model)
        self.d_model = d_model
        self.linear = nn.Linear(d_model, ntoken)

        self.init_weights()

    def init_weights(self) -> None:
        initrange = 0.1
        self.embedding.weight.data.uniform_(-initrange, initrange)
        self.linear.bias.data.zero_()
        self.linear.weight.data.uniform_(-initrange, initrange)

    def forward(self, src: Tensor, src_mask: Tensor = None) -> Tensor:
        """
        Arguments:
            src: Tensor, shape ``[seq_len, batch_size]``
            src_mask: Tensor, shape ``[seq_len, seq_len]``

        Returns:
            output Tensor of shape ``[seq_len, batch_size, ntoken]``
        """
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        if src_mask is None:
            """Generate a square causal mask for the sequence. The masked positions are filled with float('-inf').
            Unmasked positions are filled with float(0.0).
            """
            src_mask = nn.Transformer.generate_square_subsequent_mask(len(src)).to(device)
        output = self.transformer_encoder(src, src_mask)
        output = self.linear(output)
        return output

class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Arguments:
            x: Tensor, shape ``[seq_len, batch_size, embedding_dim]``
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

In [15]:
from torchtext.datasets import WikiText2
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator       
import copy     # pre-process

data = pd.read_csv("amazon_cells_labelled.txt", delimiter='\t', header=None)
data.columns = ['Sentence', 'Class']
data['index'] = data.index                                          # add new column index
columns = ['index', 'Class', 'Sentence']
data = preprocess_pandas(data, columns)   

#train_iter, val_iter, training_labels, validation_labels = train_test_split( # split the data into training, validation, and test splits
#    data['Sentence'].values.astype('U'),
#    data['Class'].values.astype('int32'),
#    test_size=0.10,
#    random_state=0,
#    shuffle=True
#)

train_iter, val_iter, test_iter = \
              np.split(data.sample(frac=1, random_state=42), 
                       [int(.6*len(data)), int(.8*len(data))])

train_iter2 = copy.deepcopy(train_iter)

tokenizer = get_tokenizer('basic_english')
vocab = build_vocab_from_iterator(map(tokenizer, train_iter2["Sentence"]), specials=['<unk>'])
vocab.set_default_index(vocab['<unk>'])

#nltk.download('punkt')
#nltk.download('stopwords')
# get data, pre-process and split              

def data_process(raw_text_iter: dataset.IterableDataset) -> Tensor:
    """Converts raw text into a flat Tensor."""
    data2 = [torch.tensor(vocab(tokenizer(item)), dtype=torch.long) for item in raw_text_iter]
    print(data2)
    return torch.cat(tuple(filter(lambda t: t.numel() > 0, data2)))


train_data_nb = data_process(train_iter["Sentence"])
val_data_nb = data_process(val_iter["Sentence"])
test_data_nb = data_process(test_iter["Sentence"])


def batchify(data: Tensor, bsz: int) -> Tensor:
    """Divides the data into ``bsz`` separate sequences, removing extra elements
    that wouldn't cleanly fit.

    Arguments:
        data: Tensor, shape ``[N]``
        bsz: int, batch size

    Returns:
        Tensor of shape ``[N // bsz, bsz]``
    """
    seq_len = data.size(0) // bsz
    data = data[:seq_len * bsz]
    data = data.view(bsz, seq_len).t().contiguous()
    return data.to(device)

batch_size = 40
eval_batch_size = 40
train_data = batchify(train_data_nb, batch_size)  # shape ``[seq_len, batch_size]``
val_data = batchify(val_data_nb, eval_batch_size)
test_data = batchify(test_data_nb, eval_batch_size)

#training_data, validation_data, training_labels, validation_labels = train_test_split( # split the data into training, validation, and test splits
#    data['Sentence'].values.astype('U'),
#    data['Class'].values.astype('int32'),
#    test_size=0.10,
#    random_state=0,
#    shuffle=True
#)
#
## vectorize data using TFIDF and transform for PyTorch for scalability
word_vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1,2), max_features=50000, max_df=0.5, use_idf=True, norm='l2')
training_data = word_vectorizer.fit_transform(train_iter["Sentence"])        # transform texts to sparse matrix
training_data = training_data.todense()                             # convert to dense matrix for Pytorch
vocab_size = len(word_vectorizer.vocabulary_)
validation_data = word_vectorizer.transform(val_iter["Sentence"])
validation_data = validation_data.todense()
testing_data = word_vectorizer.transform(test_iter["Sentence"])
testing_data = testing_data.todense()
#
train_x_tensor = torch.from_numpy(np.array(training_data)).type(torch.FloatTensor)
train_y_tensor = torch.from_numpy(np.array(train_iter["Class"])).long()
validation_x_tensor = torch.from_numpy(np.array(validation_data)).type(torch.FloatTensor)
validation_y_tensor = torch.from_numpy(np.array(val_iter["Class"])).long()
test_x_tensor = torch.from_numpy(np.array(testing_data)).type(torch.FloatTensor)
test_y_tensor = torch.from_numpy(np.array(test_iter["Class"])).long()
#
#class TransfromerDataset(torch.utils.data.Dataset):
#    def __init__(self, datasetA, bptt):
#        self.source = datasetA
#        self.bptt = bptt
#
#    def __getitem__(self, i):
#        seq_len = min(self.bptt, len(self.source) - 1 - i)
#        data = self.source[i:i+seq_len]
#        target = self.source[i+1:i+1+seq_len].reshape(-1)
#        return data, target
#
#    def __len__(self):
#        return min(len(self.datasetA))
#    
#train_ds = ConcatDataset(train_x_tensor,train_y_tensor)
#val_ds = ConcatDataset(validation_x_tensor,validation_y_tensor)
#train_loader = DataLoader(train_ds,batch_size=5)
#val_loader = DataLoader(val_ds,batch_size=5)

  return bound(*args, **kwds)


[tensor([1280,  194,    9,  167,   16,  243,    2,  551,    3,  254,   16,   10,
          26,   68,   12]), tensor([   2, 1303,    7,  566,  435,    4,  182,    5,  106, 1295,    6,  419,
           1]), tensor([  3,  61, 140,  48,   8,  29,   1]), tensor([   8,   59,   18,  121,    2,  333, 1315,  412,    4,    5,  170,    2,
         120,   89,    3,  187,    9,  126,    5,   24,    1]), tensor([  3, 140,  48,   8, 117,   9, 816,  12]), tensor([   5,   30,  119,   21,   14,  124,  674,    6,    4,   10,  209,   51,
          47,    2, 1365,   31,  138,    1]), tensor([  73,   11,   25,  123,    2,  162,  327,   22,    3,  138,    4, 1102,
          73,   11,   25,   72,    8,   13,    1]), tensor([   3,   92,    2,  606,   49,   16,   14,   94,  333, 1335,    4,    5,
          78,   60, 1346,   46, 1127,   14,  892,  257,   38,  155,    1]), tensor([147,  29,   6,   4, 130,  15,  93,  12]), tensor([ 23,  26, 542,  16,   2,  68,   1]), tensor([365,   5,  30,  62, 365,   5,   7, 104,

In [125]:
bptt = 35
def get_batch(source: Tensor, i: int) -> Tuple[Tensor, Tensor]:
    """
    Args:
        source: Tensor, shape ``[full_seq_len, batch_size]``
        i: int

    Returns:
        tuple (data, target), where data has shape ``[seq_len, batch_size]`` and
        target has shape ``[seq_len * batch_size]``
    """
    seq_len = min(bptt, len(source) - 1 - i)
    data = source[i:i+seq_len]
    target = source[i+1:i+1+seq_len].reshape(-1)
    return data, target


In [26]:
ntokens = len(vocab)  # size of vocabulary
emsize = 200  # embedding dimension
d_hid = 200  # dimension of the feedforward network model in ``nn.TransformerEncoder``
nlayers = 2  # number of ``nn.TransformerEncoderLayer`` in ``nn.TransformerEncoder``
nhead = 2  # number of heads in ``nn.MultiheadAttention``
dropout = 0.5  # dropout probability
model = TransformerModel(ntokens, emsize, nhead, d_hid, nlayers, dropout).to(device)



In [127]:
criterion = nn.CrossEntropyLoss()
lr = 5.0  # learning rate
optimizer = torch.optim.SGD(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95)

def train(model: nn.Module) -> None:
    model.train()  # turn on train mode
    total_loss = 0.
    log_interval = 2
    start_time = time.time()

    num_batches = len(train_data) // bptt
    for batch, i in enumerate(range(0, train_data.size(0) - 1, bptt)):
        data, targets = get_batch(train_data, i)
        output = model(data)
        output_flat = output.view(-1, ntokens)
        loss = criterion(output_flat, targets)

        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
        optimizer.step()

        total_loss += loss.item()
        if batch % log_interval == 0 and batch > 0:
            lr = scheduler.get_last_lr()[0]
            ms_per_batch = (time.time() - start_time) * 1000 / log_interval
            cur_loss = total_loss / log_interval
            ppl = math.exp(cur_loss)
            print(f'| epoch {epoch:3d} | {batch:5d}/{num_batches:5d} batches | '
                  f'lr {lr:02.2f} | ms/batch {ms_per_batch:5.2f} | '
                  f'loss {cur_loss:5.2f} | ppl {ppl:8.2f}')
            total_loss = 0
            start_time = time.time()

def evaluate(model: nn.Module, eval_data: Tensor) -> float:
    model.eval()  # turn on evaluation mode
    total_loss = 0.
    with torch.no_grad():
        for i in range(0, eval_data.size(0) - 1, bptt):
            data, targets = get_batch(eval_data, i)
            seq_len = data.size(0)
            output = model(data)
            output_flat = output.view(-1, ntokens)
            total_loss += seq_len * criterion(output_flat, targets).item()
    return total_loss / (len(eval_data) - 1)

In [128]:
best_val_loss = float('inf')
epochs = 40

with TemporaryDirectory() as tempdir:
    best_model_params_path = os.path.join(tempdir, "best_model_params.pt")

    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()
        train(model)
        val_loss = evaluate(model, val_data)
        val_ppl = math.exp(val_loss)
        elapsed = time.time() - epoch_start_time
        print('-' * 89)
        print(f'| end of epoch {epoch:3d} | time: {elapsed:5.2f}s | '
            f'valid loss {val_loss:5.2f} | valid ppl {val_ppl:8.2f}')
        print('-' * 89)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), best_model_params_path)

        scheduler.step()
    model.load_state_dict(torch.load(best_model_params_path)) # load best model states

| epoch   1 |     2/    5 batches | lr 5.00 | ms/batch 366.25 | loss 11.37 | ppl 86476.92
| epoch   1 |     4/    5 batches | lr 5.00 | ms/batch 270.41 | loss  7.09 | ppl  1200.46
-----------------------------------------------------------------------------------------
| end of epoch   1 | time:  1.76s | valid loss  7.54 | valid ppl  1886.95
-----------------------------------------------------------------------------------------
| epoch   2 |     2/    5 batches | lr 4.75 | ms/batch 556.83 | loss 11.25 | ppl 77003.74
| epoch   2 |     4/    5 batches | lr 4.75 | ms/batch 296.31 | loss  8.01 | ppl  3009.15
-----------------------------------------------------------------------------------------
| end of epoch   2 | time:  2.05s | valid loss  7.11 | valid ppl  1221.75
-----------------------------------------------------------------------------------------
| epoch   3 |     2/    5 batches | lr 4.51 | ms/batch 315.74 | loss  9.95 | ppl 20896.78
| epoch   3 |     4/    5 batches | lr 4.5

In [129]:
inp = "I like this phone"
tokens = tokenizer(inp)
embed = torch.tensor([vocab(tokens)], dtype=torch.long)
print(inp)
print(tokens)
print(embed)
print(embed.shape)
print(embed.dtype)
print(type(embed))
output = model(embed)
print(output.shape)
output_flat = output.view(-1, ntokens)
print(output_flat.shape)
print(output_flat)


I like this phone
['i', 'like', 'this', 'phone']
tensor([[ 3, 56,  8, 13]])
torch.Size([1, 4])
torch.int64
<class 'torch.Tensor'>
torch.Size([1, 4, 1370])
torch.Size([4, 1370])
tensor([[-1.8296,  2.7443,  2.9381,  ..., -0.5169, -1.7309, -0.1962],
        [-1.1747,  6.1568,  6.1707,  ...,  0.4665,  0.0516,  1.0210],
        [-0.6044,  4.5088,  0.7598,  ..., -0.1893, -0.1202, -0.2370],
        [-0.8980,  8.1770,  2.6977,  ..., -2.3811, -0.0260,  1.2922]],
       grad_fn=<ViewBackward0>)


In [7]:
class FFF(nn.Module):
    def __init__(self,vocab_sz, encoder):
        super().__init__()
        self.encoder = encoder
        self.linear1=nn.Linear(vocab_sz,500)
        self.linear2=nn.Linear(500,40)
        self.linear3=nn.Linear(40,2)
        self.sm = nn.Softmax()
    
    def forward(self,x):
        x = self.encoder(x)
        x = torch.mean(x.view(-1, ntokens),dim=0)
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.linear3(x)
        x = self.sm(x)
        return x

In [131]:
torch.Size([1, 4, 1370])

torch.Size([1, 4, 1370])

In [132]:
x = torch.rand((4,8))
print(x)
x = x[0,:]
print(x)


tensor([[0.6774, 0.8247, 0.9867, 0.4849, 0.6684, 0.8655, 0.6002, 0.3804],
        [0.5083, 0.0290, 0.4226, 0.9104, 0.7849, 0.4518, 0.6379, 0.6347],
        [0.2157, 0.0756, 0.0663, 0.9185, 0.6724, 0.1614, 0.9023, 0.2908],
        [0.5358, 0.2454, 0.2430, 0.1433, 0.6246, 0.2797, 0.8637, 0.1444]])
tensor([0.6774, 0.8247, 0.9867, 0.4849, 0.6684, 0.8655, 0.6002, 0.3804])


In [16]:
class ConcatDataset(torch.utils.data.Dataset):
    def __init__(self, datasetA, datasetB):
        self.datasetA = datasetA
        self.datasetB = datasetB

    def __getitem__(self, i):
        inp = list(self.datasetA["Sentence"])[i]
        tokens = tokenizer(inp)
        embed = torch.tensor(vocab(tokens), dtype=torch.long)
        label = F.one_hot(self.datasetB[i],num_classes=2)
        return embed,label

    def __len__(self):
        return min(len(self.datasetA),len(self.datasetB))

In [134]:
def collate_fn_padd(batch):
    '''
    Padds batch of variable length

    note: it converts things ToTensor manually here since the ToTensor transform
    assume it takes in images rather than arbitrary tensors.
    '''
    ## get sequence lengths
    lengths = torch.tensor([ t.shape[0] for t in batch ]).to(device)
    ## padd
    batch = [ torch.Tensor(t).to(device) for t in batch ]
    batch = torch.nn.utils.rnn.pad_sequence(batch)
    ## compute mask
    mask = (batch != 0).to(device)
    return batch, lengths, mask


train_ds = ConcatDataset(train_iter,train_y_tensor)
val_ds = ConcatDataset(val_iter,validation_y_tensor)
print(len(training_data),len(train_y_tensor))
train_loader = DataLoader(train_ds,batch_size=1)
val_loader = DataLoader(val_ds,batch_size=1)

600 600


In [135]:
list(train_iter["Sentence"])[2]

'i would highly recommend this product.'

In [136]:
it = iter(train_loader)

In [137]:
next(it)

[tensor([[1280,  194,    9,  167,   16,  243,    2,  551,    3,  254,   16,   10,
            26,   68,   12]]),
 tensor([[0, 1]])]

In [138]:
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs):
    best_val_loss = float('inf')
    best_model = model

    for epoch in range(num_epochs):
        print(f"Starting epoch {epoch+1} of {num_epochs}")
        model.train()
        train_loss = 0.0
        for batch_nr, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device).to(torch.float)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            #print(inputs)
            #print(outputs.unsqueeze(0))
            #print(labels)
            loss = criterion(outputs.unsqueeze(0), labels)
            loss = loss.to(device)

            if (batch_nr%80 == 0):
                print(f"Processing batch number {batch_nr+1} of {len(train_loader)}")
                print(outputs)
                print(labels)
                print("current loss",loss.item())
                
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * inputs.size(0)
        
        train_loss /= len(train_loader.dataset)
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device).to(torch.float)
            outputs = model(inputs)

            loss = criterion(outputs.unsqueeze(0), labels)
            val_loss += loss.item() * inputs.size(0)
    
    val_loss /= len(val_loader.dataset)
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
    
    # Save the best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model
    return best_model

In [139]:
LEARNING_RATE = 0.0001
EPOCHS = 10

model2 = FFF(len(vocab),model)

print(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
# optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

optimizer = torch.optim.Adam(model2.parameters(), lr=LEARNING_RATE)

# Train the model
trained_model = train_model(model2, criterion, optimizer, train_loader, val_loader, EPOCHS)

cpu
Starting epoch 1 of 10
Processing batch number 1 of 600
tensor([0.4277, 0.5723], grad_fn=<SoftmaxBackward0>)
tensor([[0., 1.]])
current loss 0.6234543323516846


  return self._call_impl(*args, **kwargs)


Processing batch number 81 of 600
tensor([0.5089, 0.4911], grad_fn=<SoftmaxBackward0>)
tensor([[0., 1.]])
current loss 0.7020509839057922
Processing batch number 161 of 600
tensor([0.0488, 0.9512], grad_fn=<SoftmaxBackward0>)
tensor([[1., 0.]])
current loss 1.2429015636444092
Processing batch number 241 of 600
tensor([0.6377, 0.3623], grad_fn=<SoftmaxBackward0>)
tensor([[1., 0.]])
current loss 0.5648726224899292
Processing batch number 321 of 600
tensor([0.2620, 0.7380], grad_fn=<SoftmaxBackward0>)
tensor([[1., 0.]])
current loss 0.9591572284698486
Processing batch number 401 of 600
tensor([0.1663, 0.8337], grad_fn=<SoftmaxBackward0>)
tensor([[1., 0.]])
current loss 1.0814727544784546
Processing batch number 481 of 600
tensor([0.4837, 0.5163], grad_fn=<SoftmaxBackward0>)
tensor([[0., 1.]])
current loss 0.6769664287567139
Processing batch number 561 of 600
tensor([0.3085, 0.6915], grad_fn=<SoftmaxBackward0>)
tensor([[0., 1.]])
current loss 0.5198602676391602
Starting epoch 2 of 10
Proce

In [140]:
while True:
    inp = input("Leave a review:")
    if inp=="exit":
        break
    tokens = tokenizer(inp)
    embed = torch.tensor(vocab(tokens), dtype=torch.long)
    pred = model2(embed).tolist()
    print(pred)
    if(pred[0]>0.5):
        print("You seem to dislike this thing")
    else:
        print("You seem to like this thing")

In [13]:
class ConcatDataset2(torch.utils.data.Dataset):
    def __init__(self, datasetA, datasetB):
        self.datasetA = datasetA
        self.datasetB = datasetB

    def __getitem__(self, i):
        inp = self.datasetA[i]
        label = F.one_hot(self.datasetB[i],num_classes=2)
        return inp,label

    def __len__(self):
        return min(len(self.datasetA),len(self.datasetB))

In [17]:
train_ds = ConcatDataset2(train_x_tensor,train_y_tensor)
val_ds = ConcatDataset2(validation_x_tensor,validation_y_tensor)
train_loader = DataLoader(train_ds,batch_size=5)
val_loader = DataLoader(val_ds,batch_size=5)

In [5]:
class ANN(nn.Module):
    def __init__(self,num_classes):
        super().__init__()
        
        self.act = nn.LeakyReLU()

        # input: 7277        
        self.fc1 = nn.Linear(in_features=5226, out_features=1000) 
        self.fc2 = nn.Linear(in_features=1000, out_features=100)
        self.fc3 = nn.Linear(in_features=100, out_features=10)
        self.fc4 = nn.Linear(in_features=10, out_features=2)
        self.Softmax = nn.Softmax(dim=1)


    
    def forward(self, x):
        #FC Layer 1
        x = self.fc1(x)
        x = self.act(x)

        #FC Layer 2
        x = self.fc2(x)   
        x = self.act(x)    

        #FC Layer 3
        x = self.fc3(x)   
        x = self.act(x)    

        #FC Layer 4
        x = self.fc4(x)    
        
        #Softmax
        out = self.Softmax(x)

        return out

In [144]:
def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs):
    best_val_loss = float('inf')
    best_model = model

    for epoch in range(num_epochs):
        print(f"Starting epoch {epoch+1} of {num_epochs}")
        model.train()
        train_loss = 0.0
        for batch_nr, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device).to(torch.float)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss = loss.to(device)

            if (batch_nr%20 == 0):
                print(f"Processing batch number {batch_nr+1} of {len(train_loader)}")
                print("current loss",loss.item())
                
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * inputs.size(0)
        
        train_loss /= len(train_loader.dataset)
    
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device).to(torch.float)
            outputs = model(inputs)
        
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
    
    val_loss /= len(val_loader.dataset)
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
    
    # Save the best model
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_model = model
    return best_model

In [145]:
LEARNING_RATE = 0.0001
EPOCHS = 6

print(device)
model_ANN = ANN(num_classes=2).to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
# optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

optimizer = torch.optim.Adam(model_ANN.parameters(), lr=LEARNING_RATE)

# Train the model
trained_model = train_model(model_ANN, criterion, optimizer, train_loader, val_loader, EPOCHS)

cpu
Starting epoch 1 of 6
Processing batch number 1 of 120
current loss 0.6638696789741516


Processing batch number 21 of 120
current loss 0.684109091758728
Processing batch number 41 of 120
current loss 0.7254304885864258
Processing batch number 61 of 120
current loss 0.7045425176620483
Processing batch number 81 of 120
current loss 0.7457168698310852
Processing batch number 101 of 120
current loss 0.6840313076972961
Starting epoch 2 of 6
Processing batch number 1 of 120
current loss 0.662720799446106
Processing batch number 21 of 120
current loss 0.6831053495407104
Processing batch number 41 of 120
current loss 0.7227659225463867
Processing batch number 61 of 120
current loss 0.7015211582183838
Processing batch number 81 of 120
current loss 0.7396436929702759
Processing batch number 101 of 120
current loss 0.676557719707489
Starting epoch 3 of 6
Processing batch number 1 of 120
current loss 0.6484014391899109
Processing batch number 21 of 120
current loss 0.651297926902771
Processing batch number 41 of 120
current loss 0.6380811333656311
Processing batch number 61 of 120
cu

In [22]:
#ANN
test_ds2 = ConcatDataset2(test_x_tensor,test_y_tensor)
test_loader2 = DataLoader(test_ds2,batch_size=1)

#Transformer
test_ds1 = ConcatDataset(test_iter,test_y_tensor)
test_loader1 = DataLoader(test_ds1,batch_size=1)

In [24]:
model_ANN = torch.load("final_ANN_model.pt")
model2 = torch.load("final_Transformer_model.pt")

criterion = torch.nn.CrossEntropyLoss().to(device)

In [18]:
def test_model_ANN(model, test_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        test_loss = 0.0
        correct = 0
        total = 0
        all_predictions = []
        all_targets = []

        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device).to(torch.float)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            _, label2 = torch.max(labels.data, 1)
            total += labels.size(0)
            correct += (predicted == label2).sum().item()
            all_predictions.extend(predicted.cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

        test_loss /= len(test_loader.dataset)
        accuracy = 100 * correct / total
        
        # Calculate confusion matrix
        #conf_matrix = confusion_matrix(all_targets, all_predictions)

    return test_loss, accuracy#, conf_matrix

def test_model_TRANS(model, test_loader, criterion):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        test_loss = 0.0
        correct = 0
        total = 0
        all_predictions = []
        all_targets = []

        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device).to(torch.float)
            outputs = model(inputs).unsqueeze(0)
            loss = criterion(outputs, labels)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            _, label2 = torch.max(labels.data, 1)
            total += labels.size(0)
            correct += (predicted == label2).sum().item()
            all_predictions.extend(predicted.cpu().numpy())
            all_targets.extend(labels.cpu().numpy())

        test_loss /= len(test_loader.dataset)
        accuracy = 100 * correct / total
        
        # Calculate confusion matrix
        #conf_matrix = confusion_matrix(all_targets, all_predictions)

    return test_loss, accuracy#, conf_matrix

In [27]:
print(test_model_ANN(model_ANN,test_loader2,criterion))
print(test_model_TRANS(model2,test_loader1,criterion))

(0.48657106310129167, 82.5)


  return self._call_impl(*args, **kwargs)


(0.5993227829039097, 71.0)


In [36]:
while True:
    inp = input("Leave a review:")
    if inp=="exit":
        break
    tokens = tokenizer(inp)
    embed = torch.tensor(vocab(tokens), dtype=torch.long)
    pred = model2(embed).tolist()
    print("USER:",inp)
    if(pred[0]>0.5):
        print("You seem to dislike this thing.",pred)
    else:
        print("You seem to like this thing.",pred)

USER: I like this
You seem to like this thing. [4.090084086032838e-15, 1.0]
USER: I hate this
You seem to dislike this thing. [1.0, 2.8463009726920063e-08]
USER: I love it
You seem to like this thing. [1.9255504698725443e-16, 1.0]


In [35]:
while True:
    inp = input("Leave a review:")
    if inp=="exit":
        break
    tdata = word_vectorizer.transform([" ".join(word_tokenize(inp))])
    tdata = torch.Tensor(tdata.todense())
    tdata = torch.from_numpy(np.array(tdata)).type(torch.FloatTensor)
    print("USER:",inp)
    model_ANN.to(device)
    pred = model_ANN(tdata).tolist()
    if(pred[0][0]>0.5):
        print("You seem to dislike this thing.",pred[0])
    else:
        print("You seem to like this thing.",pred[0])

USER: I like this product
You seem to dislike this thing. [0.6804090738296509, 0.31959089636802673]
USER: I love this
You seem to like this thing. [0.0036055261734873056, 0.9963944554328918]
USER: I hate this
You seem to dislike this thing. [0.962023913860321, 0.03797613084316254]


In [151]:
print("ANN:",sum(p.numel() for p in model_ANN.parameters()))
print("Transformer:",sum(p.numel() for p in model2.parameters()))

ANN: 5328132
Transformer: 1738992
