In [None]:
import pandas as pd
import numpy as np
import torch

from torchtext.legacy import datasets

from torchtext.legacy.data import Field, LabelField
from torchtext.legacy.data import BucketIterator

from torchtext.vocab import Vectors, GloVe

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
from tqdm.autonotebook import tqdm

In [None]:
TEXT = Field(sequential=True, lower=True, include_lengths=True)  # Поле текста
LABEL = LabelField(dtype=torch.float)                            # Поле метки

In [None]:
SEED = 1234

torch.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

Будем классифицировать комментарии к фильмам с сайта IMDB на положительные и отрицательные.

In [None]:
train, test = datasets.IMDB.splits(TEXT, LABEL)  # загрузим датасет
train, valid = train.split(random_state=random.seed(SEED))  # разобьем на части

downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:02<00:00, 33.7MB/s]


In [None]:
TEXT.build_vocab(train)
LABEL.build_vocab(train)

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"

train_iter, valid_iter, test_iter = BucketIterator.splits(
    (train, valid, test), 
    batch_size = 64,
    sort_within_batch = True,
    device = device)

## GRU

Для начала попробуем реализовать рекурентную нейронные сеть, в частности, GRU.

In [None]:
class GRU_cell(nn.Module):
  def __init__(self, embed_size, hidden_size, num_directions, layer):
    super(GRU_cell, self).__init__()
    layer_input_size = embed_size if layer == 0 else hidden_size * num_directions
    self.w_xh = nn.Parameter(torch.rand(3, layer_input_size, hidden_size, device = device))
    self.w_hh = nn.Parameter(torch.rand(3, hidden_size, hidden_size, device = device))
    self.b_rx = nn.Parameter(torch.rand(3, hidden_size, device = device))
    self.b_rh = nn.Parameter(torch.rand(3, hidden_size, device = device))

    std = 1.0 / np.sqrt(hidden_size)
    for w in self.parameters():
      w.data.uniform_(-std, std)

  def forward(self, input, hidden):
      r = torch.sigmoid(
        input @ self.w_xh[0] + self.b_rx[0] + hidden @ self.w_hh[0] + self.b_rh[0]
      )
      z = torch.sigmoid(
        input @ self.w_xh[1] + self.b_rx[1] + hidden @ self.w_hh[1] + self.b_rh[1]
      )
      n = torch.tanh(
        input @ self.w_xh[2] + self.b_rx[2] + r * (hidden @ self.w_hh[2] + self.b_rh[2])
      )
      hidden_next = (1 - z) * n + z * hidden
      return hidden_next


In [None]:
class GRU(nn.Module):
    def __init__(self, embed_size, hidden_size, bidirectional, n_layers):
        super(GRU, self).__init__()

        self.num_directions = 2 if bidirectional else 1
        self.n_layers = n_layers
        self.embed_size = embed_size
        self.hidden_size = hidden_size

        self.Cells = nn.ModuleList(modules=None)
        for i in range(n_layers):
          self.Cells.append(GRU_cell(embed_size, hidden_size, self.num_directions, i))
        

    def forward(self, x, hidden = None):
        '''
        x – torch.FloatTensor with the shape (seq_length, bs, emb_size)
        hidden - torch.FloatTensro with the shape (n_layers * num_directions, bs, hidden_size)

        return: out = [seq_length*n_layers, bs, hidden_size*num_directions]
                last_hidden = out[-1] = [bs, hidden_size*num_directions]
        '''
        
        input = x
        seq_length = x.size(0)
        out = []
        hidden = torch.zeros(self.n_layers*self.num_directions, x.size(1), self.hidden_size, dtype=x.dtype).to(device)

        for layer in range(n_layers):
          h_forward = torch.zeros(seq_length, x.size(1), self.hidden_size, dtype=x.dtype).to(device)
          h_backward = torch.zeros(seq_length, x.size(1), self.hidden_size, dtype=x.dtype).to(device)
          for dir in range(self.num_directions):
            h_curr = hidden[layer+dir]
            if dir == 0:
              for b_size in range(seq_length):
                  h_curr = self.Cells[layer](input[b_size], h_curr)
                  h_forward[b_size] = h_curr
            else:
              for b_size in reversed(range(seq_length)):
                  h_curr = self.Cells[layer](input[b_size], h_curr)
                  h_backward[b_size] = h_curr

          if self.num_directions == 2:
            out = torch.cat((h_forward, h_backward), dim = 2)
          else:
            out = h_forward
          input = out

        return out, out[-1]

In [None]:
class RNNBaseline(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, n_layers, 
                 bidirectional, dropout, pad_idx):
        super(RNNBaseline, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_dir = 2 if bidirectional == True else 1

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.rnn_my = GRU(embedding_dim, hidden_dim, bidirectional, n_layers)
        
        self.drop = nn.Dropout(p=dropout)

        if bidirectional:
          self.fc = nn.Linear(2*hidden_dim, output_dim)
        else: 
          self.fc = nn.Linear(hidden_dim, output_dim)
        
    def forward(self, text, text_lengths):
        #text = [sent len, batch size]

        embedded = self.embedding(text)

        out, hidden = self.rnn_my(embedded)

        #hidden = [hidden.shape[1], self.num_dir*self.hidden_dim)
                                 
        return self.fc(self.drop(hidden))

Поиграйтесь с гиперпараметрами

In [None]:
vocab_size = len(TEXT.vocab)
emb_dim = 100
hidden_dim = 256
output_dim = 1
n_layers = 1
bidirectional = True
dropout = 0.2
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]
patience=3

In [None]:
model = RNNBaseline(
    vocab_size=vocab_size,
    embedding_dim=emb_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    n_layers=n_layers,
    bidirectional=bidirectional,
    dropout=dropout,
    pad_idx=PAD_IDX
)

model = model.to(device)
opt = torch.optim.Adam(model.parameters(), lr=0.001)
loss_func = nn.BCEWithLogitsLoss()

max_epochs = 20

In [None]:
import numpy as np

min_loss = np.inf

cur_patience = 0

for epoch in range(1, max_epochs + 1):
    train_loss = 0.0
    model.train()
    pbar = tqdm(enumerate(train_iter), total=len(train_iter), leave=False)
    pbar.set_description(f"Epoch {epoch}")
    train_acc = 0
    num_objs = 0
    for it, batch in pbar:
      opt.zero_grad()
      input = batch.text[0]
      labels = batch.label
      text_lengths = batch.text[1].cpu()

      pred = model(input, text_lengths)
      loss = loss_func(pred, labels.unsqueeze(1).float())
      loss.backward()
      opt.step()
      
      probs = [1 if el > 0.5 else 0 for el in torch.sigmoid(pred)]
      train_acc += (labels.cpu().numpy() == probs).sum()

      num_objs += len(labels)
      train_loss += loss.item()

    train_acc = train_acc/num_objs
    train_loss /= len(train_iter)


    val_loss = 0.0
    model.eval()
    with torch.no_grad():
      correct = 0
      num_objs = 0
      pbar = tqdm(enumerate(valid_iter), total=len(valid_iter), leave=False)
      pbar.set_description(f"Epoch {epoch}")
      for it, batch in pbar:
        input = batch.text[0]
        labels = batch.label
        text_lengths = batch.text[1].cpu()
        
        pred = model(input, text_lengths)

        probs = [1 if el > 0.5 else 0 for el in torch.sigmoid(pred)]
        correct += (labels.cpu().numpy() == probs).sum()
        
        val_loss += loss_func(pred, labels.unsqueeze(1).float())
        num_objs += len(labels)

    print(f"Train accuracy: {train_acc}  Valid accuracy: {correct/num_objs}")
    val_loss /= len(valid_iter)
    if val_loss < min_loss:
        min_loss = val_loss
        best_model = model.state_dict()
    else:
        cur_patience += 1
        if cur_patience == patience:
            cur_patience = 0
            break
    
    print('Epoch: {}, Training Loss: {}, Validation Loss: {}'.format(epoch, train_loss, val_loss))
torch.save(best_model, 'model_')

  0%|          | 0/274 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

Train accuracy: 0.5216  Valid accuracy: 0.5172
Epoch: 1, Training Loss: 0.6953614736995558, Validation Loss: 0.6904997825622559


  0%|          | 0/274 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

Train accuracy: 0.6416  Valid accuracy: 0.7272
Epoch: 2, Training Loss: 0.6261770012822464, Validation Loss: 0.5525497198104858


  0%|          | 0/274 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

Train accuracy: 0.7790285714285714  Valid accuracy: 0.7732
Epoch: 3, Training Loss: 0.47969160794559185, Validation Loss: 0.5052889585494995


  0%|          | 0/274 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

Train accuracy: 0.8749714285714286  Valid accuracy: 0.8424
Epoch: 4, Training Loss: 0.30907265613548945, Validation Loss: 0.36554858088493347


  0%|          | 0/274 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

Train accuracy: 0.9365714285714286  Valid accuracy: 0.8468
Epoch: 5, Training Loss: 0.1749076246671433, Validation Loss: 0.3837381899356842


  0%|          | 0/274 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

Train accuracy: 0.9706857142857143  Valid accuracy: 0.838
Epoch: 6, Training Loss: 0.0923173253349688, Validation Loss: 0.4571317136287689


  0%|          | 0/274 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

Train accuracy: 0.9897142857142858  Valid accuracy: 0.8485333333333334


In [None]:
#torch.save(model.state_dict(), 'model_gru_2layers_bidir_dropout0_4')

Оценим f1-score на тестовом датасете.

In [None]:
from sklearn.metrics import f1_score

model.eval()
f1 = 0
with torch.no_grad():
  pbar = tqdm(enumerate(test_iter), total=len(test_iter), leave=False)
  for it, batch in pbar:
      input = batch.text[0]
      labels = batch.label
      text_lengths = batch.text[1].cpu()
      pred = model(input, text_lengths)
      prob = [1 if el > 0.5 else 0 for el in torch.sigmoid(pred)]
      f1 += f1_score(labels.cpu().int(), prob)
print(f1/len(test_iter))

  0%|          | 0/391 [00:00<?, ?it/s]

0.7954722531911281


## CNN

In [None]:
TEXT = Field(sequential=True, lower=True, batch_first=True)  # batch_first
LABEL = LabelField(batch_first=True, dtype=torch.float)

train, tst = datasets.IMDB.splits(TEXT, LABEL)
trn, vld = train.split(random_state=random.seed(SEED))

TEXT.build_vocab(trn)
LABEL.build_vocab(trn)

device = "cuda" if torch.cuda.is_available() else "cpu"

downloading aclImdb_v1.tar.gz


aclImdb_v1.tar.gz: 100%|██████████| 84.1M/84.1M [00:01<00:00, 75.5MB/s]


In [None]:
train_iter, val_iter, test_iter = BucketIterator.splits(
        (trn, vld, tst),
        batch_sizes=(128, 256, 256),
        sort=False,
        sort_key= lambda x: len(x.text),
        sort_within_batch=True,
        device=device,
        repeat=False,
)

In [None]:
class CNN(nn.Module):
    def __init__(
        self,
        vocab_size,
        emb_dim,
        out_channels,
        kernel_sizes,
        dropout=0.5,
    ):
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, emb_dim)

        self.conv_0 = nn.Sequential(
            nn.Conv1d(emb_dim, out_channels, kernel_size=kernel_sizes[0], padding=1, stride=2),
            nn.BatchNorm1d(out_channels)
        )
        
        self.conv_1 = nn.Sequential(
            nn.Conv1d(emb_dim, out_channels, kernel_size=kernel_sizes[1], padding=1, stride=2),
            nn.BatchNorm1d(out_channels)
        )
        
        self.conv_2 = nn.Sequential(
            nn.Conv1d(emb_dim, out_channels, kernel_size=kernel_sizes[2], padding=1, stride=2),
            nn.BatchNorm1d(out_channels)
        )
        
        self.fc = nn.Linear(len(kernel_sizes) * out_channels, 1)
        
        self.dropout = nn.Dropout(dropout)
        
        
    def forward(self, text):
        
        embedded = self.embedding(text)
        embedded = embedded.permute(0, 2, 1)  
        
        conved_0 = F.relu(self.conv_0(embedded))  
        conved_1 = F.relu(self.conv_1(embedded))  
        conved_2 = F.relu(self.conv_2(embedded))  
        pooled_0 = F.max_pool1d(conved_0, conved_0.shape[2]).squeeze(2)
        pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
        pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)
        
        cat = self.dropout(torch.cat((pooled_0, pooled_1, pooled_2), dim=1))
            
        return self.fc(cat)

In [None]:
kernel_sizes = [3, 4, 5]
vocab_size = len(TEXT.vocab)
out_channels=64
dropout = 0.5
dim = 300
patience=3

model = CNN(vocab_size=vocab_size, emb_dim=dim, out_channels=out_channels,
            kernel_sizes=kernel_sizes, dropout=dropout)

In [None]:
model.to(device)

CNN(
  (embedding): Embedding(201386, 300)
  (conv_0): Sequential(
    (0): Conv1d(300, 64, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_1): Sequential(
    (0): Conv1d(300, 64, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv_2): Sequential(
    (0): Conv1d(300, 64, kernel_size=(3,), stride=(2,), padding=(1,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (fc): Linear(in_features=192, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [None]:
opt = torch.optim.Adam(model.parameters())
loss_func = nn.BCEWithLogitsLoss()
max_epochs = 30

Теперь обучим модель

In [None]:
import numpy as np

min_loss = np.inf

cur_patience = 0

for epoch in range(1, max_epochs + 1):
    train_loss = 0.0
    model.train()
    train_iter.create_batches()
    pbar = tqdm(enumerate(train_iter), total=len(train_iter), leave=False)
    pbar.set_description(f"Epoch {epoch}")
    for it, batch in pbar: 
      opt.zero_grad()
      pred = model(batch.text)
      loss = loss_func(pred.squeeze(), batch.label)
      train_loss+=loss.item()
      loss.backward()
      opt.step()

    train_loss /= len(train_iter)
    val_loss = 0.0
    model.eval()
    with torch.no_grad():
      correct = 0
      num_objs = 0
      pbar = tqdm(enumerate(val_iter), total=len(val_iter), leave=False)
      pbar.set_description(f"Epoch {epoch}")
      for it, batch in pbar:
        pred = model(batch.text)
        probs = [1 if el > 0.5 else 0 for el in torch.sigmoid(pred)]
        val_loss += loss_func(pred, batch.label.unsqueeze(1))

        correct += (batch.label.cpu().numpy() == probs).sum()
        num_objs += len(batch.label)

      print(f"Validation accuracy: {correct/num_objs}")


    val_loss /= len(val_iter)
    if val_loss < min_loss:
        min_loss = val_loss
        best_model = model.state_dict()
    else:
        cur_patience += 1
        if cur_patience == patience:
            cur_patience = 0
            break
    
    print('Epoch: {}, Training Loss: {}, Validation Loss: {}'.format(epoch, train_loss, val_loss))
torch.save(best_model, 'cnn_model')

  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.7565333333333333
Epoch: 1, Training Loss: 0.780003671663521, Validation Loss: 0.5372025966644287


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.7821333333333333
Epoch: 2, Training Loss: 0.5803888318312429, Validation Loss: 0.49415966868400574


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.7804
Epoch: 3, Training Loss: 0.5125036298358527, Validation Loss: 0.4692533314228058


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8037333333333333
Epoch: 4, Training Loss: 0.47017786994467686, Validation Loss: 0.4360731244087219


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8145333333333333
Epoch: 5, Training Loss: 0.4259244297107641, Validation Loss: 0.4094788134098053


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8265333333333333
Epoch: 6, Training Loss: 0.37414685870609143, Validation Loss: 0.38513752818107605


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8409333333333333
Epoch: 7, Training Loss: 0.33578238380651404, Validation Loss: 0.35760244727134705


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8469333333333333
Epoch: 8, Training Loss: 0.2900715851000626, Validation Loss: 0.3455049991607666


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8574666666666667
Epoch: 9, Training Loss: 0.26013797717372866, Validation Loss: 0.33404654264450073


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8609333333333333
Epoch: 10, Training Loss: 0.22478462521829745, Validation Loss: 0.3266007602214813


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8621333333333333
Epoch: 11, Training Loss: 0.1921181248052277, Validation Loss: 0.3199133574962616


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8586666666666667
Epoch: 12, Training Loss: 0.17183197017786275, Validation Loss: 0.3285227119922638


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8610666666666666
Epoch: 13, Training Loss: 0.14395931662216674, Validation Loss: 0.3197861313819885


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8629333333333333
Epoch: 14, Training Loss: 0.12502829480345232, Validation Loss: 0.3281550705432892


  0%|          | 0/137 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Validation accuracy: 0.8617333333333334


Также оценим f1-score.

In [None]:
from sklearn.metrics import f1_score

model.eval()
f1 = 0
with torch.no_grad():
  pbar = tqdm(enumerate(test_iter), total=len(test_iter), leave=False)
  for it, batch in pbar:
      pred = model(batch.text)
      probs = [1 if el > 0.5 else 0 for el in torch.sigmoid(pred)]
      f1 += f1_score(batch.label.cpu().int(), probs)
print('f1 score: ', f1/len(test_iter))

  0%|          | 0/98 [00:00<?, ?it/s]

fi score:  0.8597345480998573


## Немного визуализации

Теперь посмотрим, как модель научилась определять контекст слов и предложений.

In [None]:
!pip install -q captum

[K     |████████████████████████████████| 1.4 MB 6.6 MB/s 
[?25h

In [None]:
from captum.attr import LayerIntegratedGradients, TokenReferenceBase, visualization

PAD_IND = TEXT.vocab.stoi['pad']

token_reference = TokenReferenceBase(reference_token_idx=PAD_IND)
lig = LayerIntegratedGradients(model, model.embedding)

In [None]:
def forward_with_softmax(inp):
    logits = model(inp)
    return torch.softmax(logits, 0)[0][1]

def forward_with_sigmoid(input):
    return torch.sigmoid(model(input))


# accumalate couple samples in this array for visualization purposes
vis_data_records_ig = []

def interpret_sentence(model, sentence, min_len = 7, label = 0):
    model.eval()
    text = [tok for tok in TEXT.tokenize(sentence)]
    if len(text) < min_len:
        text += ['pad'] * (min_len - len(text))
    indexed = [TEXT.vocab.stoi[t] for t in text]

    model.zero_grad()

    input_indices = torch.tensor(indexed, device=device)
    input_indices = input_indices.unsqueeze(0)
    
    # input_indices dim: [sequence_length]
    seq_length = min_len

    # predict
    pred = forward_with_sigmoid(input_indices).item()
    pred_ind = round(pred)

    # generate reference indices for each sample
    reference_indices = token_reference.generate_reference(seq_length, device=device).unsqueeze(0)

    # compute attributions and approximation delta using layer integrated gradients
    attributions_ig, delta = lig.attribute(input_indices, reference_indices, \
                                           n_steps=5000, return_convergence_delta=True)

    print('pred: ', LABEL.vocab.itos[pred_ind], '(', '%.2f'%pred, ')', ', delta: ', abs(delta))

    add_attributions_to_visualizer(attributions_ig, text, pred, pred_ind, label, delta, vis_data_records_ig)
    
def add_attributions_to_visualizer(attributions, text, pred, pred_ind, label, delta, vis_data_records):
    attributions = attributions.sum(dim=2).squeeze(0)
    attributions = attributions / torch.norm(attributions)
    attributions = attributions.cpu().detach().numpy()

    # storing couple samples in an array for visualization purposes
    vis_data_records.append(visualization.VisualizationDataRecord(
                            attributions,
                            pred,
                            LABEL.vocab.itos[pred_ind],
                            LABEL.vocab.itos[label],
                            LABEL.vocab.itos[1],
                            attributions.sum(),       
                            text,
                            delta))

In [None]:
interpret_sentence(model, 'It was a fantastic performance !', label=1)
interpret_sentence(model, 'Best film ever', label=1)
interpret_sentence(model, 'Such a great show!', label=1)
interpret_sentence(model, 'It was a horrible movie', label=0)
interpret_sentence(model, 'I\'ve never watched something as bad', label=0)
interpret_sentence(model, 'It is a disgusting movie!', label=0)

pred:  pos ( 1.00 ) , delta:  tensor([0.0003], device='cuda:0', dtype=torch.float64)
pred:  neg ( 0.33 ) , delta:  tensor([9.8022e-06], device='cuda:0', dtype=torch.float64)
pred:  pos ( 0.97 ) , delta:  tensor([6.3744e-06], device='cuda:0', dtype=torch.float64)
pred:  neg ( 0.00 ) , delta:  tensor([8.3985e-05], device='cuda:0', dtype=torch.float64)
pred:  neg ( 0.01 ) , delta:  tensor([2.4240e-05], device='cuda:0', dtype=torch.float64)
pred:  neg ( 0.01 ) , delta:  tensor([6.1230e-05], device='cuda:0', dtype=torch.float64)


In [None]:
print('Visualize attributions based on Integrated Gradients')
visualization.visualize_text(vis_data_records_ig);

Visualize attributions based on Integrated Gradients


True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
pos,pos (1.00),pos,1.23,It was a fantastic performance ! pad
,,,,
pos,neg (0.33),pos,1.02,Best film ever pad pad pad pad
,,,,
pos,pos (0.97),pos,1.47,Such a great show! pad pad pad
,,,,
neg,neg (0.00),pos,-0.92,It was a horrible movie pad pad
,,,,
neg,neg (0.01),pos,-0.62,I've never watched something as bad pad
,,,,
