<a href="https://colab.research.google.com/github/FelixDeMan/DLVU/blob/master/DL3_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install wget



In [2]:
import torch
if torch.cuda.is_available():  
  dev = "cuda:0" 
else:  
  dev = "cpu"  
device = torch.device(dev) 

In [3]:
import wget, os, gzip, pickle, random, re, sys

IMDB_URL = 'http://dlvu.github.io/data/imdb.{}.pkl.gz'
IMDB_FILE = 'imdb.{}.pkl.gz'

PAD, START, END, UNK = '.pad', '.start', '.end', '.unk'

def load_imdb(final=False, val=5000, seed=0, voc=None, char=False):

    cst = 'char' if char else 'word'

    imdb_url = IMDB_URL.format(cst)
    imdb_file = IMDB_FILE.format(cst)

    if not os.path.exists(imdb_file):
        wget.download(imdb_url)

    with gzip.open(imdb_file) as file:
        sequences, labels, i2w, w2i = pickle.load(file)

    if voc is not None and voc < len(i2w):
        nw_sequences = {}

        i2w = i2w[:voc]
        w2i = {w: i for i, w in enumerate(i2w)}

        mx, unk = voc, w2i['.unk']
        for key, seqs in sequences.items():
            nw_sequences[key] = []
            for seq in seqs:
                seq = [s if s < mx else unk for s in seq]
                nw_sequences[key].append(seq)

        sequences = nw_sequences

    if final:
        return (sequences['train'], labels['train']), (sequences['test'], labels['test']), (i2w, w2i), 2

    # Make a validation split
    random.seed(seed)

    x_train, y_train = [], []
    x_val, y_val = [], []

    val_ind = set( random.sample(range(len(sequences['train'])), k=val) )
    for i, (s, l) in enumerate(zip(sequences['train'], labels['train'])):
        if i in val_ind:
            x_val.append(s)
            y_val.append(l)
        else:
            x_train.append(s)
            y_train.append(l)

    return (x_train, y_train), \
           (x_val, y_val), \
           (i2w, w2i), 2


def gen_sentence(sent, g):

    symb = '_[a-z]*'

    while True:

        match = re.search(symb, sent)
        if match is None:
            return sent

        s = match.span()
        sent = sent[:s[0]] + random.choice(g[sent[s[0]:s[1]]]) + sent[s[1]:]

def gen_dyck(p):
    open = 1
    sent = '('
    while open > 0:
        if random.random() < p:
            sent += '('
            open += 1
        else:
            sent += ')'
            open -= 1

    return sent

def gen_ndfa(p):

    word = random.choice(['abc!', 'uvw!', 'klm!'])

    s = ''
    while True:
        if random.random() < p:
            return 's' + s + 's'
        else:
            s+= word

def load_brackets(n=50_000, seed=0):
    return load_toy(n, char=True, seed=seed, name='dyck')

def load_ndfa(n=50_000, seed=0):
    return load_toy(n, char=True, seed=seed, name='ndfa')

def load_toy(n=50_000, char=True, seed=0, name='lang'):

    random.seed(0)

    if name == 'lang':
        sent = '_s'

        toy = {
            '_s': ['_s _adv', '_np _vp', '_np _vp _prep _np', '_np _vp ( _prep _np )', '_np _vp _con _s' , '_np _vp ( _con _s )'],
            '_adv': ['briefly', 'quickly', 'impatiently'],
            '_np': ['a _noun', 'the _noun', 'a _adj _noun', 'the _adj _noun'],
            '_prep': ['on', 'with', 'to'],
            '_con' : ['while', 'but'],
            '_noun': ['mouse', 'bunny', 'cat', 'dog', 'man', 'woman', 'person'],
            '_vp': ['walked', 'walks', 'ran', 'runs', 'goes', 'went'],
            '_adj': ['short', 'quick', 'busy', 'nice', 'gorgeous']
        }

        sentences = [ gen_sentence(sent, toy) for _ in range(n)]
        sentences.sort(key=lambda s : len(s))

    elif name == 'dyck':

        sentences = [gen_dyck(7./16.) for _ in range(n)]
        sentences.sort(key=lambda s: len(s))

    elif name == 'ndfa':

        sentences = [gen_ndfa(1./4.) for _ in range(n)]
        sentences.sort(key=lambda s: len(s))

    else:
        raise Exception(name)

    tokens = set()
    for s in sentences:

        if char:
            for c in s:
                tokens.add(c)
        else:
            for w in s.split():
                tokens.add(w)

    i2t = [PAD, START, END, UNK] + list(tokens)
    t2i = {t:i for i, t in enumerate(i2t)}

    sequences = []
    for s in sentences:
        if char:
            tok = list(s)
        else:
            tok = s.split()
        sequences.append([t2i[t] for t in tok])

    return sequences, (i2t, t2i)

In [4]:
import torch

# Load data 
(x_train, y_train), (x_val, y_val), (i2w, w2i), numcls = load_imdb(final=False)


In [5]:
print([i2w[w] for w in x_train[141]])
print(len(x_train))


['possibly', 'the', 'best', 'movie', 'ever', 'created', 'in', 'the', 'history', 'of', 'jeffrey', 'combs', 'career', 'and', 'one', 'that', 'should', 'be', 'looked', 'upon', 'by', 'all', 'talent', 'in', 'hollywood', 'for', 'his', 'versatility', 'charisma', 'and', 'uniqueness', 'he', 'brings', 'through', 'his', 'characters', 'and', 'his', 'knowledge', 'of', 'acting']
20000


In [6]:
print(w2i['.pad'])

0


In [7]:
# Pad x data to right size

# w2i['.pad'] = 0
pad_val = 0
# w2i['.start'] = 1
start_val = 1
# w2i['.end'] = 2
end_val = 2

longest_len = max([len(x) for x in x_train]) +2 # +2 because appending start and end val
print(longest_len)
for review in x_train:
  review.insert(0, start_val)
  review.append(end_val)
  while len(review)< longest_len:
    review.append(pad_val)


# Verify
print(x_train[0])
print([len(x) for x in x_train])
print(max([len(x) for x in x_train]))

2516
[1, 14, 19, 9, 379, 22, 11, 50, 52, 53, 290, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [8]:
# Same procedure for x_val:
longest_len = max([len(x) for x in x_val]) +2 # +2 because appending start and end val
print(longest_len)
for review in x_val:
  review.insert(0, start_val)
  review.append(end_val)
  while len(review)< longest_len:
    review.append(pad_val)


# Verify
#print(x_val[0])
#print([len(x) for x in x_val])
print(max([len(x) for x in x_val]))
len(i2w)

1869
1869


99430

In [9]:
x_train = torch.Tensor(x_train).type(torch.LongTensor)
y_train = torch.Tensor(y_train).type(torch.LongTensor)

x_val = torch.Tensor(x_val).type(torch.LongTensor)
y_val = torch.Tensor(y_val).type(torch.LongTensor)

#batch = torch.tensor(lists, dtype=torch.long)

In [10]:
batch_size = 16

trainloader = torch.utils.data.DataLoader([[x_train[i], y_train[i]] for i in range(len(y_train))], batch_size, shuffle=True)
valloader = torch.utils.data.DataLoader([[x_val[i], y_val[i]] for i in range(len(y_val))], batch_size, shuffle= True)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# input shape x_train = 4 x 2520
class Net(nn.Module):
    
    def __init__(self, input_size, output_size):
        
        super().__init__()
        
        #Input nn.Embedding: num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, device=None, dtype=None
        # len(i2w) = 99430
        embedding_dim = 300
        hidden_size = embedding_dim  #idk what to pick here?
        num_cls = 2 #labrl of y 0,1 positive/negative review

        self.layer1 = nn.Embedding(num_embeddings = 99430, embedding_dim = embedding_dim , padding_idx = 0) #num embeddings is tot num of tokens?? 99430
       
        
        self.layer2 = nn.Linear(in_features = (embedding_dim), out_features= hidden_size)

        
        self.layer3 = nn.Linear(embedding_dim, num_cls)
        
    def forward(self, input):
        
        
        #print(input.shape)
        #first layer
        emb = self.layer1(input)
         #Does embedding with padding_idx cut to same length within batch?
        print(emb.shape)
        #second layer   
        # should only work on last dimension, thus 2,emb
        hidden = self.layer2(emb)
        #print(hidden.shape)
        #non-linearity
        non_linear_hidden = F.relu(hidden)

        permuted = non_linear_hidden.permute(0,2,1)
        #print(permuted.shape)
        #maxpool
        pooled = F.max_pool1d(permuted, kernel_size=permuted.shape[2]).permute(0,2,1)
        #print(pooled.shape)
        pooled = torch.reshape(pooled, (4,300))
        output = self.layer3(pooled)
        #print(pooled.shape)
        
        
        return output

net = Net(x_train.shape[1], 2)
print(net)

Net(
  (layer1): Embedding(99430, 300, padding_idx=0)
  (layer2): Linear(in_features=300, out_features=300, bias=True)
  (layer3): Linear(in_features=300, out_features=2, bias=True)
)


In [None]:
torch.cuda.is_available()

True

In [None]:

import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(elman.parameters(), lr = 0.001, momentum = 0.0)

In [None]:
def train(epochs): 
    for epoch in range(epochs):
        running_loss = 0.0
        
        for i, data in enumerate(trainloader, 0):
          if i < 10:
            # Extract input and label correctly
            inputs, labels = data
            
            #zero param grads
            optimizer.zero_grad()
            
            #forward
            outputs = net(inputs)
            print(outputs)
            softmaxed = F.Softmax(outputs)
            #print(labels.shape)
            #loss
            loss = F.cross_entropy(outputs, labels)
            
            #backward
            loss.backward()
            #optimize
            optimizer.step()
            
            running_loss += loss.item()
            
            if i % 1000 == 999: #print every 1000 batches
                print('[%d, %5d] loss: %.3f ' %
                      (epoch +1, i+1, running_loss / 1000))
                running_loss = 0.0
    print('Finished training')

In [None]:
train(1)

torch.Size([4, 2516, 300])
tensor([[-1.6149,  1.7649],
        [-1.4353,  1.4896],
        [-1.5706,  1.4931],
        [-1.9777,  1.6436]], grad_fn=<AddmmBackward0>)
torch.Size([4, 2516, 300])
tensor([[-1.8653,  1.8281],
        [-1.3624,  1.6659],
        [-1.3791,  1.5496],
        [-1.4680,  1.3672]], grad_fn=<AddmmBackward0>)
torch.Size([4, 2516, 300])
tensor([[-1.4164,  1.5986],
        [-1.2377,  1.4789],
        [-1.4040,  1.7105],
        [-1.7218,  1.6532]], grad_fn=<AddmmBackward0>)
torch.Size([4, 2516, 300])
tensor([[-1.4951,  1.6093],
        [-1.4782,  1.7714],
        [-1.4488,  1.3882],
        [-1.5478,  1.7015]], grad_fn=<AddmmBackward0>)
torch.Size([4, 2516, 300])
tensor([[-1.5080,  1.4813],
        [-1.7250,  1.4005],
        [-1.8098,  1.8194],
        [-1.4289,  1.3295]], grad_fn=<AddmmBackward0>)
torch.Size([4, 2516, 300])
tensor([[-1.5762,  1.4889],
        [-1.3236,  1.7975],
        [-1.4276,  1.2572],
        [-1.3881,  1.5978]], grad_fn=<AddmmBackward0>)
torc

In [None]:
def test(model):
    correct = 0
    total = 0
    with torch.no_grad():
        for i, data in enumerate(valloader):
          if i == 100:
            break
          images, labels = data
            
            #calc output by running images through network
          outputs = model(images)
          _, predicted = torch.max(outputs.data,1)
          total += labels.size(0)
          correct += (predicted == labels).sum().item()
            
    print('Accuracy of network is ', correct / total)

In [None]:
test()

In [None]:
torch.save(net,'/content/imdb_model.pt' )

In [None]:
net = torch.load('/content/imdb_model.pt')

# PART III

In [11]:
import torch.nn as nn
import torch.nn.functional as F
class Elman(nn.Module):
  
  def __init__(self, insize = 300, outsize = 300, hsize = 300):
    super().__init__()

    self.lin1 =  nn.Linear(in_features= insize + hsize, out_features=hsize)
    self.lin2 =  nn.Linear(in_features= hsize, out_features= outsize)
  
  def forward(self, x, hidden = None):
    b, t, e = x.size()

    if hidden is None:
      hidden = torch.zeros(b, e, dtype=torch.float).to(device)

    outs = []
    for i in range(t):
      inp = torch.cat([x[:, i, :], hidden], dim = 1)
      hidden = self.lin1(inp)
      hidden = F.relu(hidden)
      yi = self.lin2(hidden)
      out = yi
    
      outs.append(out[:, None, :])

    return torch.cat(outs, dim=1), hidden

In [12]:
import torch.nn as nn
import torch.nn.functional as F

# input shape x_train = 4 x 2520
class ElNet(nn.Module):
    
    def __init__(self, input_size, output_size):
        
        super().__init__()
        
        #Input nn.Embedding: num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, device=None, dtype=None
        # len(i2w) = 99430
        embedding_dim = 300
        hidden_size = embedding_dim  #idk what to pick here?
        num_cls = 2 #labrl of y 0,1 positive/negative review

        self.layer1 = nn.Embedding(num_embeddings = 99430, embedding_dim = embedding_dim , padding_idx = 0) #num embeddings is tot num of tokens?? 99430
       
        
        self.layer2 = Elman(insize = (embedding_dim), outsize= hidden_size, hsize = 300)

        self.layer3 = nn.Linear(embedding_dim, num_cls)
        
    def forward(self, input):
        
        
        #print(input.shape)
        #first layer
        emb = self.layer1(input)
         #Does embedding with padding_idx cut to same length within batch?
        
        #second layer   
        # should only work on last dimension, thus 2,emb
        el_out, hidden = self.layer2(emb)
        
        #non-linearity
        non_linear_hidden = F.relu(el_out)

        permuted = non_linear_hidden.permute(0,2,1)
        
        #maxpool
        pooled = F.max_pool1d(permuted, kernel_size=permuted.shape[2]).permute(0,2,1)
        
        pooled = torch.squeeze(pooled, 1)
        output = self.layer3(pooled)
        #print(pooled.shape)
        
        
        return output

elman = ElNet(x_train.shape[1], 2)
print(elman)
elman.to(device)

ElNet(
  (layer1): Embedding(99430, 300, padding_idx=0)
  (layer2): Elman(
    (lin1): Linear(in_features=600, out_features=300, bias=True)
    (lin2): Linear(in_features=300, out_features=300, bias=True)
  )
  (layer3): Linear(in_features=300, out_features=2, bias=True)
)


ElNet(
  (layer1): Embedding(99430, 300, padding_idx=0)
  (layer2): Elman(
    (lin1): Linear(in_features=600, out_features=300, bias=True)
    (lin2): Linear(in_features=300, out_features=300, bias=True)
  )
  (layer3): Linear(in_features=300, out_features=2, bias=True)
)

In [14]:
import pandas as pd

def train(epochs, model, GPU):
  import torch.optim as optim
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(model.parameters(), lr = 0.01)
 
  data = list()
  for epoch in range(epochs):
  
        print("Epoch ", epoch)
        loss_epoch = 0
        running_loss = 0.0
        losses = []
        for i, data in enumerate(trainloader, 0):
            if i < 500:
              # Extract input and label correctly
              inputs, labels = data
              if GPU:
                inputs = inputs.to(device)
                labels = labels.to(device)
              
              #zero param grads
              optimizer.zero_grad()
              
              #forward
              outputs = model(inputs)
              #outputs = F.softmax(outputs)
              
              #loss
              loss = criterion(outputs, labels)
              
              #backward
              loss.backward()
              #optimize
              optimizer.step()
              
              running_loss += loss.item()
              loss_epoch += loss.item()
              losses.append(loss.item()) 
              if i % 50 == 49: #print every 1000 batches
                  print('[%d, %5d] loss: %.3f ' %
                        (epoch +1, i+1, running_loss / 49))
                  running_loss = 0.0
              data.append({'update' : i, 'epoch': epoch, 'loss': loss.item()})
  print('Finished training')
  return data

In [None]:
data_implemented = train(10, elman, True)

Epoch  0


In [None]:
import torch.nn as nn
import torch.nn.functional as F

# input shape x_train = 4 x 2520
class ElNNet(nn.Module):
    
    def __init__(self, input_size, output_size):
        
        super().__init__()
        
        #Input nn.Embedding: num_embeddings, embedding_dim, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False, _weight=None, device=None, dtype=None
        # len(i2w) = 99430
        embedding_dim = 300
        hidden_size = embedding_dim  #idk what to pick here?
        num_cls = 2 #labrl of y 0,1 positive/negative review

        self.layer1 = nn.Embedding(num_embeddings = 99430, embedding_dim = embedding_dim , padding_idx = 0) #num embeddings is tot num of tokens?? 99430
       
        
        self.layer2 = nn.RNN(embedding_dim, hidden_size, 300, batch_first=True)

        
        self.layer3 = nn.Linear(embedding_dim, num_cls)
        
    def forward(self, input):
        
        
        #print(input.shape)
        #first layer
        emb = self.layer1(input)
         #Does embedding with padding_idx cut to same length within batch?
       
        #second layer   
        # should only work on last dimension, thus 2,emb
        el_out, hidden = self.layer2(emb)
        
        #non-linearity
        non_linear_hidden = F.relu(el_out)

        permuted = non_linear_hidden.permute(0,2,1)
        #print(permuted.shape)
        #maxpool
        pooled = F.max_pool1d(permuted, kernel_size=permuted.shape[2]).permute(0,2,1)
        #print(pooled.shape)
        pooled = torch.squeeze(pooled, 1)
        output = self.layer3(pooled)
        #print(pooled.shape)
        
        
        return output
elmaNN = ElNNet(x_train.shape[1], 2)
print(elmaNN)
elmaNN.to(device)

ElNNet(
  (layer1): Embedding(99430, 300, padding_idx=0)
  (layer2): RNN(300, 300, num_layers=300, batch_first=True)
  (layer3): Linear(in_features=300, out_features=2, bias=True)
)


ElNNet(
  (layer1): Embedding(99430, 300, padding_idx=0)
  (layer2): RNN(300, 300, num_layers=300, batch_first=True)
  (layer3): Linear(in_features=300, out_features=2, bias=True)
)

In [None]:
lossesElman = train(1, elman, True)



[1,     3] loss: 0.686 
[1,     6] loss: 0.706 
[1,     9] loss: 0.693 
[1,    12] loss: 0.686 
[1,    15] loss: 0.700 
[1,    18] loss: 0.695 
[1,    21] loss: 0.693 
[1,    24] loss: 0.693 
[1,    27] loss: 0.701 
[1,    30] loss: 0.697 
[1,    33] loss: 0.691 
[1,    36] loss: 0.697 
[1,    39] loss: 0.685 
[1,    42] loss: 0.698 
[1,    45] loss: 0.681 
[1,    48] loss: 0.701 
[1,    51] loss: 0.699 
[1,    54] loss: 0.696 
[1,    57] loss: 0.691 
[1,    60] loss: 0.692 
[1,    63] loss: 0.696 
[1,    66] loss: 0.691 
[1,    69] loss: 0.700 
[1,    72] loss: 0.693 
[1,    75] loss: 0.696 
[1,    78] loss: 0.692 
[1,    81] loss: 0.691 
[1,    84] loss: 0.700 
[1,    87] loss: 0.692 
[1,    90] loss: 0.690 
[1,    93] loss: 0.689 
[1,    96] loss: 0.690 
[1,    99] loss: 0.691 
[1,   102] loss: 0.696 
[1,   105] loss: 0.694 
[1,   108] loss: 0.690 
[1,   111] loss: 0.694 
[1,   114] loss: 0.692 
[1,   117] loss: 0.693 
[1,   120] loss: 0.694 
[1,   123] loss: 0.695 
[1,   126] loss:

KeyboardInterrupt: ignored

In [None]:
lossesElmaNN = train(1, elmaNN, True)

torch.Size([4, 2516, 300])




torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,     3] loss: 0.695 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,     6] loss: 0.696 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,     9] loss: 0.696 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,    12] loss: 0.693 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,    15] loss: 0.692 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,    18] loss: 0.693 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,    21] loss: 0.693 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,    24] loss: 0.692 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[1,    27] loss: 0.692 
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
torch.Size([4, 2516, 300])
[

In [None]:
test(elmann)

In [None]:

import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(elman.parameters(), lr = 0.001, momentum = 0.0)

NameError: ignored

In [None]:
def pad_per_batch(input, batch_size):
  n_batches = len(input) / batch_size
  for i in n_batches:
    longest_len = max([len(x) for x in x_train]) +2 # +2 because appending start and end val
    print(longest_len)
    for review in x_train:
      review.insert(0, start_val)
      review.append(end_val)
      while len(review)< longest_len:
      review.append(pad_val)
