# Pytorch Amin Acid Language Model

In [None]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from pathlib import Path
import time
import pickle
from IPython.display import HTML, display

In [None]:
if torch.cuda.is_available():  
  dev = "cuda:0" 
else:  
  dev = "cpu" 

In [None]:
dev

'cuda:0'

In [None]:
torch.manual_seed(42)

<torch._C.Generator at 0x7f0b3629c710>

Add nice css for my table

In [None]:
HTML("""
<style>
table, th, td {
  border: 1px solid black;
}
</style>
""")

## Load the data

In [None]:
data_file = Path('/content/LM_data_2021-03-11.csv')
df = pd.read_csv(data_file, sep=';')
df.head()

Unnamed: 0,Entry,Entry name,Sequence
0,P68307,NU3M_BALMU,MNLLLTLLTNTTLALLLVFIAFWLPQLNVYAEKTSPYECGFDPMGS...
1,P0CY61,O162_CONBU,MKLTCVLIIAVLFLTAITADDSRDKQVYRAVGLIDKMRRIRASEGC...
2,Q0VIL3,OTOMP_DANRE,MDLPGGHLAVVLFLFVLVSMSTENNIIRWCTVSDAEDQKCLDLAGN...
3,A1W9I4,NUSB_ACISJ,MTDSTHPTPSARPPRQPRTGTTGTGARKAGSKSGRSRAREFALQAL...
4,Q8DBX0,OMPU_VIBVU,MKKTLIALSVSAAAVATGVNAAELYNQDGTSLDMGGRAEARLSMKD...


In [None]:
df.drop(['Entry', 'Entry name'], axis = 1, inplace=True)
df.head()

Unnamed: 0,Sequence
0,MNLLLTLLTNTTLALLLVFIAFWLPQLNVYAEKTSPYECGFDPMGS...
1,MKLTCVLIIAVLFLTAITADDSRDKQVYRAVGLIDKMRRIRASEGC...
2,MDLPGGHLAVVLFLFVLVSMSTENNIIRWCTVSDAEDQKCLDLAGN...
3,MTDSTHPTPSARPPRQPRTGTTGTGARKAGSKSGRSRAREFALQAL...
4,MKKTLIALSVSAAAVATGVNAAELYNQDGTSLDMGGRAEARLSMKD...


## Tokenize the data

In [None]:
# Set-up numpy generator for random numbers
random_number_generator = np.random.default_rng(seed=42)
KMER_SIZE = 3

In [None]:
# Tokenize the protein sequence (or any sequence) in kmers.
def tokenize(protein_seqs, kmer_sz):
    kmers = set()
    # Loop over protein sequences
    for protein_seq in protein_seqs:
        # Loop over the whole sequence
        for i in range(len(protein_seq) - (kmer_sz - 1)):
            # Add kmers to the set, thus only unique kmers will remain
            kmers.add(protein_seq[i: i + kmer_sz])
            
    # Map kmers for one hot-encoding
    kmer_to_id = dict()
    id_to_kmer = dict()
    
    for ind, kmer in enumerate(kmers):
        kmer_to_id[kmer] = ind
        id_to_kmer[ind] = kmer
        
    vocab_sz = len(kmers)
    
    assert vocab_sz == len(kmer_to_id.keys())
    
    # Tokenize the protein sequence to integers
    tokenized = []
    for protein_seq in protein_seqs:
        sequence = []
        for i in  range(len(protein_seq) - (kmer_sz -1)):
            # Convert kmer to integer
            kmer = protein_seq[i: i + kmer_sz]
            sequence.append(kmer_to_id[kmer])
            
        tokenized.append(sequence)
            
    
    return tokenized, vocab_sz, kmer_to_id, id_to_kmer

In [None]:
# Tokenize the protein sequence
tokenized_seqs, vocab_sz, kmer_to_id, id_to_kmer = tokenize(df['Sequence'], KMER_SIZE)

In [None]:
vocab_sz

9317

In [None]:
tokenized_seqs[0][:10]

[4362, 4302, 5206, 5167, 595, 4474, 5167, 9043, 8106, 3485]

In [None]:
data = []
for seq in tokenized_seqs:
    for kmer in seq:
        data.append(kmer)

## Dataset

In [None]:
class AminoLMDataset(torch.utils.data.Dataset):
    def __init__(self, data, seq_len):
        self.data = torch.Tensor(data)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
            
        xs = torch.LongTensor(data[idx: idx + seq_len])
        targets = data[idx + 1: idx + seq_len + 1]

        ys = []

        for target in targets:
          y = torch.tensor(target)
          ys.append(y)

        ys = torch.stack(ys)

        ys = ys.to(dev)
        xs = xs.to(dev) 
    
        return xs, ys

## Building the LM Model

In [None]:
# Hyperparameters
emb_dim = 400 # Embeddding dimension
hid_sz = 1150 # Hidden size
num_layers = 3 # Number of LSTM layers stacked together
seq_len = num_layers

# Dropout parameters

embed_p = 0.1 # Dropout probability on the embedding
hidden_p = 0.3 # Dropout probability on hidden-to-hidden weight matrices
# Dropout tussen de inputs van de LSTMs moet ik er nog in bouwen
input_p = 0.3 # Dropout probablity on the LSTM input between LSTMS
weight_p = 0.5 # Dropout probability on LSTM-to-LSTM weight matrices

In [None]:
class WeightDropout(torch.nn.Module):
  "Apply dropout to LSTM's hidden-hidden weights"
    
  def __init__(self, module, weight_p):
    super(WeightDropout, self).__init__()
    self.module = module
    self.weight_p = weight_p

    # Save the name of the layer weights in a list
    num_layers = module.num_layers
    layer_base_name = 'weight_hh_l'      
    self.layer_weights = [layer_base_name + str(i) for i in range(num_layers)]

    # Make a copy of the weights in weightname_raw
    for weight in self.layer_weights:

      w = getattr(self.module, weight)
      del module._parameters[weight]
      self.module.register_parameter(f'{weight}_raw', torch.nn.Parameter(w))

  def _setweights(self):
    "Apply dropout to the raw weights"
    for weight in self.layer_weights:
      raw_w = getattr(self.module, f'{weight}_raw')
      if self.training:
          w = torch.nn.functional.dropout(raw_w, p=self.weight_p)
      else:
          w = raw_w.clone()
      setattr(self.module, weight, w)
    
  def forward(self, *args):
    self._setweights()
    return self.module(*args)

### Weightdropout testing

In [None]:
tst_inp = torch.rand(1, 1, 400)
test_lstm = nn.LSTM(emb_dim, hid_sz, num_layers=1)
test_lstm.forward(tst_inp)

(tensor([[[ 0.0711,  0.1139, -0.0432,  ..., -0.0202,  0.0326,  0.0285]]],
        grad_fn=<StackBackward>),
 (tensor([[[ 0.0711,  0.1139, -0.0432,  ..., -0.0202,  0.0326,  0.0285]]],
         grad_fn=<StackBackward>),
  tensor([[[ 0.1271,  0.2095, -0.0826,  ..., -0.0360,  0.0644,  0.0608]]],
         grad_fn=<StackBackward>)))

In [None]:
# Een list maken met alle hidden-hidden weight namen in je module (voor dit netwerk is het dus 1 layer maar dus 1 iets in je lijst)
num_layers = test_lstm.num_layers
layer_base_name = 'weight_hh_l'
layer_weights =  [layer_base_name + str(i) for i in range(num_layers)]
layer_weights

['weight_hh_l0']

In [None]:
# Oefenen om deze parameters op te halen
for weight in layer_weights:
  w = getattr(test_lstm, weight)
  del test_lstm._parameters[weight]
  test_lstm.register_parameter(f'{weight}_raw', torch.nn.Parameter(w))

In [None]:
for weight in layer_weights:
  raw_w = getattr(test_lstm, f'{weight}_raw')
  w = torch.nn.functional.dropout(raw_w, weight_p)
  setattr(test_lstm, weight, w)

In [None]:
w = getattr(test_lstm, 'weight_hh_l0')
print(w)

tensor([[ 0.0000,  0.0000, -0.0000,  ...,  0.0078,  0.0252, -0.0284],
        [ 0.0267, -0.0450,  0.0031,  ..., -0.0150, -0.0000,  0.0520],
        [-0.0279,  0.0000, -0.0258,  ..., -0.0207, -0.0000, -0.0000],
        ...,
        [ 0.0191,  0.0000, -0.0000,  ...,  0.0363, -0.0000, -0.0264],
        [-0.0056, -0.0332, -0.0000,  ..., -0.0317,  0.0000,  0.0000],
        [ 0.0123, -0.0233,  0.0000,  ...,  0.0000, -0.0157, -0.0000]],
       grad_fn=<MulBackward0>)


In [None]:
dp_test = WeightDropout(test_lstm, weight_p)
dp_test(tst_inp)

(tensor([[[ 0.0711,  0.1139, -0.0432,  ..., -0.0202,  0.0326,  0.0285]]],
        grad_fn=<StackBackward>),
 (tensor([[[ 0.0711,  0.1139, -0.0432,  ..., -0.0202,  0.0326,  0.0285]]],
         grad_fn=<StackBackward>),
  tensor([[[ 0.1271,  0.2095, -0.0826,  ..., -0.0360,  0.0644,  0.0608]]],
         grad_fn=<StackBackward>)))

## Create AWD_LSTM model

In [None]:
class AWD_LSTM(torch.nn.Module):
    def __init__(self, num_layers, vocab_sz, emb_dim, hid_sz, hidden_p, embed_p, input_p, weight_p, batch_sz = 1):
        super(AWD_LSTM, self).__init__()
        
        # Embedding with droput
        self.encoder = torch.nn.Embedding(vocab_sz, emb_dim)
        self.emb_drop = torch.nn.Dropout(p=embed_p)

        
        # Dropouts on the inputs and the hidden layers
        self.input_dp = torch.nn.Dropout(p=input_p)
        self.hid_dp = torch.nn.Dropout(p=hidden_p)

        # Create a list of lstm layers with wieghtdropout
        self.lstms = []
        for i in range(num_layers):
            self.lstms.append(
                WeightDropout(nn.LSTM(input_size=emb_dim, hidden_size=hid_sz, num_layers=1), weight_p))
        self.lstms = nn.ModuleList(self.lstms)

        # Decoder
        self.decoder = torch.nn.Linear(hid_sz, vocab_sz)

        # Save all variables        
        self.num_layers = num_layers
        self.vocab_sz = vocab_sz
        self.emb_dim = emb_dim
        self.hid_sz = hid_sz
        self.hidden_p = hidden_p
        self.embed_p = embed_p
        self.input_p = input_p
        self.weight_p = weight_p
        self.batch_sz = batch_sz

        # Initialize hidden layers        
        self.reset_hidden()
        self.last_hiddens = (self.hidden_state, self.cell_state)
                
    def forward(self, xs):
        """Forward pass AWD-LSTM""" 

        ys = []
        
        hiddens = self.last_hiddens

        hidden_states = [hiddens]

        for i, lstm in enumerate(self.lstms):

          embed = self.encoder(xs[i])
          embed_dp = self.emb_drop(embed)
          input_dp = self.input_dp(embed_dp)

          hiddens_dp = []

          for hidden_state in hidden_states[i]:
            hiddens_dp.append(self.hid_dp(hidden_state))

          hiddens_dp = tuple(hiddens_dp)
          output, hiddens = lstm(input_dp.view(1, 1, -1), hiddens_dp) 


          det_hiddens = []

          for hidden in hiddens:
            det_hiddens.append(hidden.detach())

          det_hiddens = tuple(det_hiddens)

          hidden_states.append(det_hiddens)
            
          y = self.decoder(output)

          # Outputs moeten squeezen om de loss toe te kunnen passen, maar moet nog even kijken of de juiste waardes dan wel worden mee genomen

          ys.append(y.squeeze(0).squeeze(0))

        y = torch.stack(ys, dim=0)

        self.last_hiddens = hidden_states[-1]
        
        return y
    
    def reset_hidden(self):
        self.hidden_state = torch.zeros((1, self.batch_sz, self.hid_sz)).to(dev)
        self.cell_state = torch.zeros((1, self.batch_sz, self.hid_sz)).to(dev)
        self.last_hiddens = (self.hidden_state, self.cell_state)

In [None]:
num_layers = 3

In [None]:
model = AWD_LSTM(num_layers, vocab_sz, emb_dim, hid_sz, hidden_p, embed_p, input_p, weight_p)
model = model.to(dev)
model

AWD_LSTM(
  (encoder): Embedding(9317, 400)
  (emb_drop): Dropout(p=0.1, inplace=False)
  (input_dp): Dropout(p=0.3, inplace=False)
  (hid_dp): Dropout(p=0.3, inplace=False)
  (lstms): ModuleList(
    (0): WeightDropout(
      (module): LSTM(400, 1150)
    )
    (1): WeightDropout(
      (module): LSTM(400, 1150)
    )
    (2): WeightDropout(
      (module): LSTM(400, 1150)
    )
  )
  (decoder): Linear(in_features=1150, out_features=9317, bias=True)
)

## Training the model

In [None]:
training_set = AminoLMDataset(data, seq_len)

In [None]:
training_loader = torch.utils.data.DataLoader(training_set, batch_size=1, shuffle=False)

In [None]:
total_train_len = len(training_loader)
total_train_len

58461351

In [None]:
# Test for the real work
for i, entry in enumerate(training_loader, 0):
    xs, ys = entry[0], entry[1]

    outputs = model(xs.squeeze(0))

    print(outputs.shape)
    print(ys.shape)

    loss = criterion(outputs, ys.squeeze(0))
    print(loss)
    
    break

torch.Size([3, 9317])
torch.Size([1, 3])


  self.dropout, self.training, self.bidirectional, self.batch_first)


NameError: ignored

In [None]:
# Hyperparameters
learning_rate = 0.01
epochs = 1

In [None]:
# Costfunction and optimize algorithm
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr= learning_rate)

NameError: ignored

In [None]:
display(HTML(
    """<table>
        <thead>
          <tr>
          <th>Epoch</th>
          <th>Percentage</th>
          <th>Loss</th>
          <th>Time</th>
          </tr>
        </thead>
        <tbody>
        """
))

for epoch in range(epochs):
    
    start_time = time.time()

    model.reset_hidden()
    
    # Initialize loss at 0
    epoch_loss = 0.0
    
    # Iterations (in between epoch) loss
    iteration_loss = 0.0

    for i, entry in enumerate(training_loader, 0):
        
        
        model.zero_grad()
        
        xs, ys = entry[0], entry[1]
        
        outputs = model(xs.squeeze(0))
        loss = criterion(outputs, ys.squeeze(0))
        
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()
        iteration_loss += loss.item()
        
        if i % 1e4 == 0:

            round_time = time.time()
            duration = round(((round_time - start_time) / 60), 0) # To convert to minutes
            
            perc = round((i / total_train_len * 100), 2)

            iteration_loss = round((iteration_loss / 1e4), 2)

            display(HTML(
            """<tr>
              <td>{}</td>
              <td>{}</td>
              <td>{}</td>
              <td>{}</td>
              </tr>""".format(str(epoch + 1), str(perc), str(iteration_loss), str(duration))
            ))

            iteration_loss = 0.0
    
    loss_history.append(epoch_loss)
    
    print(f'Epoch {str(epoch + 1)} Train loss: {str(epoch_loss)}.')

display(HTML('</tbody></table>'))

print('Finished training')

Epoch,Percentage,Loss,Time


  self.dropout, self.training, self.bidirectional, self.batch_first)


KeyboardInterrupt: ignored

## Save Model for Training Later

In [None]:
from google.colab import drive
from pathlib import Path


drive.mount('content/', force_remount=True)
base = Path('/content/content/My Drive/')


Mounted at content/


In [None]:
filename = '1_percent_AA_LM_v2.pt'
file_dir = Path('/content/content/MyDrive/' + filename)
file_dir

PosixPath('/content/content/MyDrive/1_percent_AA_LM_v2.pt')

In [None]:
torch.save(model, file_dir)

## Load Model for Further Training

In [None]:
from google.colab import drive
from pathlib import Path


drive.mount('content/', force_remount=True)
base = Path('/content/content/My Drive/')

Mounted at content/


In [None]:
model_path = Path('/content/content/MyDrive/1_percent_AA_LM_v2.pt')
model = torch.load(model_path)
model

AWD_LSTM(
  (encoder): Embedding(9317, 400)
  (emb_drop): Dropout(p=0.1, inplace=False)
  (input_dp): Dropout(p=0.3, inplace=False)
  (hid_dp): Dropout(p=0.3, inplace=False)
  (lstms): ModuleList(
    (0): WeightDropout(
      (module): LSTM(400, 1150)
    )
    (1): WeightDropout(
      (module): LSTM(400, 1150)
    )
    (2): WeightDropout(
      (module): LSTM(400, 1150)
    )
  )
  (decoder): Linear(in_features=1150, out_features=9317, bias=True)
)

### Train Further with Data of which the location is known

In [None]:
# Tokenize the protein sequence (or any sequence) in kmers.
def tokenize(df, protein_seqs_column, kmer_sz, premade_vocab=False):
    
    if not premade_vocab:
        kmers = set()
        # Loop over protein sequences
        for protein_seq in df[protein_seqs_column]:
            # Loop over the whole sequence
            for i in range(len(protein_seq) - (kmer_sz - 1)):
                # Add kmers to the set, thus only unique kmers will remain
                kmers.add(protein_seq[i: i + kmer_sz])

        # Map kmers for one hot-encoding
        kmer_to_id = dict()
        id_to_kmer = dict()

        for ind, kmer in enumerate(kmers):
            kmer_to_id[kmer] = ind
            id_to_kmer[ind] = kmer

        vocab_sz = len(kmers)

        assert vocab_sz == len(kmer_to_id.keys())
    
    else:
        kmer_to_id, id_to_kmer = premade_vocab
        vocab_sz = len(kmer_to_id)
    
    # Tokenize the protein sequence to integers
    tokenized = []
    for i, protein_seq in enumerate(df[protein_seqs_column], 0):
        sequence = []
        
        # If the kmer can't be found these indexes should be deleted
        remove_idxs = []
        
        for i in  range(len(protein_seq) - (kmer_sz -1)):
            # Convert kmer to integer
            kmer = protein_seq[i: i + kmer_sz]
            
            # For some reason, some kmers miss. Thus these sequences have to be removed
            try:
                sequence.append(kmer_to_id[kmer])
            except:
                remove_idxs.append(i)
            
        tokenized.append(sequence)
            
    df['tokenized_seqs'] = tokenized
    
    df.drop(remove_idxs, inplace=True)
    
    return df, vocab_sz, kmer_to_id, id_to_kmer

In [None]:
data_file = Path('/content/content/MyDrive/protein_data_2021-04-04.csv')
df = pd.read_csv(data_file, sep=';')
df.head()

Unnamed: 0,Sequence,Subcellular location [CC],Location
0,MTDTVFSNSSNRWMYPSDRPLQSNDKEQLQAGWSVHPGGQPDRQRK...,"SUBCELLULAR LOCATION: Cytoplasmic vesicle, sec...",Cytoplasm
1,MDTDSQRSHLSSFTMKLMDKFHSPKIKRTPSKKGKPAEVSVKIPEK...,SUBCELLULAR LOCATION: Early endosome {ECO:0000...,Endosome
2,MEDSTSPKQEKENQEELGETRRPWEGKTAASPQYSEPESSEPLEAK...,"SUBCELLULAR LOCATION: Cytoplasm, cytoskeleton,...",Cytoplasm
3,MALPGARARGWAAAARAAQRRRRVENAGGSPSPEPAGRRAALYVHW...,SUBCELLULAR LOCATION: Mitochondrion {ECO:00003...,Mitochondrion
4,MALLVDRVRGHWRIAAGLLFNLLVSICIVFLNKWIYVYHGFPNMSL...,SUBCELLULAR LOCATION: Membrane {ECO:0000305}; ...,Cell membrane


In [None]:
df.drop(['Subcellular location [CC]'], axis = 1, inplace=True)
df.head()

Unnamed: 0,Sequence,Location
0,MTDTVFSNSSNRWMYPSDRPLQSNDKEQLQAGWSVHPGGQPDRQRK...,Cytoplasm
1,MDTDSQRSHLSSFTMKLMDKFHSPKIKRTPSKKGKPAEVSVKIPEK...,Endosome
2,MEDSTSPKQEKENQEELGETRRPWEGKTAASPQYSEPESSEPLEAK...,Cytoplasm
3,MALPGARARGWAAAARAAQRRRRVENAGGSPSPEPAGRRAALYVHW...,Mitochondrion
4,MALLVDRVRGHWRIAAGLLFNLLVSICIVFLNKWIYVYHGFPNMSL...,Cell membrane


In [None]:
len(df)

16614

Data should be tokenized with the same vocab as for the other vocabulary.

In [None]:
# Load the vocabolary from the Language Model
vocab_save_file = '/content/content/MyDrive/LM_vocab.pkl'
vocab = pickle.load(open(vocab_save_file, 'rb'))

In [None]:
# Tokenize the protein sequence
df, vocab_sz, kmer_to_id, id_to_kmer = tokenize(df, 'Sequence', KMER_SIZE, vocab)

In [None]:
df.head(5)

Unnamed: 0,Sequence,Location,tokenized_seqs
0,MTDTVFSNSSNRWMYPSDRPLQSNDKEQLQAGWSVHPGGQPDRQRK...,Cytoplasm,"[3884, 8570, 3840, 6832, 2277, 2221, 1020, 904..."
1,MDTDSQRSHLSSFTMKLMDKFHSPKIKRTPSKKGKPAEVSVKIPEK...,Endosome,"[8772, 7207, 1857, 1688, 5461, 3901, 4899, 424..."
2,MEDSTSPKQEKENQEELGETRRPWEGKTAASPQYSEPESSEPLEAK...,Cytoplasm,"[1565, 3797, 2513, 516, 1428, 6558, 6568, 7337..."
3,MALPGARARGWAAAARAAQRRRRVENAGGSPSPEPAGRRAALYVHW...,Mitochondrion,"[8939, 2538, 9262, 4438, 2547, 302, 60, 3064, ..."
4,MALLVDRVRGHWRIAAGLLFNLLVSICIVFLNKWIYVYHGFPNMSL...,Cell membrane,"[8939, 6897, 6013, 1021, 3034, 2863, 8501, 697..."


In [None]:
df.dropna(inplace=True)
len(df)

16614

In [None]:
data = []
for seq in df['tokenized_seqs']:
    for kmer in seq:
        data.append(kmer)

### Train with the new data

In [None]:
training_set = AminoLMDataset(data, seq_len)

In [None]:
training_loader = torch.utils.data.DataLoader(training_set, batch_size=1, shuffle=False)

In [None]:
total_train_len = len(training_loader)
total_train_len

9616156

In [None]:
# Hyperparameters
learning_rate = 0.01
epochs = 10

In [None]:
# Costfunction and optimize algorithm
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr= learning_rate)

In [None]:
display(HTML(
    """<table>
        <thead>
          <tr>
          <th>Epoch</th>
          <th>Percentage</th>
          <th>Loss</th>
          <th>Time</th>
          </tr>
        </thead>
        <tbody>
        """
))

for epoch in range(epochs):
    
  start_time = time.time()

  model.reset_hidden()
  
  # Initialize loss at 0
  epoch_loss = 0.0
  iteration_loss = 0.0
  
  for i, entry in enumerate(training_loader, 0):
      
     

      model.zero_grad()
      
      xs, ys = entry[0], entry[1]
      
      outputs = model(xs.squeeze(0))
      loss = criterion(outputs, ys.squeeze(0))
      
      loss.backward()
      optimizer.step()
      
      epoch_loss += loss.item()
      iteration_loss += loss.item()
      
      if i % 1.5e4 == 0:
          
          round_time = time.time()
          duration = round(((round_time - start_time) / 60), 0) # To convert to minutes
          start_time = time.time()
          
          perc = round((i / total_train_len * 100), 2)

          iteration_loss = round((iteration_loss / 1.5e4), 2)

          display(HTML(
          """<tr>
            <td>{}</td>
            <td>{}</td>
            <td>{}</td>
            <td>{}</td>
            </tr>""".format(str(epoch + 1), str(perc), str(iteration_loss), str(duration))
          ))

          iteration_loss = 0.0
  
  loss_history.append(epoch_loss)
  
  print(f'Epoch {str(epoch + 1)} Train loss: {str(epoch_loss)}.')

display(HTML('</tbody></table>'))        
print('Finished training')

Epoch,Percentage,Loss,Time


  self.dropout, self.training, self.bidirectional, self.batch_first)


KeyboardInterrupt: ignored

In [None]:
filename = 'AA_LM_v2.pt'
file_dir = Path('/content/content/MyDrive/' + filename)
file_dir

PosixPath('/content/content/MyDrive/AA_LM_v2.pt')

In [None]:
torch.save(model, file_dir)

> https://arxiv.org/pdf/1801.06146.pdf