# Neural network training


```
COPYLEFT LESTERRRY, 2020
```



In [None]:
#This cell connects GDrive to Colab. You can either use this or upload files manually.
##################

# Colab helper used to attach Google Drive to this runtime.
from google.colab import drive
# Mounts the Drive under /content/drive; presumably this is what makes the
# relative 'drive/My Drive/...' paths in the configuration cell resolve
# (assumes the notebook's working directory is /content - TODO confirm).
drive.mount('/content/drive')

In [None]:
#This cell is used for configuration
##################

from collections import Counter
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pickle
import time

TRAIN_TEXT_FILE_PATH = 'drive/My Drive/colab/textbook.txt' #Path to your textbook file
HIDDEN_SIZE = 256 #Size of the LSTM hidden state. Bigger values give the network more capacity; consider raising it for bigger textbooks.
LAYERS = 3 #Number of LSTM layers. Try adjusting too.
#Device which will be used while training. CUDA is picked when it is available;
#otherwise the CPU is used, so the notebook does not crash on the first '.to(device)' call on a machine without a GPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#Check whether the machine you're going to further launch 'serve.py' script supports CUDA, and decide which one to choose.
#To use CUDA in Colab, make sure you've enabled Hardware GPU Acceleration in IDE settings.

MODEL_PATH = 'drive/My Drive/colab/model3.bin' #Place to save/fetch your model to/from.


In [None]:
#This cell sets everything up. Make sure your textbook file exists at TRAIN_TEXT_FILE_PATH and all required libraries are installed.
##################

with open(TRAIN_TEXT_FILE_PATH) as text_file:
    # Iterating the file yields the same lines readlines() returns, each one
    # still ending with its newline. Joining them with ' ' therefore puts one
    # extra space in front of every line after the first.
    text_sample = ' '.join(text_file)

def text_to_seq(text_sample):
    """Encode a text as an array of character indices.

    Characters are ranked by how often they occur in ``text_sample`` (most
    frequent first, ties kept in order of first appearance); a character's
    rank is its index. The ranked characters and both lookup tables are
    printed as a side effect.

    Args:
        text_sample: The text to encode.

    Returns:
        A tuple ``(sequence, char_to_idx, idx_to_char)``: the numpy array of
        indices for every character of ``text_sample``, the character -> index
        dict and the index -> character dict.
    """
    # most_common() with no argument returns every (char, count) pair, sorted
    # by count in descending order with a stable sort.
    ranked_pairs = Counter(text_sample).most_common()
    sorted_chars = [pair[0] for pair in ranked_pairs]
    print(sorted_chars)

    idx_to_char = dict(enumerate(sorted_chars))
    char_to_idx = {symbol: position for position, symbol in idx_to_char.items()}
    print(f'CtI: {char_to_idx}')
    print(f'ItC: {idx_to_char}')

    sequence = np.array([char_to_idx[symbol] for symbol in text_sample])
    return sequence, char_to_idx, idx_to_char

sequence, char_to_idx, idx_to_char = text_to_seq(text_sample)

SEQ_LEN = 256  # Length of every sampled chunk; each chunk yields SEQ_LEN - 1 input/target pairs.
BATCH_SIZE = 16  # Number of chunks sampled per batch.

def get_batch(sequence):
    """Sample a random batch of next-character training pairs.

    ``BATCH_SIZE`` windows of ``SEQ_LEN`` consecutive items are drawn
    (with replacement) from ``sequence``. For every window the input is all
    items but the last and the target is the same window shifted by one.

    Args:
        sequence: 1-D numpy array of character indices.

    Returns:
        A tuple ``(trains, targets)`` of long tensors, both of shape
        ``(BATCH_SIZE, SEQ_LEN - 1, 1)``.

    Raises:
        ValueError: If ``sequence`` holds fewer than ``SEQ_LEN`` items.
    """
    # Number of valid window start positions: 0 .. len(sequence) - SEQ_LEN inclusive.
    n_starts = len(sequence) - SEQ_LEN + 1
    if n_starts < 1:
        raise ValueError(
            f'sequence has {len(sequence)} items, at least SEQ_LEN={SEQ_LEN} are required'
        )

    trains = []
    targets = []
    for _ in range(BATCH_SIZE):
        # randint's upper bound is exclusive, so n_starts (not n_starts - 1)
        # is needed for the last window to be reachable.
        batch_start = np.random.randint(0, n_starts)
        chunk = sequence[batch_start: batch_start + SEQ_LEN]
        trains.append(torch.LongTensor(chunk[:-1]).view(-1, 1))
        targets.append(torch.LongTensor(chunk[1:]).view(-1, 1))
    return torch.stack(trains, dim=0), torch.stack(targets, dim=0)
    
def evaluate(model, char_to_idx, idx_to_char, start_text=' ', prediction_len=200, temp=0.3):
    """Generate text with the model, one sampled character at a time.

    The model is first primed with ``start_text`` and then repeatedly asked
    for the next character, which is sampled from the temperature-scaled
    softmax of its output and fed back as the next input.

    The model's training/eval mode is not touched here; call ``model.eval()``
    beforehand if dropout should be disabled.

    Args:
        model: Module called as ``model(indices, hidden)`` that returns
            ``(logits, hidden)`` and provides ``init_hidden()``.
        char_to_idx: Mapping from character to index.
        idx_to_char: Mapping from index to character.
        start_text: Non-empty prompt; every character must be a key of
            ``char_to_idx``.
        prediction_len: Number of characters to generate.
        temp: Softmax temperature; lower values make sampling greedier.

    Returns:
        ``start_text`` followed by ``prediction_len`` generated characters.

    Raises:
        ValueError: If ``start_text`` is empty.
        KeyError: If ``start_text`` contains a character missing from
            ``char_to_idx``.
    """
    if not start_text:
        raise ValueError('start_text must contain at least one character')

    # Follow the device the model actually lives on instead of a global
    # variable, so a model loaded or moved to another device keeps working.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    idx_input = [char_to_idx[char] for char in start_text]
    train = torch.LongTensor(idx_input).view(-1, 1, 1).to(model_device)
    predicted_chars = []

    # Sampling needs no gradients; without no_grad the autograd graph would
    # keep growing through the hidden state on every generated character.
    with torch.no_grad():
        hidden = tuple(state.to(model_device) for state in model.init_hidden())

        # Prime the hidden state with everything but the last prompt character.
        # The last one is the first input of the sampling loop; feeding it
        # here as well would make the model see it twice.
        if len(idx_input) > 1:
            _, hidden = model(train[:-1], hidden)

        inp = train[-1].view(-1, 1, 1)

        for _ in range(prediction_len):
            output, hidden = model(inp, hidden)
            output_logits = output.detach().cpu().view(-1)
            p_next = F.softmax(output_logits / temp, dim=-1).numpy()
            top_index = np.random.choice(len(char_to_idx), p=p_next)
            inp = torch.LongTensor([top_index]).view(-1, 1, 1).to(model_device)
            predicted_chars.append(idx_to_char[top_index])

    return start_text + ''.join(predicted_chars)

class TextRNN(nn.Module):
    """Character-level language model: embedding -> LSTM -> dropout -> linear.

    Args:
        input_size: Vocabulary size; used both as the number of embeddings
            and as the size of the output layer.
        hidden_size: Size of the LSTM hidden state.
        embedding_size: Size of the character embeddings.
        n_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, embedding_size, n_layers=1):
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.n_layers = n_layers

        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, self.n_layers)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(self.hidden_size, self.input_size)

    def forward(self, x, hidden):
        """Run the network over a sequence of character indices.

        Args:
            x: Long tensor of indices shaped ``(seq_len, batch, 1)``.
            hidden: ``(h, c)`` LSTM state, e.g. from ``init_hidden``.

        Returns:
            ``(logits, (h, c))`` where ``logits`` is shaped
            ``(seq_len, batch, input_size)`` and ``(h, c)`` is the LSTM state
            after the last step.
        """
        # The embedding adds a trailing dimension; drop the singleton index
        # axis so the LSTM receives (seq_len, batch, embedding_size).
        x = self.encoder(x).squeeze(2)
        out, (ht1, ct1) = self.lstm(x, hidden)
        out = self.dropout(out)
        x = self.fc(out)
        return x, (ht1, ct1)

    def init_hidden(self, batch_size=1):
        """Return a zeroed ``(h, c)`` LSTM state for ``batch_size`` sequences.

        The state is created on the device (and with the dtype) of the
        model's own weights rather than a global variable, so it always
        matches the model. It is a plain constant: the initial state is not
        a trained parameter, so no gradient is tracked for it.
        """
        weight = self.fc.weight
        shape = (self.n_layers, batch_size, self.hidden_size)
        return (torch.zeros(shape, dtype=weight.dtype, device=weight.device),
                torch.zeros(shape, dtype=weight.dtype, device=weight.device))

In [None]:
#This cell launches learning process. Stop it whenever the Loss Rate value stops decreasing.
##################

#WARNING: DO NOT stop code execution until you are permitted to do so, or the model may be damaged.

# Kept as a module-level name because code in other cells may refer to 'device'.
device = DEVICE
model = TextRNN(input_size=len(idx_to_char), hidden_size=HIDDEN_SIZE, embedding_size=128, n_layers=LAYERS)
model.to(device)

# Counts loss reports; every time it reaches 5 the loop pauses so that
# execution can be interrupted.
i = 1
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, amsgrad=True)
# The 'verbose' flag is not passed: it is deprecated in recent PyTorch
# releases and may be rejected by newer ones. Learning-rate reductions are
# reported manually after scheduler.step() instead.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    patience=5,
    factor=0.5
)

n_epochs = 50000
loss_avg = []

for epoch in range(n_epochs):
    model.train()
    train, target = get_batch(sequence)
    # get_batch returns (batch, seq, 1); the LSTM expects the sequence axis first.
    train = train.permute(1, 0, 2).to(device)
    target = target.permute(1, 0, 2).to(device)
    hidden = model.init_hidden(BATCH_SIZE)

    output, hidden = model(train, hidden)
    # CrossEntropyLoss wants logits as (batch, classes, seq) and targets as (batch, seq).
    loss = criterion(output.permute(1, 2, 0), target.squeeze(-1).permute(1, 0))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    loss_avg.append(loss.item())
    if len(loss_avg) >= 50:
        i += 1
        mean_loss = np.mean(loss_avg)
        # 'LR' in this message is the mean loss over the last 50 steps (the
        # 'Loss Rate' of the cell header), not the learning rate.
        print(f'LR: {mean_loss}')
        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(mean_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after != lr_before:
            print(f'Learning rate reduced from {lr_before} to {lr_after}')
        loss_avg = []
        model.eval()
        predicted_text = evaluate(model, char_to_idx, idx_to_char)
        print(predicted_text)
        if i == 5:
            print("You now have 5 seconds to stop execution")
            time.sleep(5)
            print("DO NOT stop execution until further notice")
            i = 0

In [None]:
#This cell saves model to a binary file for further use
##################

# The whole model object is pickled (not just its state_dict), so whoever
# loads the file gets a ready-to-use module back.
# The 'with' block closes the file even if torch.save raises.
with open(MODEL_PATH, "wb") as file:
    torch.save(model, file)

In [None]:
#This cell converts model for a specific device type
##################

model_out = "drive/My Drive/colab/model3.bin" #Output file path
conv_device = torch.device('cpu') #Output device

file = open(MODEL_PATH, "rb")
mod = torch.load(file)
file.close()
mod.eval()
mod.to(conv_device)
file = open(model_out, "wb")
torch.save(model, file)
file.close()

In [None]:
#This cell should be used for testing saved model
file = open(MODEL_PATH, "rb")
mod = torch.load(file)
file.close()
mod.eval()
print(evaluate(
    mod,
    char_to_idx,
    idx_to_char,
    temp=0.3,
    prediction_len=200,
    start_text=' ',
    )
)