# **Character RNN**
Simple character-level RNN to generate text :)

In [None]:
%load_ext watermark
%watermark -a 'NavinKumarMNK' -v -p torch

## **Libraries**

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import string, re, time, random, unidecode
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import DataLoader, Dataset 
import os

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
torch.backends.cudnn.deterministic = True
device = "cuda" if torch.cuda.is_available else "cpu"
device = torch.device(device)

## **Dataset**

In [2]:
with open('covid19-faq.txt', 'r') as f:
    textfile = f.read()

# Add 
textfile = unidecode.unidecode(textfile) 

In [3]:
TEXT_PORTION_SIZE = 512
EPOCHS = 10
LEARNING_RATE = 0.001
EMBEDDING_DIM = 100
HIDDEN_DIM = 100
RANDOM_SEED = 143
NUM_HIDDEN_LAYERS = 3

In [4]:
text_list = textfile.split("\n")
i=0
while (i < len(text_list)):
    if(len(text_list[i]) == 0):
        del text_list[i]
    else:
        i+=1

In [5]:

for i in range(len(text_list)):
    if(len(text_list[i]) < TEXT_PORTION_SIZE):
        temp = len(text_list[i])
        while(temp < TEXT_PORTION_SIZE):
            temp = len(text_list[i])
            text_list[i] = text_list[i] + " " + text_list[i]
            

In [6]:
for i in range(len(text_list)):
    if(len(text_list[i]) > TEXT_PORTION_SIZE):
        text_list[i] = text_list[i][:TEXT_PORTION_SIZE]      


In [7]:
text_list[0] + '.'

'This filterable guide to common questions about COVID-19 testing and safety measures will help you navigate a safe semester on campus. Filter by audience and/or topic, or use the search function. This filterable guide to common questions about COVID-19 testing and safety measures will help you navigate a safe semester on campus. Filter by audience and/or topic, or use the search function. This filterable guide to common questions about COVID-19 testing and safety measures will help you navigate a safe semes.'

In [8]:
def char_to_tensor(text:str) -> torch.Tensor:
    lst = [string.printable.index(c) for c in text]
    tensor = torch.tensor(lst)
    return tensor

print(char_to_tensor(text_list[0]))

tensor([55, 17, 18, 28, 94, 15, 18, 21, 29, 14, 27, 10, 11, 21, 14, 94, 16, 30,
        18, 13, 14, 94, 29, 24, 94, 12, 24, 22, 22, 24, 23, 94, 26, 30, 14, 28,
        29, 18, 24, 23, 28, 94, 10, 11, 24, 30, 29, 94, 38, 50, 57, 44, 39, 74,
         1,  9, 94, 29, 14, 28, 29, 18, 23, 16, 94, 10, 23, 13, 94, 28, 10, 15,
        14, 29, 34, 94, 22, 14, 10, 28, 30, 27, 14, 28, 94, 32, 18, 21, 21, 94,
        17, 14, 21, 25, 94, 34, 24, 30, 94, 23, 10, 31, 18, 16, 10, 29, 14, 94,
        10, 94, 28, 10, 15, 14, 94, 28, 14, 22, 14, 28, 29, 14, 27, 94, 24, 23,
        94, 12, 10, 22, 25, 30, 28, 75, 94, 41, 18, 21, 29, 14, 27, 94, 11, 34,
        94, 10, 30, 13, 18, 14, 23, 12, 14, 94, 10, 23, 13, 76, 24, 27, 94, 29,
        24, 25, 18, 12, 73, 94, 24, 27, 94, 30, 28, 14, 94, 29, 17, 14, 94, 28,
        14, 10, 27, 12, 17, 94, 15, 30, 23, 12, 29, 18, 24, 23, 75, 94, 55, 17,
        18, 28, 94, 15, 18, 21, 29, 14, 27, 10, 11, 21, 14, 94, 16, 30, 18, 13,
        14, 94, 29, 24, 94, 12, 24, 22, 

In [9]:
def input_targets(sample:str) -> list:
    text = char_to_tensor(sample+'.')
    return text[:-1], text[1:]

X, y = input_targets(text_list[0])
X, y

(tensor([55, 17, 18, 28, 94, 15, 18, 21, 29, 14, 27, 10, 11, 21, 14, 94, 16, 30,
         18, 13, 14, 94, 29, 24, 94, 12, 24, 22, 22, 24, 23, 94, 26, 30, 14, 28,
         29, 18, 24, 23, 28, 94, 10, 11, 24, 30, 29, 94, 38, 50, 57, 44, 39, 74,
          1,  9, 94, 29, 14, 28, 29, 18, 23, 16, 94, 10, 23, 13, 94, 28, 10, 15,
         14, 29, 34, 94, 22, 14, 10, 28, 30, 27, 14, 28, 94, 32, 18, 21, 21, 94,
         17, 14, 21, 25, 94, 34, 24, 30, 94, 23, 10, 31, 18, 16, 10, 29, 14, 94,
         10, 94, 28, 10, 15, 14, 94, 28, 14, 22, 14, 28, 29, 14, 27, 94, 24, 23,
         94, 12, 10, 22, 25, 30, 28, 75, 94, 41, 18, 21, 29, 14, 27, 94, 11, 34,
         94, 10, 30, 13, 18, 14, 23, 12, 14, 94, 10, 23, 13, 76, 24, 27, 94, 29,
         24, 25, 18, 12, 73, 94, 24, 27, 94, 30, 28, 14, 94, 29, 17, 14, 94, 28,
         14, 10, 27, 12, 17, 94, 15, 30, 23, 12, 29, 18, 24, 23, 75, 94, 55, 17,
         18, 28, 94, 15, 18, 21, 29, 14, 27, 10, 11, 21, 14, 94, 16, 30, 18, 13,
         14, 94, 29, 24, 94,

In [10]:
dataset = []
for i in range(len(text_list)):
    x,y = input_targets(text_list[i])
    data = torch.cat((x, y), 0)
    dataset.append(data.tolist())

df = pd.DataFrame(dataset)
df.to_csv("data.csv")
    

In [11]:
class Covid19Dataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.csv_file = pd.read_csv(csv_file)
    
    def __len__(self):
        return len(self.csv_file)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        X, y = self.csv_file.iloc[idx, 0:512], self.csv_file.iloc[idx, 512:1024]
        return X, y
        

In [12]:
data = Covid19Dataset("./data.csv")
X, y = data[0]
y

511     28
512     17
513     18
514     28
515     94
        ..
1018    28
1019    14
1020    22
1021    14
1022    28
Name: 0, Length: 512, dtype: int64

## **Model : LSTM-Cell**

In [14]:
class LSTMCell(pl.LightningModule):
    def __init__(self, input_size:int, embed_size:int, hidden_size:int,
                  output_size:int) -> None:
        super(LSTMCell, self).__init__()
        self.dataset_split()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.save_hyperparameters()
        self.embed = torch.nn.Embedding(num_embeddings=input_size, 
                                        embedding_dim = embed_size)
        self.LSTMCell = torch.nn.LSTMCell(input_size=embed_size,
                                  hidden_size = hidden_size
                                    )
        self.fc = torch.nn.Linear(hidden_size, output_size)
        
    def forward(self, character:torch.Tensor, hidden:torch.Tensor, cell_state:torch.Tensor) -> (torch.Tensor, torch.Tensor, torch.Tensor):
        embedded = self.embed(character)
        print(embedded.shape)
        (hidden, cell_state) = self.LSTMCell(embedded, (hidden, cell_state))
        output = self.fc(hidden)
        return output, hidden, cell_state
    
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=LEARNING_RATE)
    
    def dataset_split(self):
        self.dataset = Covid19Dataset("./data.csv")
        self.train_dataset, self.val_dataset = torch.utils.data.random_split(torch.Tensor(self.dataset).type(torch.int32),
                                                        [int(len(self.dataset)*0.9), 
                                                         len(self.dataset) - int(len(self.dataset)*0.9)])
    def train_dataloader(self) -> DataLoader:
        return DataLoader(dataset=self.train_dataset, num_workers=6, batch_size=32, shuffle=True)
    
    def val_dataloader(self) -> DataLoader:
        return DataLoader(dataset=self.val_dataset, num_workers=6, batch_size=32, shuffle=False)
    
    def test_dataloader(self) -> tuple:
        len = 100
        return ("T", len) 

In [15]:
def training_step(self, batch, batch_idx):
    print("Train",batch)
    (X, y) = batch
    hidden_state, cell_state = torch.Tensor(len(batch), self.hidden_size), torch.Tensor(len(batch), self.hidden_size)
    los=0
    for c in range(TEXT_PORTION_SIZE):
        outputs, hidden_state, cell_state = self(X[batch_idx][c].unsqueeze(0), hidden_state, cell_state)
        loss += F.cross_entropy(outputs, y[batch_idx][c].view(1))
    loss /= TEXT_PORTION_SIZE
    return {"loss" : loss}

def validation_step(self, batch, batch_idx):
    X, y = batch[:, 0], batch[:, 1]
    hidden_state, cell_state = torch.Tensor(len(batch), self.hidden_size), torch.Tensor(len(batch), self.hidden_size)
    loss=0
    for c in range(TEXT_PORTION_SIZE):
        new = X[:, c]
        outputs, hidden_state, cell_state = self(X[:, c], hidden_state, cell_state)
        loss += F.cross_entropy(outputs, y[batch_idx][c].view(1))
    loss /= TEXT_PORTION_SIZE
    
    return {"val_loss" : loss}

def test_step(self, data, temperature=0.8):
    (str_start, pred_len) = data
    hidden_state, cell_state = torch.Tensor(1, self.hidden_size), torch.Tensor(1, self.hidden_size)
    prime_input = char_to_tensor(str_start)
    predicted = str_start

    for c in range(len(str_start) - 1):
        inp = prime_input[c].unsqueeze(0)
        _, hidden_state, cell_state = self(inp, hidden_state, cell_state)
    inp = prime_input[-1].unsqueeze(0)

    for c in range(pred_len):
        outputs, hidden_state, cell_state = self(inp, hidden_state, cell_state)
        output_dist = outputs.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        predicted_char = string.printable[top_i]
        predicted += predicted_char
        inp = char_to_tensor(predicted)
    
    return predicted


In [16]:
LSTMCell.training_step = training_step
LSTMCell.validation_step = validation_step
LSTMCell.test_step = test_step

In [17]:
torch.manual_seed(RANDOM_SEED)
model = LSTMCell(len(string.printable), EMBEDDING_DIM, HIDDEN_DIM, len(string.printable))
trainer = pl.Trainer(min_epochs=EPOCHS, max_epochs=EPOCHS, fast_dev_run=False,
                    log_every_n_steps=25, accelerator="gpu", devices=1)
trainer.fit(model)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type      | Params
---------------------------------------
0 | embed    | Embedding | 10.0 K
1 | LSTMCell | LSTMCell  | 117 K 
2 | fc       | Linear    | 12.9 K
---------------------------------------
140 K     Trainable params
0         Non-trainable params
140 K     Total params
0.563     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [21,0,0], thread: [0,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [21,0,0], thread: [1,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [21,0,0], thread: [2,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [21,0,0], thread: [3,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [21,0,0], thread: [4,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld

tensor([ 93, 309, 238, 101,  33, 260, 146, 236, 157, 203, 149, 329, 273, 352,
        316, 337, 233, 116, 292,  63, 318,  65, 111, 202, 284, 183, 364,  38,
         74,  66,  34, 275], device='cuda:0', dtype=torch.int32)
torch.Size([32, 100])


RuntimeError: CUDA error: device-side assert triggered

In [None]:
trainer.test(model)

## **Model : LSTM**

In [13]:
class LSTM(pl.LightningModule):
    def __init__(self, input_size:int, embed_size:int, hidden_size:int,
                  output_size:int, num_layers:int) -> None:
        super(LSTM, self).__init__()
        self.dataset_split()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.save_hyperparameters()
        self.embed = torch.nn.Embedding(num_embeddings=input_size, 
                                        embedding_dim = embed_size)
        self.LSTM = torch.nn.LSTM(input_size=embed_size,
                                  hidden_size = hidden_size,
                                     num_layers = num_layers)
        self.fc = torch.nn.Linear(hidden_size, output_size)
        
    def forward(self, features, hidden_and_cell_state):
        #features = features.view(1, -1)
        print(features.shape)
        embedded = self.embed(features)
        print(embedded.shape, hidden_and_cell_state)
        #[sentence/num_layers, batch_size, hidden_dim]
        output, hidden_and_cell_state = self.LSTM(embedded, hidden_and_cell_state)
        output.squeeze_(0)
        output.self.fc(output)
        return hidden_and_cell_state
    
    def init_zero_state(self):
        init_hidden = torch.zeros(self.num_layers, TEXT_PORTION_SIZE, self.hidden_size)
        init_cell = torch.zeros(self.num_layers, TEXT_PORTION_SIZE, self.hidden_size)
        return (init_hidden, init_cell)
        
    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=LEARNING_RATE)
    
    def dataset_split(self):
        self.dataset = Covid19Dataset("./data.csv")
        self.train_dataset, self.val_dataset = torch.utils.data.random_split(torch.Tensor(self.dataset).type(torch.int32),
                                                        [int(len(self.dataset)*0.9), 
                                                        len(self.dataset) - int(len(self.dataset)*0.9)])
    def train_dataloader(self) -> DataLoader:
        return DataLoader(dataset=self.train_dataset, num_workers=6, batch_size=32, shuffle=True)
    
    def val_dataloader(self) -> DataLoader:
        return DataLoader(dataset=self.val_dataset, num_workers=6, batch_size=32, shuffle=False)
    
    def test_dataloader(self) -> tuple:
        len = 100
        return ("T", len) 

In [14]:
def training_step(self, batch, batch_idx):
    print("Train",batch)
    (X, y) = batch
    hidden_and_cell_state = torch.Tensor(len(batch), self.hidden_size), torch.Tensor(len(batch), self.hidden_size)
    for c in range(TEXT_PORTION_SIZE):
        outputs, hidden_and_cell_state = self(X[c].unsqueeze(0), hidden_and_cell_state)
        loss += F.cross_entropy(outputs, y[c].view(1))
    loss /= TEXT_PORTION_SIZE
    return {"loss" : loss}

def validation_step(self, batch, batch_idx):
    X, y = batch[:, 0], batch[:, 1]
    print(X, X.shape)
    hidden_and_cell_state = self.init_zero_state()
    outputs, hidden_and_cell_state = self(X, hidden_and_cell_state)
    loss = F.cross_entropy(outputs, y.view(1))
    #loss /= TEXT_PORTION_SIZE
    
    return {"val_loss" : loss}

def test_step(self, data, temperature=0.8):
    (str_start, pred_len) = data
    hidden_state, cell_state = torch.Tensor(1, self.hidden_size), torch.Tensor(1, self.hidden_size)
    prime_input = char_to_tensor(str_start)
    predicted = str_start

    for c in range(len(str_start) - 1):
        inp = prime_input[c].unsqueeze(0)
        _, hidden_state, cell_state = self(inp, hidden_state, cell_state)
    inp = prime_input[-1].unsqueeze(0)

    for c in range(pred_len):
        outputs, hidden_state, cell_state = self(inp, hidden_state, cell_state)
        output_dist = outputs.data.view(-1).div(temperature).exp()
        top_i = torch.multinomial(output_dist, 1)[0]

        predicted_char = string.printable[top_i]
        predicted += predicted_char
        inp = char_to_tensor(predicted)
    
    return predicted


In [15]:
LSTM.training_step = training_step
LSTM.validation_step = validation_step
LSTM.test_step = test_step

In [16]:
model = LSTM(len(string.printable), EMBEDDING_DIM, TEXT_PORTION_SIZE, 
                 len(string.printable), 3)
trainer = pl.Trainer(min_epochs=EPOCHS, max_epochs=EPOCHS, fast_dev_run=False,
                    log_every_n_steps=25, accelerator="gpu", devices=1)
try:
    trainer.fit(model)
except Exception as e:
    print(e)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | embed | Embedding | 10.0 K
1 | LSTM  | LSTM      | 242 K 
2 | fc    | Linear    | 10.1 K
------------------------------------
262 K     Trainable params
0         Non-trainable params
262 K     Total params
1.050     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

tensor([[ 86,  55,  24,  ...,  30,  23,  29],
        [ 97,  56,  23,  ...,  94,  10,  27],
        [ 34,  60,  24,  ...,  10,  18,  23],
        ...,
        [136,  36,  94,  ...,  15,  94,  10],
        [233,  48,  10,  ...,  28,  94,  10],
        [341,  44,  23,  ...,  16,  27,  10]], device='cuda:0',
       dtype=torch.int32) torch.Size([32, 512])
torch.Size([32, 512])
torch.Size([32, 512, 100]) (tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0.

/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [96,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [97,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [98,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [99,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda/conda-bld/pytorch_1670525539683/work/aten/src/ATen/native/cuda/Indexing.cu:1141: indexSelectLargeIndex: block: [160,0,0], thread: [32,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
/opt/conda

RuntimeError: CUDA error: device-side assert triggered

In [None]:
trainer.test(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


RuntimeError: CUDA error: device-side assert triggered