In [None]:
# %load C:\\Users\\TEEM\\Desktop\\shakespears\\dataset.py
# import some packages you need here
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Ignore warnings
# NOTE: this installs a global "ignore" filter as a side effect of importing
# this module, so every warning category is silenced for the whole process.
import warnings
warnings.filterwarnings("ignore")

# Switch matplotlib to interactive mode (also an import-time side effect).
plt.ion()   # interactive mode

class Shakespeare(Dataset):
    """Character-level dataset built from a plain-text file.

    The text is mapped to integer character indices and exposed as
    overlapping windows: sample ``i`` is the pair
    ``(indices[i:i + seq_length], indices[i + 1:i + seq_length + 1])``,
    i.e. the target is the input shifted one character to the right.

    Args:
        input_file: path of the text file to load (read as UTF-8).
        seq_length: number of characters per input/target window
            (defaults to 30).

    Attributes:
        text: raw file content.
        chars: sorted list of the distinct characters in ``text``.
        char2idx: mapping ``{character: index}``.
        idx2char: mapping ``{index: character}`` (needed for generation).
        text_indices: the whole text as a list of character indices.
        seq_length: window length used for every sample.

    Raises:
        ValueError: if ``seq_length`` is smaller than 1.
    """

    def __init__(self, input_file, seq_length=30):
        if seq_length < 1:
            raise ValueError(
                f"seq_length must be a positive integer, got {seq_length}"
            )

        # An explicit encoding keeps decoding independent of the OS locale.
        with open(input_file, 'r', encoding='utf-8') as file:
            self.text = file.read()

        # Character dictionaries in both directions.
        self.chars = sorted(set(self.text))
        self.char2idx = {char: idx for idx, char in enumerate(self.chars)}
        self.idx2char = dict(enumerate(self.chars))

        # Whole corpus as character indices.
        self.text_indices = [self.char2idx[char] for char in self.text]

        self.seq_length = seq_length

        # One tensor for the whole corpus; windows are sliced from it on
        # demand instead of storing every overlapping window as a list.
        self._encoded = torch.tensor(self.text_indices, dtype=torch.long)

    @property
    def data(self):
        """All input windows as lists of indices (built on each access)."""
        return [
            self.text_indices[i:i + self.seq_length]
            for i in range(len(self))
        ]

    @property
    def targets(self):
        """All target windows as lists of indices (built on each access)."""
        return [
            self.text_indices[i + 1:i + self.seq_length + 1]
            for i in range(len(self))
        ]

    def __len__(self):
        # Each window needs seq_length inputs plus one trailing character
        # for the last target; shorter texts yield an empty dataset.
        return max(0, len(self.text_indices) - self.seq_length)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` LongTensor pair for window ``idx``.

        Negative indices count from the end, as with a list.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if idx < 0:
            idx = idx + size
        if not 0 <= idx < size:
            # Without this check an out-of-range slice would silently
            # return a truncated window.
            raise IndexError("Shakespeare index out of range")

        end = idx + self.seq_length
        # clone() so callers get independent tensors, not views of the corpus.
        input_sq = self._encoded[idx:end].clone()
        target_sq = self._encoded[idx + 1:end + 1].clone()

        return input_sq, target_sq

if __name__ == '__main__':
    # Smoke test: load the training corpus and report how many samples
    # the dataset exposes.
    dataset = Shakespeare(
        'C:\\Users\\TEEM\\Desktop\\shakespears\\shakespeare_train.txt'
    )
    print(f"Dataset size: {len(dataset)}")

    

    

    

In [None]:
# %load C:\\Users\\TEEM\\Desktop\\shakespears\\model.py
import torch
import torch.nn as nn

class CharRNN(nn.Module):
    """Character-level language model: embedding -> stacked RNN -> linear.

    Args:
        input_size: vocabulary size (number of embedding rows).
        hidden_size: embedding width and RNN hidden width.
        output_size: number of output classes (logits per position).
        n_layers: number of stacked RNN layers.
        dropout: dropout probability applied to the RNN output and, when
            ``n_layers > 1``, between the stacked RNN layers.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, dropout=0.5):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        # nn.RNN applies dropout only between stacked layers; passing a
        # non-zero value with a single layer has no effect and only
        # triggers a warning, so it is zeroed in that case.
        inter_layer_dropout = dropout if n_layers > 1 else 0.0
        self.rnn = nn.RNN(hidden_size, hidden_size, n_layers,
                          dropout=inter_layer_dropout, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run one forward pass.

        Args:
            input: LongTensor of character indices, batch-first
                ``(batch, seq)``.
            hidden: initial hidden state, as produced by ``init_hidden``.

        Returns:
            ``(output, hidden)`` where ``output`` holds the logits flattened
            to ``(batch * seq, output_size)`` and ``hidden`` is the final
            RNN state.
        """
        embedded = self.embedding(input)
        output, hidden = self.rnn(embedded, hidden)
        output = self.dropout(output)
        # Flatten batch and time so the result feeds CrossEntropyLoss directly.
        output = self.fc(output.reshape(output.size(0) * output.size(1), output.size(2)))

        return output, hidden

    def init_hidden(self, batch_size):
        """Return a zero hidden state of shape ``(n_layers, batch_size, hidden_size)``.

        The tensor is created on the same device and with the same dtype as
        the model parameters, so it stays valid after ``model.to(device)``.
        """
        reference = self.fc.weight
        initial_hidden = torch.zeros(
            self.n_layers, batch_size, self.hidden_size,
            device=reference.device, dtype=reference.dtype,
        )

        return initial_hidden


class CharLSTM(nn.Module):
    """Character-level language model: embedding -> stacked LSTM -> linear.

    Args:
        input_size: vocabulary size (number of embedding rows).
        hidden_size: embedding width and LSTM hidden width.
        output_size: number of output classes (logits per position).
        n_layers: number of stacked LSTM layers.
        dropout: dropout probability applied to the LSTM output and, when
            ``n_layers > 1``, between the stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, dropout=0.5):
        super(CharLSTM, self).__init__()

        self.hidden_size = hidden_size
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_size, hidden_size)
        # nn.LSTM applies dropout only between stacked layers; passing a
        # non-zero value with a single layer has no effect and only
        # triggers a warning, so it is zeroed in that case.
        inter_layer_dropout = dropout if n_layers > 1 else 0.0
        self.lstm = nn.LSTM(hidden_size, hidden_size, n_layers,
                            dropout=inter_layer_dropout, batch_first=True)
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Run one forward pass.

        Args:
            input: LongTensor of character indices, batch-first
                ``(batch, seq)``.
            hidden: ``(hidden_state, cell_state)`` tuple, as produced by
                ``init_hidden``.

        Returns:
            ``(output, hidden)`` where ``output`` holds the logits flattened
            to ``(batch * seq, output_size)`` and ``hidden`` is the final
            ``(hidden_state, cell_state)`` tuple.
        """
        embedded = self.embedding(input)
        output, hidden = self.lstm(embedded, hidden)
        output = self.dropout(output)
        # Flatten batch and time so the result feeds CrossEntropyLoss directly.
        output = self.fc(output.reshape(output.size(0) * output.size(1), output.size(2)))

        return output, hidden

    def init_hidden(self, batch_size):
        """Return zero ``(hidden_state, cell_state)`` tensors.

        Each has shape ``(n_layers, batch_size, hidden_size)`` and is created
        on the same device and with the same dtype as the model parameters,
        so the state stays valid after ``model.to(device)``.
        """
        reference = self.fc.weight
        shape = (self.n_layers, batch_size, self.hidden_size)
        hidden_state = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
        cell_state = torch.zeros(shape, device=reference.device, dtype=reference.dtype)

        return (hidden_state, cell_state)

In [None]:
# %load C:\\Users\\TEEM\\Desktop\\shakespears\\generate.py
# import some packages you need here
import numpy as np
import torch
import torch.nn.functional as F

def generate(model, seed_characters, temperature, device, char_to_idx, idx_to_char, length=100):
    """Generate characters by sampling from a trained character model.

    The whole seed is fed through the model first so the hidden state is
    conditioned on it; after that one character is sampled per step and fed
    back as the next input.

    Args:
        model: trained model exposing ``init_hidden(batch_size)`` and
            ``model(input, hidden) -> (logits, hidden)``.
        seed_characters: non-empty string used to prime the model.
        temperature: softmax temperature T (> 0); logits are divided by T
            before the softmax, so lower values give sharper distributions.
        device: device on which the input and hidden tensors are created.
        char_to_idx: mapping ``{character: index}``.
        idx_to_char: mapping ``{index: character}``.
        length: number of characters to generate after the seed.

    Returns:
        samples: the seed followed by ``length`` generated characters.

    Raises:
        ValueError: if ``seed_characters`` is empty or ``temperature <= 0``.
        KeyError: if the seed contains a character missing from
            ``char_to_idx``.
    """
    if not seed_characters:
        raise ValueError("seed_characters must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    model.eval()

    # First step consumes the full seed; later steps consume one character.
    step_input = torch.tensor(
        [[char_to_idx[char] for char in seed_characters]],
        dtype=torch.long,
        device=device,
    )

    # The state may be a tensor (RNN) or a tuple of tensors (LSTM); either
    # way it has to live on the same device as the input.
    hidden = model.init_hidden(1)
    if isinstance(hidden, tuple):
        hidden = tuple(state.to(device) for state in hidden)
    else:
        hidden = hidden.to(device)

    generated = []
    with torch.no_grad():
        for _ in range(length):
            output, hidden = model(step_input, hidden)
            # Keep only the logits of the last time step.
            logits = output.reshape(-1, output.size(-1))[-1]
            # Temperature scaling happens on the logits; softmax does the
            # exponentiation, so no separate exp() is applied.
            probs = F.softmax(logits / temperature, dim=-1)
            next_index = int(torch.multinomial(probs, num_samples=1).item())
            generated.append(idx_to_char[next_index])
            step_input = torch.tensor([[next_index]], dtype=torch.long, device=device)

    samples = seed_characters + "".join(generated)

    return samples

In [None]:
# %load C:\\Users\\TEEM\\Desktop\\shakespears\\main.py
import dataset
from model import CharRNN, CharLSTM

# import some packages you need here
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, SubsetRandomSampler
import numpy as np
import matplotlib.pyplot as plt
from dataset import Shakespeare
from model import CharRNN, CharLSTM
from generate import generate
import warnings 
# NOTE: installs a global "ignore" filter at import time, silencing every
# warning category for the whole process.
warnings.filterwarnings(action='ignore')

def train(model, trn_loader, device, criterion, optimizer):
    """ Train function

    Runs one pass over ``trn_loader``, updating the model after every batch.

    Args:
        model: network exposing ``init_hidden(batch_size)`` and
            ``model(inputs, hidden) -> (outputs, hidden)``
        trn_loader: torch.utils.data.DataLoader instance for training
        device: device for computing, cpu or gpu
        criterion: cost function
        optimizer: optimization method, refer to torch.optim

    Returns:
        trn_loss: average of the per-batch loss values, or NaN when the
            loader yields no batches
    """

    model.train()
    total_loss = 0.0
    num_batches = 0
    for inputs, targets in trn_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # A fresh zero state per batch; it must sit on the same device as
        # the inputs (tensor for an RNN, tuple of tensors for an LSTM).
        hidden = model.init_hidden(inputs.size(0))
        if isinstance(hidden, tuple):
            hidden = tuple(state.to(device) for state in hidden)
        else:
            hidden = hidden.to(device)

        optimizer.zero_grad()
        outputs, _ = model(inputs, hidden)
        # Flatten to (batch * seq, classes) vs (batch * seq,) for the loss.
        loss = criterion(outputs.reshape(-1, outputs.size(-1)), targets.reshape(-1))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    # Counting batches avoids a ZeroDivisionError on an empty loader.
    if num_batches == 0:
        return float('nan')

    trn_loss = total_loss / num_batches

    return trn_loss

def validate(model, val_loader, device, criterion):
    """ Validate function

    Evaluates the model on ``val_loader`` without updating any parameter.

    Args:
        model: network exposing ``init_hidden(batch_size)`` and
            ``model(inputs, hidden) -> (outputs, hidden)``
        val_loader: torch.utils.data.DataLoader instance for testing
        device: device for computing, cpu or gpu
        criterion: cost function

    Returns:
        val_loss: average of the per-batch loss values, or NaN when the
            loader yields no batches
    """

    model.eval()
    total_loss = 0.0
    num_batches = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            # A fresh zero state per batch; it must sit on the same device
            # as the inputs (tensor for an RNN, tuple for an LSTM).
            hidden = model.init_hidden(inputs.size(0))
            if isinstance(hidden, tuple):
                hidden = tuple(state.to(device) for state in hidden)
            else:
                hidden = hidden.to(device)

            outputs, _ = model(inputs, hidden)
            # Flatten to (batch * seq, classes) vs (batch * seq,) for the loss.
            loss = criterion(outputs.reshape(-1, outputs.size(-1)), targets.reshape(-1))

            total_loss += loss.item()
            num_batches += 1

    # Counting batches avoids a ZeroDivisionError on an empty loader.
    if num_batches == 0:
        return float('nan')

    val_loss = total_loss / num_batches

    return val_loss


def main():
    """ Main function

    Trains a CharRNN and then a CharLSTM on the Shakespeare corpus, plots
    their training/validation loss curves and prints one generated sample
    per model.

    Steps:
        1) build the dataset and split its indices 80/20 into training and
           validation DataLoaders backed by SubsetRandomSampler
        2) for each architecture: instantiate the model and an Adam
           optimizer, then train/validate for ``num_epochs`` epochs using
           torch.nn.CrossEntropyLoss
        3) plot all four loss curves
        4) generate text from both trained models with the same seed
    """

    input_file = 'C:\\Users\\TEEM\\Desktop\\shakespears\\shakespeare_train.txt'
    batch_size = 128
    num_epochs = 10
    learning_rate = 0.0001

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Dataset: shuffle the sample indices once, then hold out the first 20%
    # for validation.
    dataset = Shakespeare(input_file)
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(0.2 * dataset_size))

    np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)

    trn_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
    val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

    criterion = nn.CrossEntropyLoss()
    vocab_size = len(dataset.char2idx)

    # Both architectures share the same hyper-parameters and training loop.
    # Each model is created right before it is trained, RNN first.
    architectures = [('RNN', CharRNN), ('LSTM', CharLSTM)]
    trained_models = {}
    loss_history = {}

    for name, model_class in architectures:
        model = model_class(input_size=vocab_size, hidden_size=128,
                            output_size=vocab_size, n_layers=2).to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        train_losses, val_losses = [], []

        for epoch in range(num_epochs):
            train_loss = train(model, trn_loader, device, criterion, optimizer)
            val_loss = validate(model, val_loader, device, criterion)
            train_losses.append(train_loss)
            val_losses.append(val_loss)
            print(f"[{name}] epoch {epoch + 1}/{num_epochs} "
                  f"train loss: {train_loss:.4f} val loss: {val_loss:.4f}")

        trained_models[name] = model
        loss_history[name] = (train_losses, val_losses)

    # Plotting
    epochs = range(1, num_epochs + 1)
    for name, _ in architectures:
        train_losses, val_losses = loss_history[name]
        plt.plot(epochs, train_losses, label=f'{name} Train Loss')
        plt.plot(epochs, val_losses, label=f'{name} Val Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.title('Training and Validation Loss')
    plt.show()

    # Generate samples
    seed_characters = "The characters"
    temperature = 0.7
    for name, _ in architectures:
        samples = generate(trained_models[name], seed_characters, temperature,
                           device, dataset.char2idx, dataset.idx2char)
        print(f"\nGenerated Samples with {name}:\n")
        # generate() returns one string, so print it whole; iterating over
        # it would emit a single character per line.
        print(samples)

# Run the full train / plot / generate pipeline only when this file is
# executed as a script, not when it is imported.
if __name__ == '__main__':
    main()