## Import

In [62]:
import numpy as np
import json
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
import fastai
from fastai import *
from fastai.data.core import DataLoaders
from fastai.learner import Learner
from fastai.losses import CrossEntropyLossFlat
from fastai.metrics import accuracy
from fastai.optimizer import Adam
from fastai.callback.progress import ProgressCallback
from parse_preprocessed_data import get_inputs_and_targets

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Hyper-Parameters

In [63]:
seq_length = 50

hidden_size = 128
learning_rate = 2e-3
dropout = 0.5
batch_size = 100
num_layers = 3
max_epochs = 20
validation_prop = 0.2

early_stopping = True
patience = 20

## Load Data

In [64]:
char_to_ix, ix_to_char, vocab_size, inputs, targets = get_inputs_and_targets('data_preprocessed/mario.txt', seq_length)
vocab_size, inputs.shape, targets.shape

Unique chars: ['\n', '-', '<', '>', '?', 'B', 'E', 'Q', 'S', 'X', '[', ']', 'b', 'o', 'x']
Number of unique chars: 15


  0%|          | 0/37 [00:00<?, ?it/s]

(15, (124700, 50, 15), (124700, 50))

In [65]:
first_three_cols = inputs[0][:3 * 17]
np.savetxt('data_preprocessed/seed.txt', first_three_cols)

In [66]:
with open('data_preprocessed/char_to_ix.json', 'w+') as json_f:
    json.dump(char_to_ix, json_f)

with open('data_preprocessed/ix_to_char.json', 'w+') as json_f:
    json.dump(ix_to_char, json_f)

In [67]:
inputs.shape,inputs.dtype

((124700, 50, 15), dtype('float64'))

In [68]:
targets.shape, targets.dtype

((124700, 50), dtype('int32'))

In [70]:
# Convert inputs and targets to PyTorch tensors
inputs_tensor = torch.tensor(inputs, dtype=torch.float32)
targets_tensor = torch.tensor(targets, dtype=torch.int32)

# Create Dataset
dataset = torch.utils.data.TensorDataset(inputs_tensor, targets_tensor)

# Create DataLoader
train_dl = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# For validation, ensure that you also provide inputs and targets as tensors and stack them into batches
valid_dl = DataLoader(dataset, batch_size=batch_size)

# Create DataLoaders object
dls = DataLoaders(train_dl, valid_dl)


## Model Callbacks

## Model

In [72]:
class CustomModel(nn.Module):
    def __init__(self, vocab_size, hidden_size, num_layers, dropout):
        super(CustomModel, self).__init__()
        self.lstm = nn.LSTM(input_size=vocab_size, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout)
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(in_features=hidden_size, out_features=vocab_size)
        self.log_softmax = nn.LogSoftmax(dim=2)

    def forward(self, x):
        output, _ = self.lstm(x)
        output = self.dropout(output)
        output = self.linear(output)  # Apply linear transformation
        output = self.log_softmax(output)
        return output

# Initialize model
model = CustomModel(vocab_size=vocab_size, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout)


In [73]:
learn = Learner(dls, model, opt_func=Adam, loss_func=CrossEntropyLossFlat(), metrics=accuracy)

## Train Model

In [74]:
learn.fit(n_epoch=max_epochs, lr=learning_rate, cbs=ProgressCallback())

epoch,train_loss,valid_loss,accuracy,time


epoch,train_loss,valid_loss,accuracy,time
0,0.698476,0.684579,0.801721,00:08
1,0.677596,0.666439,0.805525,00:08
2,0.676487,0.664876,0.805048,00:08
3,0.671318,0.663819,0.805416,00:07
4,0.676062,0.663163,0.805481,00:08
5,0.673344,0.663369,0.805487,00:08
6,0.669844,0.663334,0.805218,00:08
7,0.672197,0.663267,0.805501,00:08
8,0.670393,0.662951,0.805416,00:07
9,0.671993,0.662566,0.805416,00:08


In [75]:
torch.save(model.state_dict(), 'trained_models/mario_lstm.h5')