In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import string
from tqdm import tqdm
import re
import random
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [2]:
from gru_train import *

## Dataset

In [3]:
# Read the corpus file line by line and normalise every line for the
# word-frequency plot in the next cell: apostrophes are removed, the text is
# lower-cased, each run of non-word characters becomes a single space, and
# leading/trailing whitespace is stripped.
with open(sentences_file, 'r') as f:
    raw_lines = f.readlines()

# Raw string on purpose: '\W' inside a plain string literal is an invalid
# escape sequence, which Python warns about.
non_word_run = re.compile(r'\W+')
normalised_lines = (
    non_word_run.sub(' ', line.replace('\'', '').lower()).strip()
    for line in raw_lines
)

# dict.fromkeys drops duplicates while keeping first-seen order, so the list is
# the same on every run (iteration order of a set of strings is not).
all_sentences = list(dict.fromkeys(normalised_lines))

In [None]:
# Rank-frequency curve of the corpus vocabulary: count every space-separated
# token across all sentences (value_counts sorts from most to least frequent)
# and plot the counts on a logarithmic y axis.
word_counts = pd.Series(' '.join(all_sentences).split(' ')).value_counts()

plt.plot(word_counts.values)
plt.yscale('log')
plt.xlabel('word rank (most frequent first)')
plt.ylabel('occurrences')
plt.title('Word frequency by rank');

In [5]:
# Build the training sentences with the helper from gru_train.
# NOTE(review): word_count_threshold=100 presumably filters on word frequency
# (the cell output reports a much smaller "word vocabulary" than "unique
# words") -- confirm against compile_sentences.
sentences = compile_sentences(
    sentences_file,
    word_count_threshold=100,
)


Importing sentences...


token set length:  28
unique words:  526824
word vocabulary length:  2829


In [6]:
# NOTE(review): vocab_size / embed_size / hidden_size are not assigned in this
# notebook until the "Model" section further down, so on a fresh kernel this
# cell presumably relies on values exported by `from gru_train import *` --
# confirm. The model is constructed again in that later section.
model = GRUTextGenerator(
    vocab_size,
    embed_size,
    hidden_size,
)

In [7]:
# NOTE(review): only the first 10 entries of `sentences` are passed on here --
# looks like a smoke-test subset; confirm before a full training run.
sentences_subset = sentences[:10]
train_dataloaders, test_dataloaders = build_datasets(sentences_subset)

## Model

In [15]:
# Example hyperparameters.
# NOTE(review): these assignments only rebind names in the notebook namespace.
# If functions inside gru_train read their own module-level copies of these
# names, they will not see the values set here -- verify.
vocab_size = 28    # a-z plus the space and '\n' characters
embed_size = 256
hidden_size = 512
batch_size = 64

# Instantiate the network with the sizes chosen above.
model = GRUTextGenerator(
    vocab_size,
    embed_size,
    hidden_size,
)

## Training

In [None]:
# Run the training loop from gru_train on the loaders built earlier.
# NOTE(review): `epochs` is not assigned anywhere in the visible notebook;
# presumably it comes from `from gru_train import *` -- confirm.
train(
    model,
    train_dataloaders,
    test_dataloaders,
    vocab_size,
    device,
    epochs=epochs,
)

In [11]:
# Load the checkpoint file named for epoch 1 into `model` via the gru_train helper.
checkpoint_path = 'model_cache/model_epoch_1.pt'
load_model(model, checkpoint_path, device=device)

Model loaded from model_cache/model_epoch_1.pt


## Inference

In [12]:
def vectorise(sentence):
    """Encode ``sentence`` as a 1-D int64 tensor of token ids on ``device``.

    Every element produced by iterating ``sentence`` is looked up with
    ``token_dict[...]``; the ids are returned in the same order.
    """
    token_ids = []
    for symbol in sentence:
        token_ids.append(token_dict[symbol])
    return torch.tensor(token_ids, dtype=torch.int64, device=device)

def softmax(logits, temperature=1.0):
    """Turn logits into a probability distribution, with temperature scaling.

    Args:
        logits: Array-like of unnormalised scores (a numpy array or a plain
            sequence of numbers).
        temperature: Strictly positive divisor applied to the logits before
            exponentiation. Values below 1 sharpen the distribution, values
            above 1 flatten it.

    Returns:
        A numpy array with the same shape as ``logits`` whose entries sum to 1.

    Raises:
        ValueError: If ``temperature`` is not strictly positive. Dividing by
            zero would otherwise yield NaN probabilities, and a negative value
            would silently invert the ranking.
    """
    # `not x > 0` also rejects NaN, which `x <= 0` would let through.
    if not temperature > 0:
        raise ValueError(f"temperature must be > 0, got {temperature!r}")

    scaled_logits = np.asarray(logits) / temperature
    # Subtracting the maximum leaves the result unchanged mathematically but
    # keeps np.exp from overflowing on large logits.
    exp_logits = np.exp(scaled_logits - np.max(scaled_logits))
    return exp_logits / exp_logits.sum()

def run_inference(model, sentence, temperature=1.0):
    """Sample one more token from ``model`` and return ``sentence`` with it appended.

    Args:
        model: Network called as ``model(token_ids)``. Element 0 of its return
            value is indexed with ``[-1, :]`` to obtain the logits used for
            sampling.
        sentence: Text generated so far; encoded with ``vectorise``.
        temperature: Forwarded to ``softmax`` to scale the logits.

    Returns:
        ``sentence`` followed by the token whose id was sampled.

    Raises:
        ValueError: If the sampled index has no entry in ``token_dict``
            (previously this case fell through and returned ``None``).
    """
    model.eval()

    token_ids = vectorise(sentence)

    # Inference only: no need to record an autograd graph.
    with torch.no_grad():
        output = model(token_ids)

    # .cpu() must come before .numpy(): `vectorise` places the input on
    # `device`, so when that is a CUDA device the logits are presumably on the
    # GPU too, and calling .numpy() on a CUDA tensor raises TypeError.
    logits = output[0][-1, :].detach().cpu().numpy()

    # Convert to probabilities and draw one index from that distribution.
    probs = softmax(logits, temperature)
    sampled_idx = np.random.choice(len(probs), p=probs)

    # Map the sampled id back to its token (first match wins).
    for token, idx in token_dict.items():
        if idx == sampled_idx:
            return sentence + token

    raise ValueError(f"sampled index {sampled_idx} has no entry in token_dict")

In [15]:
# Prompt that seeds generation. For a random single-letter start use
# random.choice(string.ascii_lowercase) instead.
sentence = 'children'

# Safety cap: without it the loop never ends if the model fails to emit '\n'.
max_new_chars = 1000
generated_chars = 0

print(sentence, end='')

# Extend the text one sampled character at a time until a newline appears.
while sentence[-1] != '\n' and generated_chars < max_new_chars:
    sentence = run_inference(model, sentence, temperature=0.6)
    print(sentence[-1], end='')
    generated_chars += 1

children 

seeds


In [37]:
import numpy as np
import json
from json import JSONEncoder


In [38]:
class EncodeTensor(JSONEncoder):
    """JSON encoder that serialises ``torch.Tensor`` values as nested lists.

    Pass it as ``cls=EncodeTensor`` to ``json.dump`` / ``json.dumps`` so that
    tensors inside the object being dumped are written as (nested) lists of
    Python numbers.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        Tensors are detached, copied to host memory, and converted with
        ``tolist()``. Anything else is delegated to ``JSONEncoder.default``,
        which raises ``TypeError`` for unsupported types.
        """
        if isinstance(obj, torch.Tensor):
            return obj.detach().cpu().tolist()
        # Zero-argument super(): the previous `super(json.NpEncoder, self)`
        # referenced an attribute the json module does not have.
        return super().default(obj)


In [None]:
# x = vectorise(sentence)
# y = model(x)[0]

# np.savetxt('test_data/generative_gru_x.csv', x.detach(), delimiter=',')
# np.savetxt('test_data/generative_gru_y.csv', y.detach(), delimiter=',')

# Write the model's state_dict to JSON; EncodeTensor handles the tensor values.
output_path = 'models/generative_gru.json'

# open(..., 'w') does not create missing parent directories, so make sure the
# target folder exists first.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, 'w') as json_file:
    json.dump(model.state_dict(), json_file, cls=EncodeTensor)