In [1]:
import sys
import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np


In [2]:
# Environment setup: put the local `models` directory at the front of the
# import path and choose the compute device.
sys.path.insert(0, 'models')

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load model args and state dict from saved checkpoint

In [3]:
# The model was trained on a GeForce 3060 Ti GPU for 5000 steps with a batch size of 16.
checkpoint_pt = 'out-shakespeare-char/ckpt.pt'
# map_location remaps the stored tensors onto the device selected in the setup
# cell, so a checkpoint written during a GPU run also loads on a CPU-only machine.
checkpoint = torch.load(checkpoint_pt, map_location=device)
model_state_dict = checkpoint['model']  # parameters, later passed to load_state_dict
model_args = checkpoint['model_args']   # keyword arguments used to rebuild the model

In [4]:
# Checkpoints saved from a compiled model carry an extra `_orig_mod.` prefix on
# every state-dict key (PyTorch adds it when the model has been compiled).
# Rename those keys in place so they match the parameter names of a plain model.
unwanted_prefix = '_orig_mod.'
# Iterate over a snapshot of the keys because the dict is modified inside the loop.
for key in list(model_state_dict.keys()):
    if not key.startswith(unwanted_prefix):
        continue
    tensor = model_state_dict.pop(key)
    model_state_dict[key[len(unwanted_prefix):]] = tensor

In [5]:
# NOTE: this import has to stay below the `sys.path.insert(0, 'models')` call in
# the setup cell -- presumably that is what makes DecoderModels importable.
from DecoderModels import MiniGPT

# Rebuild the architecture from the checkpointed hyperparameters and restore
# the trained weights.
model = MiniGPT(**model_args)
model.load_state_dict(model_state_dict)
# Switch to evaluation mode immediately: the network contains Dropout(p=0.2)
# layers, and the sampling cells run before the validation cell that calls
# model.eval(), so they would otherwise sample with dropout still active.
# Both .to() and .eval() return the module, so the cell still displays its repr.
model.to(device).eval()

MiniGPT(
  (input_embedding): Embedding(65, 768)
  (position_embedding): LearnablePositionEmbedding(
    (PE): Embedding(128, 768)
  )
  (drop): Dropout(p=0.2, inplace=False)
  (decoder): ModuleList(
    (0-11): 12 x DecoderLayer(
      (self_attn): MultiHeadSelfAttention(
        (heads): ModuleList(
          (0-11): 12 x SelfAttentionHead(
            (M_key): Linear(in_features=768, out_features=64, bias=False)
            (M_query): Linear(in_features=768, out_features=64, bias=False)
            (M_value): Linear(in_features=768, out_features=64, bias=False)
            (attn_dropout): Dropout(p=0.2, inplace=False)
          )
        )
        (projection): Linear(in_features=768, out_features=768, bias=True)
        (resid_dropout): Dropout(p=0.2, inplace=False)
      )
      (ffn): FeedForward(
        (ffn): Sequential(
          (0): Linear(in_features=768, out_features=3072, bias=True)
          (1): GELU(approximate='none')
          (2): Linear(in_features=3072, out_featu

In [6]:
# Display the hyperparameters that were stored alongside the weights in the checkpoint.
model_args

{'num_layers': 12,
 'num_heads': 12,
 'embed_size': 768,
 'ctx_length': 128,
 'bias': False,
 'vocab_size': 65,
 'dropout': 0.2}

## Load dataset and dataloader

In [7]:
# Memory-map the validation split read-only as a flat array of uint16 values.
val_data = np.memmap('data/shakespeare_char/val.bin', mode='r', dtype=np.uint16)

# meta.pkl provides the 'itos' / 'stoi' lookup tables used by decode() and encode().
# NOTE: unpickling can execute arbitrary code -- only load a meta.pkl you trust.
with open('data/shakespeare_char/meta.pkl', 'rb') as meta_file:
    meta = pickle.load(meta_file)

def decode(l):
    """Turn an iterable of vocabulary indices into a string using meta['itos']."""
    pieces = []
    for index in l:
        pieces.append(meta['itos'][index])
    return ''.join(pieces)
    
def encode(s):
    """Turn a string into a list of vocabulary indices using meta['stoi']."""
    indices = []
    for char in s:
        indices.append(meta['stoi'][char])
    return indices

In [8]:
# Length of every sequence yielded by get_batch; taken from the checkpoint's model args.
ctx_length = model_args['ctx_length']
# Number of ctx_length-long sequences in each full batch yielded by get_batch.
batch_size = 16
def get_batch(split):
    """Yield consecutive, non-overlapping (x, y) batches that sweep one data split.

    The split is trimmed to a whole number of ctx_length-long sequences (plus one
    trailing value that serves as the final target) and walked front to back in
    chunks of batch_size * ctx_length values; there is no random sampling.

    Args:
        split: 'train' selects the global `train_data`; any other value selects
            the global `val_data`.

    Yields:
        (x, y): int64 tensors moved to `device`, each of shape (rows, ctx_length),
        where rows == batch_size except possibly for the last batch. y holds the
        values one position after the corresponding entries of x.
    """
    # NOTE(review): `train_data` is not defined in the cells visible here --
    # confirm it is loaded before requesting the 'train' split.
    data = train_data if split == 'train' else val_data
    # Number of input positions that fit into whole ctx_length-long sequences.
    num_inputs = (len(data) - 1) // ctx_length * ctx_length
    data = data[:num_inputs + 1]  # +1 so the last input still has a target
    chunk_size = batch_size * ctx_length
    # Iterate over input positions only (stop at num_inputs, not num_inputs + 1).
    # Including the trailing target position as a start index produced an empty
    # final slice whenever num_inputs was a multiple of chunk_size (or zero), and
    # reshape(-1, ctx_length) raises on an empty tensor.
    for start in range(0, num_inputs, chunk_size):
        end = min(num_inputs, start + chunk_size)
        x = torch.from_numpy(data[start:end].astype(np.int64)).reshape(-1, ctx_length)
        y = torch.from_numpy(data[start + 1:end + 1].astype(np.int64)).reshape(-1, ctx_length)
        yield x.to(device), y.to(device)

## Generate some examples

In [9]:
# Sample continuations of a bare newline prompt at increasing temperatures.
text = '\n'
# Build the (1, len(text)) prompt directly as int64 on the device chosen in the
# setup cell; a hard-coded 'cuda' would crash on CPU-only machines.
seq = torch.tensor([encode(text)], dtype=torch.long, device=device)
for t in [0.5, 1.0, 1.5, 2.0]:
    # The positional 100 is presumably the number of new tokens to generate.
    result = model.generate(seq, 100, temperature=t, top_k=5)
    print(decode(result[0].tolist()))
    print("**********")


That shall she shall dine thee from the world.

Second Servingman:
The heavens to see the constate o
**********

Thou and transmen on and this sill-day of mark,
That his stones to the world.

Servant:
Well, what w
**********

And marry a murdder these forghasts, and tender former lights.

GLOUCESTER:
How far, teach the truth
**********

I'll sue task of a fight tidow trimpets the feast
Whatsoe's bust aganst to burnet, an order, stear o
**********


In [10]:
# Sample continuations of a Shakespeare-style prompt at increasing temperatures.
text = 'These violent delights'
# Build the (1, len(text)) prompt directly as int64 on the device chosen in the
# setup cell; a hard-coded 'cuda' would crash on CPU-only machines.
seq = torch.tensor([encode(text)], dtype=torch.long, device=device)
for t in [0.5, 1.0, 1.5, 2.0]:
    # The positional 100 is presumably the number of new tokens to generate.
    result = model.generate(seq, 100, temperature=t, top_k=5)
    print(decode(result[0].tolist()))
    print("**********")

These violent delights, which they have made
Their fellows from their faces of their friends,
And their father was so far 
**********
These violent delights of the priest.

Lord Mayor:
And where is he then so free thine eyes,
The date of hope of traitors, 
**********
These violent delights, and wert the foest of speak triumph
That wash him they hear; what worst their flant,
Both, at her 
**********
These violent delights arit,
Bud thee, and thy,' taken, to be so,--all trees!--
They was declined without, at
Time shoroug
**********


In [11]:
# Sample continuations of a well-known prompt at increasing temperatures.
text = 'To be or not to be'
# Build the (1, len(text)) prompt directly as int64 on the device chosen in the
# setup cell; a hard-coded 'cuda' would crash on CPU-only machines.
seq = torch.tensor([encode(text)], dtype=torch.long, device=device)
for t in [0.5, 1.0, 1.5, 2.0]:
    # The positional 100 is presumably the number of new tokens to generate.
    result = model.generate(seq, 100, temperature=t, top_k=5)
    print(decode(result[0].tolist()))
    print("**********")

To be or not to be so.

KING RICHARD II:
So did I see the king of his honours
That hath beheld the heart of his highne
**********
To be or not to be a good of soul,
To seek a fault, as it is that are alms,
That thou shalt have something the happy d
**********
To be or not to beet holy trust again
The strange of the solemness of all
Of treacons them, all their side arm,
Turn'd
**********
To be or not to be set in somewharges. Then
still weeps your friendlot-peaction flocks me at tears
attends:--

POLIMER
**********


In [12]:
# Sample continuations of a modern-English prompt at increasing temperatures.
text = 'I like the movie Barbie'
# Build the (1, len(text)) prompt directly as int64 on the device chosen in the
# setup cell; a hard-coded 'cuda' would crash on CPU-only machines.
seq = torch.tensor([encode(text)], dtype=torch.long, device=device)
for t in [0.5, 1.0, 1.5, 2.0]:
    # The positional 100 is presumably the number of new tokens to generate.
    result = model.generate(seq, 100, temperature=t, top_k=5)
    print(decode(result[0].tolist()))
    print("**********")

I like the movie Barbies and means,
To see the firest speech of the world:
We are not the crown for the fiery son,
And the 
**********
I like the movie Barbies and fearful that trence
In this forty brats any out.

POLIXENES:
O, with some foemile, will not st
**********
I like the movie Barbies
To leave it the wenches, and stale to tasquisities,--

CATESBY:
And, stay, mine own lords thapes t
**********
I like the movie Barbie thine;
Alack, a voint, be groat and loog,
Will so fail, as I am, is at,--
Than alrah o'er y'er hast
**********


## Quantitative Analysis

In [13]:
# One sequential pass over the validation split, recording the loss of each batch.
dataloader = get_batch('val')
losses = []
# NOTE(review): perplexity is initialised here but never updated in this cell.
perplexity = 0
model.eval()  # disable dropout for evaluation
# No gradients are needed for evaluation; no_grad avoids building an autograd
# graph for every forward pass, which saves memory and time.
with torch.no_grad():
    for inputs, labels in dataloader:
        _, loss = model(inputs, labels)
        losses.append(loss.item())

# NOTE: this is an unweighted mean of per-batch losses; the final batch can hold
# fewer sequences than the others, so it is only approximately a per-token mean.
losses = np.array(losses)
print(f"Average model loss on the validation split is: {losses.mean()}")

Average model loss on the validation split is: 1.484342564236034
