# Model Evaluation

In [23]:
import numpy as np
import torch

## I. Set up the model from saved results

In [None]:
# Read the full text from which the character vocabulary is built.
with open('openpyxl.txt', 'r', encoding="utf8") as fp:
    text = fp.read()

# Distinct characters, sorted so that each one always gets the same index.
char_set = set(text)
chars_sorted = sorted(char_set)

# char2int encodes a character as its index; char_array maps an index back
# to its character.
char2int = dict(zip(chars_sorted, range(len(chars_sorted))))
char_array = np.array(chars_sorted)

In [29]:
# Run the evaluation on the CPU.
device = torch.device("cpu")

In [36]:
import torch.nn as nn

class RNN(nn.Module):
    """Single-step recurrent model: embedding -> one-layer LSTM -> linear.

    Each call to ``forward`` consumes one time step and returns the updated
    LSTM state, so the caller carries ``hidden`` and ``cell`` from one call
    to the next.

    Args:
        vocab_size: Number of distinct input tokens; also the number of
            output logits.
        embed_dim: Size of the embedding vectors fed to the LSTM.
        rnn_hidden_size: Size of the LSTM hidden and cell states.
    """

    def __init__(self, vocab_size, embed_dim, rnn_hidden_size):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.rnn_hidden_size = rnn_hidden_size
        self.rnn = nn.LSTM(embed_dim, rnn_hidden_size, batch_first=True)
        self.fc = nn.Linear(rnn_hidden_size, vocab_size)

    def forward(self, x, hidden, cell):
        """Advance the model by one time step.

        Args:
            x: Integer token indices, intended to be of shape ``(batch,)``.
            hidden: LSTM hidden state, e.g. from ``init_hidden``.
            cell: LSTM cell state, e.g. from ``init_hidden``.

        Returns:
            A tuple ``(logits, hidden, cell)``; for ``x`` of shape
            ``(batch,)`` the logits have shape ``(batch, vocab_size)``.
        """
        # Insert a length-1 sequence axis: the LSTM is batch_first.
        out = self.embedding(x).unsqueeze(1)
        out, (hidden, cell) = self.rnn(out, (hidden, cell))
        # Flatten everything after the batch axis into a single axis.
        out = self.fc(out).reshape(out.size(0), -1)
        return out, hidden, cell

    def init_hidden(self, batch_size):
        """Return zero-filled ``(hidden, cell)`` states for ``batch_size``.

        The states are allocated with the dtype and device of the model's
        own weights, so they stay compatible with the model wherever it
        lives instead of depending on a module-level ``device`` variable.
        """
        weight = self.fc.weight
        shape = (1, batch_size, self.rnn_hidden_size)
        hidden = torch.zeros(shape, dtype=weight.dtype, device=weight.device)
        cell = torch.zeros(shape, dtype=weight.dtype, device=weight.device)
        return hidden, cell
    
# Network dimensions. Presumably these have to match the values used when the
# saved weights were trained -- TODO confirm against the training notebook.
vocab_size = len(char_array)  # one class per distinct character
embed_dim, rnn_hidden_size = 256, 512

In [38]:
# Rebuild the architecture, then restore the saved weights from 'model.pth'.
model = RNN(vocab_size, embed_dim, rnn_hidden_size)
# map_location remaps tensors that were saved on another device (e.g. a GPU)
# onto `device`, so the checkpoint also loads on a machine without that device.
model.load_state_dict(
    torch.load('model.pth', map_location=device, weights_only=True)
)
# Switch to inference mode; as the last expression of the cell, this also
# displays the module summary.
model.eval()

RNN(
  (embedding): Embedding(97, 256)
  (rnn): LSTM(256, 512, batch_first=True)
  (fc): Linear(in_features=512, out_features=97, bias=True)
)

## II. Evaluation Function

Once initialized with the input string, we can start evaluating the sequence.  


Each new character is predicted from the previous ones. The model outputs one logit for each character of the vocabulary. To ensure we don't get the same answer every time, we sample the next character from the distribution defined by these logits. We introduce a ```scale_factor``` that multiplies the logits: it plays the role of an inverse temperature and controls the randomness.

If ```scale_factor > 1.0``` the distribution becomes sharper and the model is more certain; if ```scale_factor < 1.0``` the distribution becomes flatter and we add more randomness.

In [161]:
# use for sampling
from torch.distributions.categorical import Categorical

In [84]:
def sample(model, starting_str, len_generated_text, scale_factor):
    """Generate text character by character, seeded with ``starting_str``.

    The seed is first fed through ``model`` one character at a time to build
    up the recurrent state. Then ``len_generated_text`` characters are drawn:
    each one is sampled from the categorical distribution defined by the
    scaled logits and fed back in as the next input.

    Args:
        model: Network exposing ``init_hidden(batch_size)`` and a call
            ``model(x, hidden, cell)`` returning ``(logits, hidden, cell)``.
        starting_str: Non-empty seed text; every character must be a key of
            the module-level ``char2int`` mapping.
        len_generated_text: Number of characters to append to the seed.
        scale_factor: Multiplier applied to the logits before sampling.
            Larger values sharpen the distribution (less random output),
            smaller values flatten it (more random output).

    Returns:
        ``starting_str`` followed by the generated characters.

    Raises:
        ValueError: If ``starting_str`` is empty.
        KeyError: If ``starting_str`` contains a character that is missing
            from ``char2int``.
    """
    if not starting_str:
        raise ValueError("starting_str must contain at least one character")

    # 1. Initialize model with input sequence
    encoded_input = torch.tensor([char2int[s] for s in starting_str])
    encoded_input = torch.reshape(encoded_input, (1, -1))

    model.eval()
    hidden, cell = model.init_hidden(1)
    # Keep the inputs on the same device as the recurrent state.
    encoded_input = encoded_input.to(hidden.device)

    # Pieces are collected in a list and joined once at the end.
    pieces = [starting_str]

    # Pure inference: without no_grad the autograd graph would keep growing
    # through the hidden state carried across every generation step.
    with torch.no_grad():
        # Every seed character except the last only updates the state.
        for seed_char in encoded_input[0, :-1]:
            _, hidden, cell = model(seed_char.view(1), hidden, cell)

        # 2. Generate sequence successively
        last_char = encoded_input[:, -1]
        for _ in range(len_generated_text):
            logits, hidden, cell = model(last_char.view(1), hidden, cell)
            logits = torch.squeeze(logits, 0)
            # Scaling the logits controls the randomness of the sampling.
            scaled_logits = logits * scale_factor
            last_char = Categorical(logits=scaled_logits).sample()
            pieces.append(str(char_array[last_char.item()]))

    return "".join(pieces)

In [150]:
# Generate 500 characters after the seed 'def'; the logits are multiplied by 10 before sampling.
print(sample(model, starting_str='def', len_generated_text=500, scale_factor=10.))

default = blurRade
        self.showVertical = grouping
        self.showVertical = grouping
        self.showVertical = grouping
        self.showVertical = grouping
        self.border = value


class ConditionalFormat(Serialisable):

    tagname = "pivotCache"

    t = Set(values=(['self.conditionalFormats-vell')

    def __init__(self,
                 showVerts=None,
                 showVerts=None,
                 showVerts=None,
                 stdDevSubtotal=None,
                 showVer


In [163]:
# Generate 500 characters after the seed 'class'; the logits are multiplied by 10 before sampling.
print(sample(model, starting_str='class', len_generated_text=500, scale_factor=10.))

class ColorDescriptor(Serialisable):

    tagname = "pivotCache"

    txPr = Typed(expected_type=Series, allow_none=True)
    showVerts = Bool(allow_none=True)
    extLst = Typed(expected_type=Series, allow_none=True)
    showVerts = Bool(allow_none=True)
    showVerts = Bool(allow_none=True)
    showVerts = Bool(allow_none=True)
    showVerts = Bool(allow_none=True)
    showVerts = Bool(allow_none=True)
    showValue = Bool(allow_none=True)
    showVerts = Bool(allow_none=True)
    showVerts = Bool(


In [185]:
# Generate 500 characters after the seed '@'; the logits are multiplied by 5 before sampling.
print(sample(model, starting_str='@', len_generated_text=500, scale_factor=5.))

@property
    def authors = None
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    


We are able to generate some text that resembles proper code. Although we are far away from proper code, it is still remarkable what a simple model can achieve.