In [1]:
# imports
import os
import numpy as np
import time
from tinygrad import Tensor
from tinygrad import Device
import matplotlib.pyplot as plt # for making figures
%matplotlib inline

Device.DEFAULT = "GPU" # use tinygrad's "GPU" backend, since METAL doesn't work on older Intel Macs
print(Device.DEFAULT) # echo the selected backend so the choice is visible in the cell output

GPU


In [2]:
# load pokemon move names (first CSV column), skipping the header and rows without a second column

import csv

moves = []  # first-column value of every accepted CSV row

with open('Pokemon_moves.csv', newline='') as csvfile:
    rows = csv.reader(csvfile)
    next(rows)  # discard the header row
    # keep only rows that have more than one column
    moves.extend(record[0] for record in rows if len(record) > 1)

print('first five moves in dataset:')
print(moves[:5])
print(f'total number of moves: {len(moves)}')

first five moves in dataset:
['10,000,000 Volt Thunderbolt', 'Absorb', 'Accelerock', 'Acid', 'Acid Armor']
total number of moves: 724


In [3]:
# build the vocabulary of characters and mappings to/from integers
chars = sorted(set(''.join(moves)))

# '.' is reserved as the end-of-name marker at index 0. If it also occurred in
# the data, the assignment below would overwrite its enumerate() index and leave
# a gap, so the largest index would equal num_unique_chars (out of range for
# anything sized by it). Fail loudly instead of building a broken mapping.
assert '.' not in chars, "'.' occurs in the move names; choose a different end marker"

# stoi = string to int, itos = int to string
stoi = {s: i + 1 for i, s in enumerate(chars)}  # data characters get 1..len(chars)
stoi['.'] = 0  # end marker
itos = {i: s for s, i in stoi.items()}
print(itos)

# number of distinct symbols (data characters plus the end marker);
# per the original note this is used later to set a Tensor dimension
all_chars = set(stoi)  # stoi's keys are single characters, so this is the symbol set
num_unique_chars = len(all_chars)

print('num_unique_chars =', num_unique_chars)
print('all_chars = ', all_chars)

{1: ' ', 2: "'", 3: ',', 4: '-', 5: '0', 6: '1', 7: '2', 8: '7', 9: 'A', 10: 'B', 11: 'C', 12: 'D', 13: 'E', 14: 'F', 15: 'G', 16: 'H', 17: 'I', 18: 'J', 19: 'K', 20: 'L', 21: 'M', 22: 'N', 23: 'O', 24: 'P', 25: 'Q', 26: 'R', 27: 'S', 28: 'T', 29: 'U', 30: 'V', 31: 'W', 32: 'X', 33: 'Y', 34: 'Z', 35: 'a', 36: 'b', 37: 'c', 38: 'd', 39: 'e', 40: 'f', 41: 'g', 42: 'h', 43: 'i', 44: 'k', 45: 'l', 46: 'm', 47: 'n', 48: 'o', 49: 'p', 50: 'q', 51: 'r', 52: 's', 53: 't', 54: 'u', 55: 'v', 56: 'w', 57: 'x', 58: 'y', 59: 'z', 0: '.'}
num_unique_chars = 60
all_chars =  {"'", 'v', 'F', 'e', ',', 'o', 'G', 'A', 'c', 'S', 'k', 'w', 's', 'l', 'I', 'z', 'p', 'r', '-', 'B', 'O', 'K', 'M', 'H', 'P', 'V', 'x', 'Q', 'D', 'W', 'y', 'q', 'E', 't', 'C', 'h', ' ', 'J', 'U', 'X', 'g', 'N', '0', 'T', '1', 'b', 'Z', 'Y', 'a', 'i', 'f', 'n', 'm', '.', 'd', 'R', 'u', 'L', '7', '2'}


In [4]:
# build the dataset ( in terms of train, val, and test sets )

block_size = 8  # context length: how many chars do we take to predict the next one?

def build_dataset(moves):
    """Turn move names into (context, next-character) example pairs.

    Every name is extended with a trailing '.', and for each character of the
    result one example is recorded: the `block_size` most recent character
    indices as input, and the current character's index as label. The context
    for a new name starts out as `block_size` zeros.

    Reads the module-level `stoi` mapping and `block_size`.

    Returns:
        (X, Y): Tensors built from the collected contexts and labels.
        Both shapes are printed before returning.
    """
    contexts = []  # inputs: one list of block_size indices per example
    targets = []   # labels: index of the character that follows each context

    for name in moves:
        window = [0] * block_size  # padded start-of-name context
        for idx in (stoi[c] for c in name + '.'):
            contexts.append(window)
            targets.append(idx)
            # slide the window: drop the oldest index, append the newest
            # (builds a new list, so the one just stored is never modified)
            window = [*window[1:], idx]

    X, Y = Tensor(contexts), Tensor(targets)
    print(X.shape, Y.shape)
    return X, Y

import random

# Shuffle with a fixed seed so the train/dev/test split is identical on every run.
# Sorting first makes the result independent of the list's current order, so
# re-running this cell does not reshuffle an already-shuffled list.
SPLIT_SEED = 42
moves.sort()
random.Random(SPLIT_SEED).shuffle(moves)

n1 = int(0.8*len(moves))  # end of the training slice (first 80% of names)
n2 = int(0.9*len(moves))  # end of the dev slice (next 10%); the rest is the test slice

Xtr, Ytr = build_dataset(moves[:n1])
Xdev, Ydev = build_dataset(moves[n1:n2])
Xte, Yte = build_dataset(moves[n2:])

(6463, 8) (6463,)
(753, 8) (753,)
(820, 8) (820,)


In [5]:
### boilerplate done, now we get to the action: ###

In [None]:
# utility function we will use later when comparing manual gradients to tinygrad gradients
def cmp(s, dt, t):
    ex = Tensor.