In [1]:
!nvidia-smi

Fri May 12 16:33:06 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 530.41.03              Driver Version: 530.41.03    CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                  Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1060         Off| 00000000:01:00.0  On |                  N/A |
| N/A   51C    P8                4W /  N/A|     63MiB /  6144MiB |      7%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                         

In [15]:
import numpy as np
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import math



# Plan of action

## Steps
* Download the data
* Tokenizer
* Batch creator
* Create a basic forward pass
* self attention layer
* Create a training process


In [1]:
import urllib.request

# download tiny shakespeare
url = 'https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt'

In [2]:
# download the file directly to a variable
text = urllib.request.urlopen(url).read().decode('utf-8')

## Create a tokenizer at the character level

In [52]:
tokens = list(set(text))
print(len(tokens))

# Create an encoder decoder for our tokens to turn them into numbers and back
encoder_decoder = {token: i for i, token in enumerate(tokens)}
decoder_encoder = {i: token for i, token in enumerate(tokens)}

encode = lambda x: [encoder_decoder[i] for i in x]
decode = lambda x: [decoder_encoder[i] for i in x]

print(encode("hii there"))
print(decode(encode("hii there")))

65
[27, 9, 9, 52, 37, 27, 17, 23, 17]
['h', 'i', 'i', ' ', 't', 'h', 'e', 'r', 'e']


## Creating our dataset
We split the data into training and validation with 90/10 split

In [61]:
import torch
data = torch.tensor(encode(text), dtype=torch.long)

print(data[1000:])

# Split the data into training and validation sets
split_val = int(len(data) * 0.9)
train_data = data[:split_val]
val_data = data[split_val:]


tensor([54, 17, 22,  ..., 20, 11, 55])


In [62]:
len(train_data), len(val_data)

(1003854, 111540)

In [63]:
train_data[:10]

tensor([ 0,  9, 23, 60, 37, 52, 45,  9, 37,  9])

### Turning our data into batches

In [85]:
batch_size = 4
block_size = 8



def get_batch(split):
    if split == 'train':
        data = train_data
    else:
        data = val_data
    batch_start_indexes = torch.randint(len(data) - block_size, (batch_size,))
    x = torch.stack([data[i:i+block_size] for i in batch_start_indexes])
    y = torch.stack([data[i+1:i+block_size+1] for i in batch_start_indexes])
    return x,y

xb, yb = get_batch('train')

for b in range(batch_size):
    for t in range(block_size):
        x = xb[b][:t+1]
        y = yb[b][t]
        print(f"Input is {x} and target is {y}")




Input is tensor([17]) and target is 52
Input is tensor([17, 52]) and target is 29
Input is tensor([17, 52, 29]) and target is 16
Input is tensor([17, 52, 29, 16]) and target is 37
Input is tensor([17, 52, 29, 16, 37]) and target is 47
Input is tensor([17, 52, 29, 16, 37, 47]) and target is 52
Input is tensor([17, 52, 29, 16, 37, 47, 52]) and target is 3
Input is tensor([17, 52, 29, 16, 37, 47, 52,  3]) and target is 23
Input is tensor([29]) and target is 20
Input is tensor([29, 20]) and target is 55
Input is tensor([29, 20, 55]) and target is 40
Input is tensor([29, 20, 55, 40]) and target is 3
Input is tensor([29, 20, 55, 40,  3]) and target is 16
Input is tensor([29, 20, 55, 40,  3, 16]) and target is 29
Input is tensor([29, 20, 55, 40,  3, 16, 29]) and target is 52
Input is tensor([29, 20, 55, 40,  3, 16, 29, 52]) and target is 27
Input is tensor([37]) and target is 27
Input is tensor([37, 27]) and target is 34
Input is tensor([37, 27, 34]) and target is 29
Input is tensor([37, 27, 