## Resources
Tutorial: Andrej Karpathy, "Let's build GPT: from scratch, in code, spelled out." — https://www.youtube.com/watch?v=kCc8FmEb1nY

In [2]:
import os

import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
from torch.utils.data.dataloader import DataLoader

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt

# NOTE: the statement order below is intentionally left as it was. The wildcard
# imports can rebind names, so reordering could change which definition wins.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from tokenizer import SimpleTokenizer
torch.manual_seed(1337)
from dataset import SpeechesClassificationDataset

from utilities import *

from hyperparams import *

import nltk

from transformer import CustomTransformerDecoder, CustomTransformerEncoder

from ray import train, tune
# nltk.download('punkt', download_dir="../data/nltk_punkt")
# nltk.data.path.append("../data/nltk_punkt")

## Tutorial Scratch Space

#### Self-Attention

In [16]:
# Toy input for the self-attention walkthrough.
#   B: number of sequences in the batch
#   T: number of tokens in each sequence
#   C: size of each token's embedding vector
# x therefore holds 4 sequences, each an 8 x 2 matrix of token embeddings.
torch.manual_seed(1337)  # fixed seed so the random values are reproducible
B, T, C = 4, 8, 2
x = torch.randn((B, T, C))
x.shape

torch.Size([4, 8, 2])

## HW2 Scratch Space

In [17]:
# Read the whole language-modelling training corpus into a single string.
# The encoding is pinned explicitly so decoding does not depend on the
# platform's locale default.
# NOTE(review): assumes train_LM.txt is UTF-8 encoded — confirm.
with open("../data/speechesdataset/train_LM.txt", "r", encoding="utf-8") as f:
    text = f.read()

# scd = SpeechesClassificationDataset(tokenizer=tok, file_path="data/train_CLS.tsv")

In [18]:
# Build a tokenizer from the raw LM corpus string read in the previous cell.
# NOTE(review): a second tokenizer (`tokenizer`) is built further down from
# load_texts(); presumably only one of the two is needed — confirm.
tok = SimpleTokenizer(text=text)

### Positional Encoding

In [19]:
# Build the tokenizer from every text in the dataset directory, then wrap the
# classification training split in a shuffled DataLoader.
data_dir = os.path.join("..", "data", "speechesdataset")
print("Loading data and creating tokenizer ...")
# Reuse data_dir instead of repeating the path literal, so the directory is
# defined in exactly one place.
texts = load_texts(data_dir)
tokenizer = SimpleTokenizer(' '.join(texts)) # create a tokenizer from the data
vocab_size = tokenizer.vocab_size
print("Vocabulary size is", vocab_size)

train_CLS_dataset = SpeechesClassificationDataset(tokenizer, os.path.join(data_dir, "train_CLS.tsv"))
# batch_size and collate_batch are not defined in this notebook; presumably
# they come from the wildcard imports (hyperparams / utilities) — confirm.
train_CLS_loader = DataLoader(train_CLS_dataset, batch_size=batch_size, collate_fn=collate_batch, shuffle=True)

Loading data and creating tokenizer ...
Vocabulary size is 5755


In [20]:
# Sinusoidal positional-encoding table: even columns hold sin(pos * freq),
# odd columns hold cos(pos * freq), with geometrically decreasing frequencies.
position = torch.arange(block_size).unsqueeze(1)  # shape (block_size, 1)
# One frequency per even column: ceil(n_embd / 2) values.
div_term = torch.exp(torch.arange(0, n_embd, 2) * (-np.log(10000.0) / n_embd))
embedding = nn.Embedding(vocab_size, n_embd)
angles = position * div_term  # broadcasts to (block_size, ceil(n_embd / 2))
pe = torch.zeros(block_size, n_embd)
pe[:, 0::2] = torch.sin(angles)
# There are only floor(n_embd / 2) odd columns, so trim the last frequency when
# n_embd is odd; otherwise the assignment fails with a shape mismatch. For even
# n_embd the slice is a no-op and the result is unchanged.
pe[:, 1::2] = torch.cos(angles[:, : n_embd // 2])

In [21]:
# Show which torch device was selected in the imports cell.
device

device(type='cuda')

In [22]:
# Instantiate the decoder and move its parameters to the selected device.
# CustomTransformerDecoder is already imported in the imports cell at the top
# of the notebook, so no per-cell import is needed. The previous in-cell import
# pulled in the Encoder, which this cell never uses.
cte2 = CustomTransformerDecoder(device, vocab_size, n_embd, n_head, n_layer, n_hidden).to(device)

In [23]:
# plot_dir = os.path.join("..", "data","plots", "part1")
# u = Utilities(tokenizer, cte_trained, plot_dir, device)
# u.sanity_check("The quick brown fox jumped over the lazy dog.", block_size=block_size)
# u.sanity_check("Doing the same thing and expecting different results is insanity.", block_size=block_size)

In [24]:
# Sample sentence for a manual forward pass through the model.
sentence = "The quick brown fox jumped over the lazy dog."
# Convert the sentence with the corpus tokenizer.
# NOTE(review): presumably returns a list of integer token ids (it is sliced
# and list-concatenated in the next cell) — confirm.
encoding = tokenizer.encode(sentence)

In [25]:
# Clip the token ids to at most block_size entries, then right-pad with zeros
# so the result is always exactly block_size long.
# NOTE(review): 0 is presumably the tokenizer's padding id — confirm.
truncated = encoding[:block_size]
padding = [0] * (block_size - len(truncated))
padded_sentence = truncated + padding

In [26]:
# Turn the padded ids into an int64 tensor, add a leading batch dimension of
# size 1, and move it to the same device as the model.
token_ids = torch.tensor(padded_sentence, dtype=torch.long)
input_tensor = token_ids.unsqueeze(0).to(device)

In [27]:
# Forward pass on the single padded sentence. The recorded output is a tuple:
# a 2-D tensor followed by a list of softmax outputs (presumably the logits
# and the per-layer/per-head attention maps — confirm in transformer.py).
# NOTE(review): the recorded output is NaN almost everywhere, and the first
# attention map ends in an all-NaN row. A softmax over a row that is entirely
# -inf yields NaN, so a fully-masked row in the model's attention mask is the
# likely cause — confirm and fix in transformer.py.
cte2(input_tensor)

(tensor([[nan, nan, nan,  ..., nan, nan, nan]], device='cuda:0',
        grad_fn=<AddmmBackward0>),
 [tensor([[[0.0000, 0.0292, 0.0193,  ..., 0.0236, 0.0279, 0.0273],
           [0.0000, 0.0000, 0.0363,  ..., 0.0395, 0.0413, 0.0343],
           [0.0000, 0.0000, 0.0000,  ..., 0.0378, 0.0344, 0.0329],
           ...,
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.4417, 0.5583],
           [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.0000],
           [   nan,    nan,    nan,  ...,    nan,    nan,    nan]]],
         device='cuda:0', grad_fn=<SoftmaxBackward0>),
  tensor([[[nan, nan, nan,  ..., nan, nan, nan],
           [nan, nan, nan,  ..., nan, nan, nan],
           [nan, nan, nan,  ..., nan, nan, nan],
           ...,
           [nan, nan, nan,  ..., nan, nan, nan],
           [nan, nan, nan,  ..., nan, nan, nan],
           [nan, nan, nan,  ..., nan, nan, nan]]], device='cuda:0',
         grad_fn=<SoftmaxBackward0>),
  tensor([[[nan, nan, nan,  ..., nan, nan, nan],
         

In [76]:
# 3x3 float32 matrix holding 1..9 in row-major order.
x = torch.arange(1.0, 10.0).reshape(3, 3)
# Boolean mask; True marks the entries that will be overwritten.
mask = torch.tensor(
    [
        [True, False, False],
        [False, True, True],
        [False, True, False],
    ]
)

In [77]:
# masked_fill writes the fill value wherever mask is True and leaves the other
# entries untouched. The result is rebound to x, so re-running this cell
# operates on the already-masked matrix.
x = x.masked_fill(mask, float("-inf"))

In [78]:
# Display the masked matrix: -inf now sits at every position where mask was True.
x

tensor([[-inf, 2., 3.],
        [4., -inf, -inf],
        [7., -inf, 9.]])

In [79]:
# Boolean lower-triangular matrix with the same shape as x:
# True on and below the main diagonal, False above it.
t = torch.ones_like(x, dtype=torch.bool).tril()

In [80]:
# Display the lower-triangular boolean matrix (True on and below the diagonal).
t

tensor([[ True, False, False],
        [ True,  True, False],
        [ True,  True,  True]])

In [81]:
# Fill with the lower-triangular matrix as the mask. The result is only
# displayed, not assigned, so x itself is unchanged.
# The True positions (on and below the diagonal) are the ones overwritten with
# 36; entries above the diagonal keep their previous values.
# NOTE(review): for causal attention the positions to blank out are the ones
# ABOVE the diagonal, so a tril matrix would need inverting before being passed
# to masked_fill — check how transformer.py builds its mask.
x.masked_fill(t, 36)

tensor([[36.,  2.,  3.],
        [36., 36., -inf],
        [36., 36., 36.]])

### Plotting

In [None]:
# # Plotting

# #
# plt.figure(figsize=(10, 5))

# # Plot Loss
# plt.subplot(1, 2, 1)
# plt.plot(df['epoch'], df['loss'], label='Loss', color='blue')
# plt.title('Loss vs Epoch')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()