<a href="https://colab.research.google.com/github/adamoosya/182Proj/blob/main/run.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
from torch.nn import functional as F
import numpy as np
import csv
import os
import matplotlib.pyplot as plt

In [2]:
torch.manual_seed(1337)

<torch._C.Generator at 0x7e0578592bb0>

# Download modules

In [3]:
if not os.path.exists('checkpoint'):
    os.makedirs('checkpoint')

url = f"https://raw.githubusercontent.com/adamoosya/182Proj/main/hyperparameters.py"
!wget --no-cache --backups=1 {url}
from hyperparameters import BATCH_SIZE, BLOCK_SIZE, MAX_ITERS, EVAL_INTERVAL, LEARNING_RATE, EVAL_ITERS, N_EMBD, N_HEAD, \
  N_LAYER, DROPOUT, DEVICE, DATA_CATEGORIES, TRAIN_DATA_CATEGORIES, VAL_DATA_CATEGORIES

url = f"https://raw.githubusercontent.com/adamoosya/182Proj/main/tokenizer.py"
!wget --no-cache --backups=1 {url}
from tokenizer import tokenize, detokenize, IS_TO_TOKEN, AS_TOKEN, END_TOKEN, VOCAB_SIZE, CHAR_TO_TOKEN, TOKEN_TO_CHAR

url = f"https://raw.githubusercontent.com/adamoosya/182Proj/main/dataloader.py"
!wget --no-cache --backups=1 {url}
from dataloader import get_context_test, get_context_example, get_batch

url = f"https://raw.githubusercontent.com/adamoosya/182Proj/main/model.py"
!wget --no-cache --backups=1 {url}
from model import estimate_loss, GPTLanguageModel

url = f"https://raw.githubusercontent.com/adamoosya/182Proj/main/data.py"
!wget --no-cache --backups=1 {url}
from data import load_data

--2025-05-04 07:50:31--  https://raw.githubusercontent.com/adamoosya/182Proj/main/hyperparameters.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3279 (3.2K) [text/plain]
Failed to rename hyperparameters.py to hyperparameters.py.1: (2) No such file or directory
Saving to: ‘hyperparameters.py’


2025-05-04 07:50:32 (62.9 MB/s) - ‘hyperparameters.py’ saved [3279/3279]

--2025-05-04 07:50:32--  https://raw.githubusercontent.com/adamoosya/182Proj/main/tokenizer.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 815 [text/plain]
Fai

# Get morphological transformations

In [4]:
DATA, TRAIN_DATA, VAL_DATA = load_data()

# Store loss and accuracy for graphing purposes

In [5]:
losses = {
    'train': {category: [] for category in TRAIN_DATA_CATEGORIES},
    'val': {category: [] for category in VAL_DATA_CATEGORIES}
}
accuracies = {
    'train': {category: [] for category in TRAIN_DATA_CATEGORIES},
    'val': {category: [] for category in VAL_DATA_CATEGORIES}
}

losses['train'][None] = []
accuracies['train'][None] = []
losses['val'][None] = []
accuracies['val'][None] = []

# Run model

In [6]:
def evaluate(model, dataset, category=None, num_to_evaluate=100):
    correct = 0
    total = 0
    for _ in range(num_to_evaluate):
        context, target = get_context_test(dataset, category=category)
        if model.test(context, target):
            correct += 1
        total += 1
    return correct / total

In [7]:
model = GPTLanguageModel()
model = model.to(DEVICE)
# print the number of parameters in the model
print(sum(p.numel() for p in model.parameters())/1e6, 'M parameters')

# create a PyTorch optimizer
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

10.762783 M parameters


In [8]:
for iter in range(1000):

    # every once in a while evaluate the loss on train and val sets
    if iter % EVAL_INTERVAL == EVAL_INTERVAL-1:

      train_score = evaluate(model, TRAIN_DATA, category=None)
      print(f"Epoch: {iter}, Train Score: {train_score:.4f}")

      val_score = evaluate(model, VAL_DATA, category=None)
      print(f"Epoch: {iter}, Validation Score: {val_score:.4f}")

      # for split in losses:
      #   for category in losses[split]:
      #     losses[split][category].append(estimate_loss(model, split, category))
      #     accuracies[split][category].append(evaluate(model, split, category))

    # sample a batch of data
    xb, yb, mask = get_batch(TRAIN_DATA)

    # evaluate the loss
    logits, loss = model(xb, yb, mask)
    optimizer.zero_grad(set_to_none=True)
    loss.backward()
    optimizer.step()

Epoch: 499, Train Score: 0.0500
Epoch: 499, Validation Score: 0.0600
Epoch: 999, Train Score: 0.2900
Epoch: 999, Validation Score: 0.3600


# Statistics

In [39]:
context, target = get_context_test(TRAIN_DATA, category=None)
model.test(context, target, output=True)

Context: bluer#bluest$definer#, Prediction: definest, Expected: definest


True

In [None]:
# prompt: I want you to create a bunch of plots in matplotlib. I want to plot the train losses. The losses can be found in losses['train'] for each category. All the losses should be grouped up together in the same plot. The losses for category None should be 4x4 and on the left hand side. It should be labelled "Train Loss". There are 16 other categories. Those should all be put in a 4x4 grid next to the None category, and each should be 1x1. The x and y max and min values for all the plots should be standardized. The y value should be between 0 and 1, and the x value should be between 0 and 10.  The losses graph for the None category should be 4 times the size as the losses for the other categories

import matplotlib.pyplot as plt

# Assuming 'losses' dictionary is already populated as in your provided code.

fig, axes = plt.subplots(4, 5, figsize=(20, 16))  # 4x5 grid for plots

# Plot for category 'None' (larger size)
axes[0, 0].plot(losses['train'][None])
axes[0, 0].set_title("Train Loss (None)")
axes[0, 0].set_xlabel("Iteration")
axes[0, 0].set_ylabel("Loss")
axes[0, 0].set_xlim(0, 10)
axes[0, 0].set_ylim(0, 1)

# Merge the remaining subplots
for i in range(4):
    for j in range(1,5): # start from index 1
        ax = axes[i][j]
        category_index = i * 4 + j # Calculate the index to use for other categories

        if category_index < len(TRAIN_DATA_CATEGORIES) and TRAIN_DATA_CATEGORIES[category_index] != None:
          category = TRAIN_DATA_CATEGORIES[category_index]
          ax.plot(losses['train'][category])
          ax.set_title(f"Train Loss ({category})")
          ax.set_xlabel("Iteration")
          ax.set_ylabel("Loss")
          ax.set_xlim(0, 10)
          ax.set_ylim(0, 1)
        else:
          ax.axis('off') # Hide the empty subplots


plt.tight_layout()  # Adjust spacing between subplots
plt.show()
