# Loading and Summarizing The Model

This notebook will load the model and summarize it so that we can understand the model better.


In [7]:
import sys
import torch

# Add the parent directory to the module search path to allow direct import
# from the gpt package. The membership check keeps the cell idempotent:
# re-running it does not pile up duplicate "../" entries in sys.path.
if "../" not in sys.path:
    sys.path.append("../")

# Display the installed PyTorch version as the cell output.
torch.__version__

'2.2.2'

## Use an accelerator if available


In [8]:
# Choose the compute device in priority order: MPS first, then CUDA,
# and fall back to the CPU when neither backend reports as available.
device = torch.device(
    "mps"
    if torch.backends.mps.is_available()
    else "cuda"
    if torch.cuda.is_available()
    else "cpu"
)

device

device(type='mps')

## Load a dataset to get an accurate vocabulary size


In [14]:
import pandas as pd
from gpt.datasets import CharacterLevelTextDataset
from pathlib import Path

TEXT_DATA_PATH = Path("../data/bible-kjv.txt")
SEQUENCE_LENGTH = 10

# Build the dataset from the text file using the configured sequence length.
dataset = CharacterLevelTextDataset(TEXT_DATA_PATH, SEQUENCE_LENGTH)

# Headline facts about the dataset, kept as an ordered parameter -> value mapping.
# Numeric values are rendered to strings; the data path is stored as-is.
characteristics = dict(
    [
        ("Data path", TEXT_DATA_PATH),
        ("Sequence length", f"{SEQUENCE_LENGTH}"),
        ("Dataset length", f"{len(dataset)}"),
        ("Vocabulary size", f"{dataset.vocab_size}"),
    ]
)

# One row per characteristic, shown as a two-column table.
df = pd.DataFrame(list(characteristics.items()), columns=["Parameter", "Value"])
df

Unnamed: 0,Parameter,Value
0,Data path,../data/bible-kjv.txt
1,Sequence length,10
2,Dataset length,4351869
3,Vocabulary size,62


## Instantiate and summarize the model


In [10]:
from gpt import GPT2
from pathlib import Path
from torchinfo import summary

# The vocabulary size comes from the dataset loaded above, so the model's
# input/output dimensions match the data.
VOCAB_SIZE = dataset.vocab_size
SUMMARY_PATH = Path("../gpt2_summary.txt")

# Instantiate the model with the chosen hyperparameters and move it to the
# selected device.
model = GPT2(
    vocab_size=VOCAB_SIZE,
    embedding_size=128,
    num_heads=4,
    num_layers=4,
    hidden_size=256,
).to(device)

# Dummy batch of random token ids (shape (1, 10)) in [0, VOCAB_SIZE), passed to
# torchinfo as the example input for the summary.
input_ids = torch.randint(0, VOCAB_SIZE, (1, 10)).to(device)

# NOTE: this rebinds `summary` from the torchinfo function to its result.
# The import at the top of this cell restores the function on re-run, but
# cells below this one see the result object, not the function.
summary = summary(model, input_data=input_ids)

# The rendered summary contains non-ASCII box-drawing characters, so write it
# with an explicit UTF-8 encoding instead of relying on the platform default
# (which can fail with UnicodeEncodeError on non-UTF-8 locales).
with open(SUMMARY_PATH, "w", encoding="utf-8") as f:
    f.write(str(summary))

summary

Layer (type:depth-idx)                   Output Shape              Param #
GPT2                                     [1, 10, 62]               --
├─Embedding: 1-1                         [1, 10, 128]              7,936
├─Embedding: 1-2                         [1, 10, 128]              128,000
├─ModuleList: 1-3                        --                        --
│    └─TransformerBlock: 2-1             [1, 10, 128]              --
│    │    └─MultiHeadAttention: 3-1      [1, 10, 128]              66,048
│    │    └─LayerNorm: 3-2               [1, 10, 128]              256
│    │    └─Dropout: 3-3                 [1, 10, 128]              --
│    │    └─Sequential: 3-4              [1, 10, 128]              65,920
│    │    └─LayerNorm: 3-5               [1, 10, 128]              256
│    │    └─Dropout: 3-6                 [1, 10, 128]              --
│    └─TransformerBlock: 2-2             [1, 10, 128]              --
│    │    └─MultiHeadAttention: 3-7      [1, 10, 128]              