In [102]:
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import time
import re
from Tokenizer import tokenize_text, separate_tokens, split_into_sentences
%matplotlib inline

In [103]:
def load_model(filename, seed=None):
    # Load the model
    checkpoint = torch.load(filename)
    C = checkpoint['C']
    W1 = checkpoint['W1']
    b1 = checkpoint['b1']
    W2 = checkpoint['W2']
    b2 = checkpoint['b2']
    itos = checkpoint['itos']
    context_size = checkpoint['context_size']
    vocab_size = checkpoint['vocab_size']
    
    device = C.device
    embedding_dim = C.shape[1]
    hidden_size = W2.shape[0]
    
    # Print dimensions for debugging
    print(f"Model dimensions:")
    print(f"C shape: {C.shape} (vocab_size, embedding_dim)")
    print(f"W1 shape: {W1.shape}")
    print(f"W2 shape: {W2.shape}")
    print(f"Block size: {context_size}")
    print(f"Embedding dim: {embedding_dim}")
    print(f"Hidden size: {hidden_size}")
    
    # Reshape W1 to handle concatenated embeddings
    W1 = W1.expand(context_size * embedding_dim, hidden_size)
    
    print(f"W1 shape after reshape: {W1.shape}")
    
    # Print model dimensions for debugging
    print(f"Shapes before processing:")
    print(f"C: {C.shape}")
    print(f"W1: {W1.shape}")
    print(f"W2: {W2.shape}")
    print(f"Block size: {context_size}")
    
    device = C.device
    embedding_dim = C.shape[1]
    hidden_size = W2.shape[0]
    
    # Instead of reshaping W1, make sure it's the correct size
    expected_w1_shape = (context_size * embedding_dim, hidden_size)
    if W1.shape != expected_w1_shape:
        raise ValueError(f"W1 shape {W1.shape} doesn't match expected shape {expected_w1_shape}")
    
    # Add this after loading the weights
    W1 = W1.reshape(context_size * C.shape[1], -1)  # Reshape to [context_size * emb_dim, hidden_size]
    
    device = C.device
    similarity_dimensions = C.shape[1]
    
    print(f"Model loaded from {filename}")
    print(f"Vocabulary size: {vocab_size}")
    print(f"Using device: {device}")
    
    # Return the loaded model components in case they're needed
    return {
        'C': C, 
        'W1': W1, 
        'b1': b1, 
        'W2': W2, 
        'b2': b2, 
        'itos': itos,
        'context_size': context_size,
        'vocab_size': vocab_size
    }

In [106]:
model_components = load_model("MLP1.w", 42)

Model dimensions:
C shape: torch.Size([46, 1]) (vocab_size, embedding_dim)
W1 shape: torch.Size([1, 4])
W2 shape: torch.Size([4, 46])
Block size: 1
Embedding dim: 1
Hidden size: 4
W1 shape after reshape: torch.Size([1, 4])
Shapes before processing:
C: torch.Size([46, 1])
W1: torch.Size([1, 4])
W2: torch.Size([4, 46])
Block size: 1
Model loaded from MLP1.w
Vocabulary size: 46
Using device: cpu


  checkpoint = torch.load(filename)


In [112]:
model_components['W1']

tensor([[ 2.3489,  2.3592, -2.2207, -2.9705]], grad_fn=<ViewBackward0>)

In [115]:
nW1 = (model_components['W1'] ** 2) ** 0.5

In [123]:
nb1 = model_components['b1'] * torch.tensor([1.0, 1.0, -1.0, -1.0])

In [124]:
nb1

tensor([-6.2453,  6.8988,  1.7038, -2.3486], grad_fn=<MulBackward0>)

In [126]:
model_components['b2']

tensor([-3.9019, -2.9001,  6.1349, -0.3616, -2.9154, -0.3167,  3.9216,  1.2724,
        -4.1372, -2.9381, -3.3050, -2.9459, -2.9054, -2.6628, -3.6120, -2.9477,
        -3.1679, -2.7918, -2.6638, -3.0004, -2.7946,  4.4621,  1.7770, -0.4613,
         2.2871,  5.9610, -0.9330,  0.2502, -1.0164,  4.0143, -2.6587,  0.5671,
         4.4169, -0.9565,  1.3396,  3.3308,  3.2504,  4.4393,  0.2887,  3.0087,
         4.3911, -1.0450, -0.8668,  4.3748, -1.6799,  0.3987],
       requires_grad=True)