In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm


from models import SimpleMLP, SimpleCNN, CVAE_MLP, CVAE_CNN

from utils import experiment_classif_simple, format_results, \
    plot_loss_acc_over_epochs, plot_time_vs_parameters, \
    train_cvae, generate_digit


if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
# device = 'cpu'
print(f"Using device: {device}")

Using device: mps


## Basic operations

In [2]:
_ = 1 + 1

In [3]:
for _ in tqdm(range(10000000)):
    _ = 1 + 1

100%|██████████| 10000000/10000000 [00:00<00:00, 14244445.36it/s]


## Setup MNIST Experiment

### Dataset

In [None]:
transform = transforms.Compose([
    transforms.ToTensor()
])

train_dataset = datasets.MNIST(
    root="./data", train=True, download=True, transform=transform
)
test_dataset = datasets.MNIST(
    root="./data", train=False, download=True, transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1000, shuffle=False)

# Plot examples of digits (with true labels)
examples = enumerate(test_loader)
batch_idx, (example_data, example_targets) = next(examples)
fig = plt.figure()
for i in range(4):
    plt.subplot(1, 4, i + 1)
    plt.imshow(example_data[i].cpu().squeeze(), cmap='gray')
    plt.title(f"True: {example_targets[i].item()}")
    plt.axis("off")
plt.tight_layout()

## Classification tasks

### Expe - Simple MLP

In [5]:
hidden_dims_tested = [
    [32, 16], 
    [64, 32],
    [128, 64, 32]
]
all_outputs_mlp = []
all_models_mlp = []

for hidden_dims in hidden_dims_tested:
    print(f"Training SimpleMLP with hidden dimensions: {hidden_dims}")
    model = SimpleMLP(hidden_dims=hidden_dims)
    output = experiment_classif_simple(
        SimpleMLP(hidden_dims=hidden_dims), 
        train_loader, test_loader,
        nbr_epochs=10, device=device,
        run_name=f"SimpleMLP_{'_'.join(map(str, hidden_dims))}"
    )
    all_outputs_mlp.append(output)
    all_models_mlp.append(model)

results_long_mlp, results_summary_mlp = format_results(all_outputs_mlp)

Training SimpleMLP with hidden dimensions: [32, 16]


Epochs: 100%|█████| 10/10 [00:50<00:00,  5.10s/it]


Training SimpleMLP with hidden dimensions: [64, 32]


Epochs: 100%|█████| 10/10 [00:44<00:00,  4.42s/it]


Training SimpleMLP with hidden dimensions: [128, 64, 32]


Epochs: 100%|█████| 10/10 [00:51<00:00,  5.20s/it]


### Expe - Simple CNN

In [6]:
hidden_channels_tested = [
    [8], 
    [16, 16]
]
all_outputs_cnn = []
all_models_cnn = []

for hidden_chans in hidden_channels_tested:
    print(f"Training SimpleCNN with hidden channels: {hidden_chans}")
    model = SimpleCNN(hidden_channels=hidden_chans)
    output = experiment_classif_simple(
        model, 
        train_loader, test_loader,
        nbr_epochs=10, device=device,
        run_name=f"SimpleCNN_{'_'.join(map(str, hidden_chans))}"
    )
    all_outputs_cnn.append(output)
    all_models_cnn.append(model)

results_long_cnn, results_summary_cnn = format_results(all_outputs_cnn)

Training SimpleCNN with hidden channels: [8]


Epochs: 100%|█████| 10/10 [00:46<00:00,  4.65s/it]


Training SimpleCNN with hidden channels: [16, 16]


Epochs: 100%|█████| 10/10 [00:51<00:00,  5.10s/it]


### Summary

In [7]:
results_summary = pd.concat([results_summary_mlp, results_summary_cnn], ignore_index=True)
results_long = pd.concat([results_long_mlp, results_long_cnn], ignore_index=True)

In [None]:
plot_time_vs_parameters(results_summary)

In [None]:
plot_loss_acc_over_epochs(results_long)

## Image generation tasks

### CVAE

In [10]:
cvae_mlp = CVAE_MLP(hidden_dims=[400, 100], latent_dim=20)

num_params = sum(p.numel() for p in cvae_mlp.parameters() if p.requires_grad)
print(f"CVAE_MLP number of parameters: {num_params}")

train_cvae(cvae_mlp, train_loader, epochs=10, device=device)

CVAE_MLP number of parameters: 720024
Epoch 1 | loss = 2.4360
Epoch 2 | loss = 1.8473
Epoch 3 | loss = 1.7306
Epoch 4 | loss = 1.6750
Epoch 5 | loss = 1.6410
Epoch 6 | loss = 1.6187
Epoch 7 | loss = 1.6012
Epoch 8 | loss = 1.5875
Epoch 9 | loss = 1.5765
Epoch 10 | loss = 1.5675


In [None]:
all_imgs_mlp = generate_digit(cvae_mlp, n_samples=8, device=device)

In [12]:
cvae_cnn = CVAE_CNN(hidden_channels=[32, 64, 128], latent_dim=20)

num_params = sum(p.numel() for p in cvae_cnn.parameters() if p.requires_grad)
print(f"CVAE_CNN number of parameters: {num_params}")

train_cvae(cvae_cnn, train_loader, epochs=5, device=device)

CVAE_CNN number of parameters: 1969609
Epoch 1 | loss = 1.9717
Epoch 2 | loss = 1.5769
Epoch 3 | loss = 1.5343
Epoch 4 | loss = 1.5104
Epoch 5 | loss = 1.4934


In [None]:
all_imgs_cnn = generate_digit(cvae_cnn, n_samples=8, device=device)

## Text generation tasks

In [14]:
from transformers import BertTokenizer, BertForMaskedLM

# Load pretrained BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForMaskedLM.from_pretrained("bert-base-uncased").to(device)

num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"BERT number of parameters: {num_params}")

model.eval()

# Example: mask a word
text_examples = [
    "The capital of France is [MASK].",
    "The largest planet in our solar system is [MASK].",
    "The most passionate programming language is [MASK]."
]

for text in text_examples:
    inputs = tokenizer(text, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        predictions = outputs.logits

    # Get the token id with the highest probability at the masked position
    mask_token_index = (inputs.input_ids == tokenizer.mask_token_id)[0].nonzero(as_tuple=True)[0]
    predicted_token_id = predictions[0, mask_token_index, :].argmax(dim=-1)
    
    top_k = 5
    top_k_token_ids = predictions[0, mask_token_index, :].topk(top_k).indices
    top_k_token_probs = predictions[0, mask_token_index, :].topk(top_k).values
    str_top_k = [f"{tokenizer.decode([token_id])} ({prob.item():.1f})" for token_id, prob in zip(top_k_token_ids[0], top_k_token_probs[0])]

    predicted_token = tokenizer.decode(predicted_token_id)
    print("--------------------------------")
    print(f"Original text: {text}")
    print(f"Predicted token (top {top_k}): {str_top_k}")
    print(f"Filled sentence: {text.replace(tokenizer.mask_token, predicted_token)}")


  from .autonotebook import tqdm as notebook_tqdm
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERT number of parameters: 109514298
--------------------------------
Original text: The capital of France is [MASK].
Predicted token (top 5): ['paris (12.3)', 'lille (10.6)', 'lyon (10.5)', 'marseille (10.1)', 'tours (9.7)']
Filled sentence: The capital of France is paris.
--------------------------------
Original text: The largest planet in our solar system is [MASK].
Predicted token (top 5): ['earth (10.3)', 'pluto (10.2)', 'jupiter (9.9)', 'mars (9.9)', 'saturn (9.3)']
Filled sentence: The largest planet in our solar system is earth.
--------------------------------
Original text: The most passionate programming language is [MASK].
Predicted token (top 5): ['java (10.5)', 'python (9.9)', 'c (9.8)', 'english (9.0)', 'php (7.4)']
Filled sentence: The most passionate programming language is java.


In [15]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load GPT-2 tokenizer and model
gpt2_tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
gpt2_model = GPT2LMHeadModel.from_pretrained("gpt2").to(device)

num_params = sum(p.numel() for p in gpt2_model.parameters() if p.requires_grad)
print(f"GPT-2 number of parameters: {num_params}")

gpt2_model.eval()

# Example prompt
prompt_examples = [
    "Once upon a time in a galaxy far, far away",
    "In the future, artificial intelligence will",
    "The secret to a happy life is"
]

for prompt in prompt_examples:
    inputs = gpt2_tokenizer(prompt, return_tensors="pt").to(device)

    # Generate text
    with torch.no_grad():
        output_ids = gpt2_model.generate(
            inputs["input_ids"],
            max_length=100,
            num_return_sequences=1,
            do_sample=True,
            top_k=50,
            top_p=0.95
        )

    generated_text = gpt2_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    print("--------------------------------")
    print("Prompt:", prompt)
    print("Generated:", generated_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


GPT-2 number of parameters: 124439808


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


--------------------------------
Prompt: Once upon a time in a galaxy far, far away
Generated: Once upon a time in a galaxy far, far away, there was a galaxy with a vast number of planets. These celestial objects formed when light and heat were created. They created the universe in such a way that our world may become completely uninhabitable. They are also known as "supernovas."

Supernovas are the first supermassive black holes in the observable Universe that have the ability to grow at incredible speeds in a matter of minutes. The expansion of the black hole is


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


--------------------------------
Prompt: In the future, artificial intelligence will
Generated: In the future, artificial intelligence will be the most important technology that could change the way we work, play, and travel our daily lives. It will bring a lot of benefits to our society as a whole, and will make it possible for everyone. So, the future is bright!"

The report also states that artificial intelligence can take jobs from humans and robots to new heights.

Dr. David Wieber, the chief executive of the University of Wisconsin, wrote, "By the
--------------------------------
Prompt: The secret to a happy life is
Generated: The secret to a happy life is never to disappoint.
