# **Introduction**

Wandb workspace URL: https://wandb.ai/fabian-dubach-hochschule-luzern/experiment-tracking/runs/gtg68kwg/workspace?nw=nwuserfabiandubach

# **Setup**


### WandDB

In [21]:
import wandb
from lightning.pytorch.loggers import WandbLogger

wandb.login(key="")
wandb_logger = WandbLogger(project="experiment-tracking")



In [22]:

def init_wandb_run(project_name, run_name, config_dict):
    run = wandb.init(
        project=project_name,
        name=run_name,
        config=config_dict,
    )
    return run


#### Example Run
experiment_1_config = {
    "learning_rate": 0.001,
    "epochs": 100,
    "batch_size": 64,
    "optimizer": "adam"
}
run1 = init_wandb_run("experiment-tracking", "experiment_1", experiment_1_config)


# **Preprocessing**

### Import all libraries needed

In [23]:
import torch
import torch.nn as nn
from tqdm import trange
from torch.utils.data import DataLoader, Dataset
from datasets import load_dataset
import gensim.downloader as api
import numpy as np
import nltk

In [24]:
SEED = 42

np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

### Load the dataset from HuggingFace

In [25]:
train = load_dataset("tau/commonsense_qa", split="train[:-1000]")
valid = load_dataset("tau/commonsense_qa", split="train[-1000:]")
test = load_dataset("tau/commonsense_qa", split="validation")

print(len(train), len(valid), len(test))

8741 1000 1221


In [26]:
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\fabia\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [27]:
glove_model = api.load('glove-wiki-gigaword-300')

In [28]:
def preprocessing(text):
    tokens = nltk.tokenize.word_tokenize(text)
    return tokens

In [29]:
def get_embedding(sentence):
    """Convert a sentence into a fixed-size vector by averaging word embeddings."""
    tokens = preprocessing(sentence)
    tokens = [token.lower() for token in tokens]
    word_vectors = []
    for token in tokens:
        # If the word exists in the GloVe model, get its vector
        if token in glove_model:
            word_vectors.append(glove_model[token])
        else:
            # Use a zero vector for missing words
            word_vectors.append(np.zeros(glove_model.vector_size))
    
    # Return the mean of the word vectors
    if word_vectors:
        return np.mean(word_vectors, axis=0)
    else:
        return np.zeros(glove_model.vector_size)  # Return a zero vector if no tokens were found

In [30]:
def compute_embeddings(example):
    question_emb = get_embedding(example["question"])
    choice_embs = [get_embedding(choice) for choice in example["choices"]["text"]]
    
    # Save embeddings as lists so they can be stored in the dataset
    example["question_emb"] = question_emb.tolist()
    example["choice_embs"] = [emb.tolist() for emb in choice_embs]
    return example

train = train.map(compute_embeddings)
valid = valid.map(compute_embeddings)

Map: 100%|██████████| 8741/8741 [00:08<00:00, 973.40 examples/s] 
Map: 100%|██████████| 1000/1000 [00:01<00:00, 962.26 examples/s]


In [31]:
def answer_key_to_index(answer_key):
  """Answer key (A-E) to index (0-4)"""
  return ord(answer_key) - ord("A")

In [32]:
class CommonsenseQADataset(Dataset):
    def __init__(self, dataset):
        self.data = dataset

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        example = self.data[idx]
        question_tensor = torch.tensor(example["question_emb"]).float()
        choices_tensor = torch.tensor(example["choice_embs"]).float()
        answer_index = answer_key_to_index(example["answerKey"])
        return question_tensor, choices_tensor, torch.tensor(answer_index).long()

In [33]:
train_dataset = CommonsenseQADataset(train)
valid_dataset = CommonsenseQADataset(valid)

In [34]:
batch_size = 32

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [35]:
for batch in train_loader:
    print(batch[0].shape, batch[1].shape, batch[2].shape, batch[2])
    break

torch.Size([32, 300]) torch.Size([32, 5, 300]) torch.Size([32]) tensor([3, 3, 1, 4, 3, 4, 4, 4, 0, 2, 2, 3, 1, 3, 1, 4, 3, 4, 4, 4, 0, 1, 4, 2,
        3, 4, 3, 0, 1, 3, 0, 2])


In [36]:
class WordEmbeddingQAClassifier(nn.Module):
    def __init__(self, embedding_dim, hidden_dim=128):
        super(WordEmbeddingQAClassifier, self).__init__()

        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim

        self.fc1 = nn.Linear(2 * embedding_dim, hidden_dim) # the input dimension is doubled due to concatenation (question + choice)
        self.relu = nn.ReLU() # non-linearity
        self.fc2 = nn.Linear(hidden_dim, 1) # single score per candidate

    def forward(self, question, choices):
        # question: (batch_size, embedding_dim)
        # choices: (batch_size, 5, embedding_dim)

        # expand question to match the choices dimension
        question_expanded = question.unsqueeze(1).expand(-1, choices.size(1), -1) # (batch_size, 5, embedding_dim)
        
        # concatenate question and choice embeddings
        combined = torch.cat((question_expanded, choices), dim=2) # (batch_size, 5, 2*embedding_dim)

        # pass through the classifier
        x = self.fc1(combined)  # (batch_size, 5, hidden_dim)
        x = self.relu(x)
        x = self.fc2(x)         # (batch_size, 5, 1)
        return x.squeeze(-1)    # (batch_size, 5)

In [37]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [38]:
model = WordEmbeddingQAClassifier(embedding_dim=300, hidden_dim=32)
model = model.to(device)

print(model)

WordEmbeddingQAClassifier(
  (fc1): Linear(in_features=600, out_features=32, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=32, out_features=1, bias=True)
)


In [39]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=run1.config.learning_rate, weight_decay=1e-5)

In [40]:
num_epochs = run1.config.epochs

for epoch in (pbar := trange(num_epochs)):
    pbar.set_description(f"Epoch {epoch+1}/{num_epochs}")

    model.train()
    train_total_loss = 0.0
    train_correct = 0
    train_total = 0

    for question_batch, choices_batch, y_batch in train_loader:
        optimizer.zero_grad() # zero all the parameter gradients

        question_batch, choices_batch, y_batch = question_batch.to(device), choices_batch.to(device), y_batch.to(device)

        # Forward pass
        outputs = model(question_batch, choices_batch) # (batch_size, 5)

        # Compute loss
        train_batch_loss = criterion(outputs, y_batch)
        train_total_loss += train_batch_loss.item()

        # Compute accuracy
        train_predictions = torch.argmax(outputs, dim=1) # get most likely class
        train_correct += (train_predictions == y_batch).sum().item() # count correct predictions
        train_total += y_batch.size(0) # count total number of examples

        # Backward pass
        train_batch_loss.backward()
        optimizer.step()

    # Calculate train statistics
    avg_train_loss = train_total_loss / len(train_loader) # average loss per batch
    train_accuracy = train_correct / train_total # overall accuracy

    # Evaluate
    model.eval()
    val_correct = 0
    val_total  = 0
    val_total_loss  = 0

    with torch.no_grad():
        for question_batch, choices_batch, y_batch in valid_loader:
            question_batch, choices_batch, y_batch = question_batch.to(device), choices_batch.to(device), y_batch.to(device)

            val_outputs = model(question_batch, choices_batch) # (batch_size, 5)

            # Calculate validation loss
            val_batch_loss = criterion(val_outputs, y_batch)
            val_total_loss += val_batch_loss.item()

            val_predictions = torch.argmax(val_outputs, dim=1) # get most likely class
            val_correct += (val_predictions == y_batch).sum().item() # count correct predictions
            val_total += y_batch.size(0) # count total number of examples

    # Calculate validation statistics
    avg_val_loss = val_total_loss / len(valid_loader) # average loss per batch
    val_accuracy = val_correct / val_total # overall accuracy
    
    pbar.set_postfix({"train_loss": avg_train_loss, "train_acc": train_accuracy, "val_acc": val_accuracy})

    # Log metrics to wandb
    wandb.log({
        "epoch": epoch,
        "train_loss": avg_train_loss,
        "train_accuracy": train_accuracy,
        "val_loss": avg_val_loss,
        "val_accuracy": val_accuracy,
        "learning_rate": optimizer.param_groups[0]['lr'], # current learning rate
    })

wandb.finish()

Epoch 100/100: 100%|██████████| 100/100 [34:04<00:00, 20.44s/it, train_loss=0.408, train_acc=0.87, val_acc=0.251]


0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇█
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
train_accuracy,▁▂▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇█████████
train_loss,████▇▇▆▆▆▅▅▅▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_accuracy,█▆▇▄▄▆▄▄▃▂▆▆▃▄▃▄▄▆▄▅▅▆▄▄▄▃▃▂▂▃▄▃▃▃▂▂▂▁▃▁
val_loss,▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇███

0,1
epoch,99.0
learning_rate,0.001
train_accuracy,0.8705
train_loss,0.40829
val_accuracy,0.251
val_loss,3.94493


# Model

# Training

# Evaluation

# Interpretation