# **Introduction**

Wandb workspace URL: https://wandb.ai/fabian-dubach-hochschule-luzern/experiment-tracking/runs/gtg68kwg/workspace?nw=nwuserfabiandubach

# **Setup**


### WandDB

In [1]:
import wandb
from lightning.pytorch.loggers import WandbLogger

wandb.login(key="")
wandb_logger = WandbLogger(project="experiment-tracking")

  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


In [2]:

def init_wandb_run(project_name, run_name, config_dict):
    run = wandb.init(
        project=project_name,
        name=run_name,
        config=config_dict,
    )
    return run


#### Example Run
experiment_1_config = {
    "learning_rate": 0.001,
    "epochs": 10,
    "batch_size": 32,
    "optimizer": "adam"
}
run1 = init_wandb_run("experiment-tracking", "experiment_1", experiment_1_config)


[34m[1mwandb[0m: Currently logged in as: [33mfabian-dubach[0m ([33mfabian-dubach-hochschule-luzern[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


# **Preprocessing**

### Import all libraries needed

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer
from datasets import load_dataset
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


### Load the dataset from HuggingFace

In [2]:
dataset = load_dataset("tau/commonsense_qa")

### Load Word2Vec

In [3]:
import gensim.downloader as api
wv = api.load('word2vec-google-news-300')



### Load BERT tokenizer

In [None]:
# Tokenization because w2v is trained for tokenized words
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

In [None]:
def get_embedding(text):
    text = text.lower()
    tokens = tokenizer.tokenize(text)
    vectors = [wv[word] if word in wv else np.zeros(100) for word in tokens]
    return np.mean(vectors, axis=0)

In [6]:
class CommonsenseQADataset(Dataset):
    def __init__(self, dataset, split="train"):
        self.data = dataset[split]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        question = self.data[idx]["question"]
        choices = self.data[idx]["choices"]["text"]
        label = self.data[idx]["answerKey"]
        
        embeddings = [get_embedding(question + " " + choice) for choice in choices]
        embeddings = torch.tensor(embeddings, dtype=torch.float)
        
        label_idx = ord(label) - ord('A')
        return embeddings, label_idx

In [7]:
train_dataset = CommonsenseQADataset(dataset, "train")
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [8]:
class LSTM(nn.Module):
    
    def __init__(self, input_dim, hidden_dim):
        super().__init__() 
        self.w_f = nn.Parameter(torch.empty(hidden_dim, hidden_dim))
        self.u_f = nn.Parameter(torch.empty(hidden_dim, input_dim))
        self.w_i = nn.Parameter(torch.empty(hidden_dim, hidden_dim))
        self.u_i = nn.Parameter(torch.empty(hidden_dim, input_dim))
        self.w_o = nn.Parameter(torch.empty(hidden_dim, hidden_dim))
        self.u_o = nn.Parameter(torch.empty(hidden_dim, input_dim))
        self.w_c = nn.Parameter(torch.empty(hidden_dim, hidden_dim))
        self.u_c = nn.Parameter(torch.empty(hidden_dim, input_dim))
        
        self.b_f = nn.Parameter(torch.empty(hidden_dim))
        self.b_i = nn.Parameter(torch.empty(hidden_dim))
        self.b_o = nn.Parameter(torch.empty(hidden_dim))
        self.b_c = nn.Parameter(torch.empty(hidden_dim))
    
    
    def reset_parameters(self):
        for weight in self.parameters():
            nn.init.uniform_(weight, -1, 1)
    
            
    def forward(self, x, h_t, c_t):
        f_t = torch.sigmoid(self.w_f @ h_t + self.u_f @ x + self.b_f)
        i_t = torch.sigmoid(self.w_i @ h_t + self.u_i @ x + self.b_i)
        o_t = torch.sigmoid(self.w_o @ h_t + self.u_o @ x + self.b_o)
        c_memory = torch.tanh(self.w_c @ h_t + self.u_c @ x + self.b_c)
        
        c_t = f_t * c_t + i_t * c_memory
        h_t = o_t * torch.tanh(c_t)
        
        return h_t, c_t

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTM(10, 20).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [12]:
for epoch in range(10):
    for embeddings, labels in train_loader:
        embeddings, labels = embeddings.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(embeddings)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}, Loss: {loss.item()}")

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (21,) + inhomogeneous part.

# Model

# Training

# Evaluation

# Interpretation