In [None]:
import torch
import pandas as pd
import numpy as np
import datasets
from load_models_and_data import load_vocabulary, load_embeddings, text_to_embeddings, calc_cosine_sim, calculate_similarities
from tqdm import tqdm
tqdm.pandas()
from TwoTowerNN import QryTower, DocTower, TripletEmbeddingDataset, run_hyperparameter_tuning
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader,  SubsetRandomSampler
from sklearn.model_selection import KFold, train_test_split
import os
import wandb


  from .autonotebook import tqdm as notebook_tqdm
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


API key loaded successfully


[34m[1mwandb[0m: Currently logged in as: [33mnnamdi-odozi[0m ([33mnnamdi-odozi-ave-actuaries[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
from datasets import load_dataset

# Loading datasets from Hugging Face
ds_soft_neg = load_dataset("cocoritzy/week_2_triplet_dataset_soft_negatives")
ds_hard_neg = load_dataset("cocoritzy/week_2_triplet_dataset_hard_negatives")


In [4]:
# Paths to your files
embeddings_path = "./downloaded_model/embeddings.pt"
vocab_path = "./downloaded_model/tkn_ids_to_words.csv"

# Load embeddings and vocabulary
print("Loading embeddings and vocabulary...")
embeddings = load_embeddings(embeddings_path)
word_to_idx = load_vocabulary(vocab_path)

print(f"Loaded embeddings with shape: {embeddings.shape}")
print(f"Loaded vocabulary with {len(word_to_idx)} tokens")

# Example usage (uncomment when ready to test)
sample_text = "This is a test sentence"
embeddings_result = text_to_embeddings(sample_text, word_to_idx, embeddings)
print(f"Embedded text shape: {embeddings_result.shape}")

# Testing - Set numpy print options
np.set_printoptions(precision=4, suppress=True, threshold=10)  # threshold limits number of elements shown
numpy_array = embeddings_result.detach().numpy()
print("Embedding array with custom formatting:")
print(numpy_array)


Loading embeddings and vocabulary...
Loaded embeddings with shape: torch.Size([63642, 128])
Loaded vocabulary with 63641 tokens
Embedded text shape: torch.Size([5, 128])
Embedding array with custom formatting:
[[ 0.1381  0.5469 -1.076  ... -0.3798 -0.7187  0.2953]
 [ 0.1925 -0.0985 -0.1367 ...  0.7328  0.5067  0.7939]
 [ 0.2072  0.043  -0.6497 ... -0.0641 -0.6588 -0.1389]
 [ 0.418  -0.645  -0.5003 ... -0.159  -0.2203 -0.2697]
 [-0.4971  0.4175 -0.0469 ... -0.1927  2.253  -0.1716]]


In [5]:
ds_soft_neg

DatasetDict({
    train: Dataset({
        features: ['query_id', 'query', 'positive_passage', 'negative_passage', 'negative_from_query_id'],
        num_rows: 79704
    })
})

In [6]:
df_soft_neg  = pd.DataFrame(ds_soft_neg['train'])
df_hard_neg  = pd.DataFrame(ds_hard_neg['train'])

In [7]:
embedded_query = text_to_embeddings(df_soft_neg['query'][0], word_to_idx, embeddings)
embedded_positive = text_to_embeddings(df_soft_neg['positive_passage'][0], word_to_idx, embeddings)
embedded_negative = text_to_embeddings(df_soft_neg['negative_passage'][0], word_to_idx, embeddings)

embedded_query.shape

torch.Size([3, 128])

In [8]:
a = embedded_query.mean(dim=0)
b = embedded_positive.mean(dim=0)
c = embedded_negative.mean(dim=0)
a.shape


torch.Size([128])

In [9]:
import torch.nn.functional as F

cosine_similarity = F.cosine_similarity(a, c, dim=0)
print(f"Cosine similarity between query and positive passage: {cosine_similarity.item()}")

Cosine similarity between query and positive passage: 0.7518182992935181


In [11]:

# Process the dataframe using apply just for first five rows
print("Calculating similarities... This may take a while depending on dataframe size.")
similarities = df_soft_neg[0:5].progress_apply(
    lambda row: calculate_similarities(row, word_to_idx, embeddings), 
    axis=1
)

# Join the similarities to the dataframe
df_soft_neg_ext = pd.concat([df_soft_neg[0:5], similarities], axis=1)

# Show a sample of the results
#print(df_soft_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].head())


Calculating similarities... This may take a while depending on dataframe size.


100%|██████████| 5/5 [00:00<00:00, 554.16it/s]


In [12]:
print(df_soft_neg_ext.head())
print(df_soft_neg_ext.columns)

   query_id                                              query  \
0     19699                                        what is rba   
1     19700                       was ronald reagan a democrat   
2     19701  how long do you need for sydney and surroundin...   
3     19702                    price to install tile in shower   
4     19703                    why conversion observed in body   

                                    positive_passage  \
0  Results-Based Accountability® (also known as R...   
1  From Wikipedia, the free encyclopedia. A Reaga...   
2  Sydney is the capital city of the Australian s...   
3  1 Install ceramic tile floor to match shower-A...   
4  Conversion disorder is a type of somatoform di...   

                                    negative_passage  negative_from_query_id  \
0  I finally found some real salary data for phys...                   86595   
1  The Pacific Ocean lies to the east while the S...                   66360   
2  Probiotics are found in

In [13]:

# Process the dataframe using apply
print("Calculating similarities... This may take a while depending on dataframe size.")
similarities = df_soft_neg.progress_apply(
    lambda row: calculate_similarities(row, word_to_idx, embeddings), 
    axis=1
)

# Join the similarities to the dataframe
df_soft_neg_ext = pd.concat([df_soft_neg, similarities], axis=1)
print(df_soft_neg_ext.head())
# Show a sample of the results
#print(df_soft_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].head())

#print(df_soft_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].mean())

# Calculate how often the positive passage is ranked higher than negative
#higher_count = (df_soft_neg_ext['query_pos_sim'] > df_soft_neg_ext['query_neg_sim']).sum()
#total = len(df_soft_neg_ext)
#print(f"\nPositive passage ranked higher than negative: {higher_count} out of {total} ({higher_count/total:.2%})")



Calculating similarities... This may take a while depending on dataframe size.


100%|██████████| 79704/79704 [02:29<00:00, 534.52it/s]

   query_id                                              query  \
0     19699                                        what is rba   
1     19700                       was ronald reagan a democrat   
2     19701  how long do you need for sydney and surroundin...   
3     19702                    price to install tile in shower   
4     19703                    why conversion observed in body   

                                    positive_passage  \
0  Results-Based Accountability® (also known as R...   
1  From Wikipedia, the free encyclopedia. A Reaga...   
2  Sydney is the capital city of the Australian s...   
3  1 Install ceramic tile floor to match shower-A...   
4  Conversion disorder is a type of somatoform di...   

                                    negative_passage  negative_from_query_id  \
0  I finally found some real salary data for phys...                   86595   
1  The Pacific Ocean lies to the east while the S...                   66360   
2  Probiotics are found in




In [14]:
# Process the dataframe using apply
print("Calculating similarities... This may take a while depending on dataframe size.")
similarities = df_hard_neg.progress_apply(
    lambda row: calculate_similarities(row, word_to_idx, embeddings), 
    axis=1
)

# Join the similarities to the dataframe
df_hard_neg_ext = pd.concat([df_hard_neg, similarities], axis=1)
print(df_hard_neg_ext.head())
# Show a sample of the results
#print(df_hard_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].head())

#print(df_hard_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].mean())

# Calculate how often the positive passage is ranked higher than negative
#higher_count = (df_hard_neg_ext['query_pos_sim'] > df_hard_neg_ext['query_neg_sim']).sum()
#total = len(df_hard_neg_ext)
#print(f"\nPositive passage ranked higher than negative: {higher_count} out of {total} ({higher_count/total:.2%})")



Calculating similarities... This may take a while depending on dataframe size.


100%|██████████| 79700/79700 [02:35<00:00, 513.21it/s]

   query_id                                              query  \
0     19699                                        what is rba   
1     19700                       was ronald reagan a democrat   
2     19701  how long do you need for sydney and surroundin...   
3     19702                    price to install tile in shower   
4     19703                    why conversion observed in body   

                                    positive_passage  \
0  Results-Based Accountability® (also known as R...   
1  From Wikipedia, the free encyclopedia. A Reaga...   
2  Sydney is the capital city of the Australian s...   
3  1 Install ceramic tile floor to match shower-A...   
4  Conversion disorder is a type of somatoform di...   

                                    negative_passage  negative_index_in_group  \
0  vs. NetIQ Identity Manager. Risk-based authent...                        8   
1  1984 Re-Election. In November 1984, Ronald Rea...                        7   
2  The Sydney central b




### Twin Tower Network

In [None]:
# Create tower instances
qryTower = QryTower()
docTower = DocTower()


# Define hyperparameters
batch_size = 128
num_epochs = 1 # adjust num of epochs here
dataset_size = len(df_soft_neg_ext)  # or len(df_hard_neg_ext) depending on the dataset you want to use
steps_per_epoch = dataset_size // batch_size
total_steps = steps_per_epoch * num_epochs
learning_rate = 1e-3
embedding_dim = 128 
margin = 0.2 

In [None]:
# Create the dataset
dataset = TripletEmbeddingDataset(df_soft_neg_ext)

In [None]:
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    #num_workers=2,  # Adjust based on your machine's capabilities
    pin_memory=True  # Set to True if using GPU
)

In [None]:
qry = torch.randn(batch_size, embedding_dim)  # Query embeddings
pos = torch.randn(batch_size, embedding_dim)  # Positive doc embeddings
neg = torch.randn(batch_size, embedding_dim)  # Negative doc embeddings

#qry = df1['q']


# Set up the AdamW optimizer
optimizer = torch.optim.AdamW([
    {'params': qryTower.parameters()},
    {'params': docTower.parameters()}
], lr=learning_rate)

# Add learning rate scheduler (ReduceLROnPlateau)
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',       # Reduce LR when monitored value stops decreasing
    factor=0.5,       # Multiply LR by this factor when reducing
    patience=2,       # Number of epochs with no improvement after which LR will be reduced
    verbose=True      # Print message when LR is reduced
)



In [None]:
# Training loop (simplified example)
for epoch in range(num_epochs):
    qryTower.train()
    docTower.train()
    
    
    total_loss = 0
    for batch in dataloader:
        # Get embeddings from batch
        query_emb = batch['query']
        pos_emb = batch['positive']
        neg_emb = batch['negative']
        
        # Forward pass through towers
        query_encoded = qryTower(query_emb)
        pos_encoded = docTower(pos_emb)
        neg_encoded = docTower(neg_emb)
        
        # Calculate similarities
        pos_sim = torch.nn.functional.cosine_similarity(query_encoded, pos_encoded)
        neg_sim = torch.nn.functional.cosine_similarity(query_encoded, neg_encoded)
        
        # Triplet loss
        margin = margin
        loss = torch.clamp(margin - pos_sim + neg_sim, min=0).mean()
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item() * len(query_emb)
    
    # Calculate average loss
    avg_loss = total_loss / len(dataset)
    
    # Update scheduler
    scheduler.step(avg_loss)
    
    # Print epoch results
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, "
          f"LR: {optimizer.param_groups[0]['lr']:.6f}")

In [19]:
# Run the hyperparameter tuning with your dataframe
best_params, final_qry_tower, final_doc_tower = run_hyperparameter_tuning(
    df_soft_neg_ext,
    output_dims=[128],
    batch_sizes=[256, 512],
    n_folds=5,
    epochs=5
)

# Print the best parameters found
print(f"Best output dimension: {best_params['output_dim']}")
print(f"Best batch size: {best_params['batch_size']}")
print(f"Best validation loss: {best_params['avg_cv_loss']:.4f}")



--------------------------------------------------
Training with output_dim=128, batch_size=256
--------------------------------------------------

Fold 1/5


Epoch 1/5 (Train):   0%|          | 0/200 [00:00<?, ?it/s]

Epoch 1/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 19.67it/s]
Epoch 1/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 19.30it/s]


Epoch 1/5, Train Loss: 0.0486, Val Loss: 0.0065, LR: 0.001000
New best model saved with validation loss: 0.0065


Epoch 2/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.57it/s]
Epoch 2/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 21.27it/s]


Epoch 2/5, Train Loss: 0.0222, Val Loss: 0.0061, LR: 0.001000
New best model saved with validation loss: 0.0061


Epoch 3/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 19.60it/s]
Epoch 3/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 19.34it/s]


Epoch 3/5, Train Loss: 0.0178, Val Loss: 0.0056, LR: 0.001000
New best model saved with validation loss: 0.0056


Epoch 4/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.93it/s]
Epoch 4/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 20.65it/s]


Epoch 4/5, Train Loss: 0.0151, Val Loss: 0.0052, LR: 0.001000
New best model saved with validation loss: 0.0052


Epoch 5/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.79it/s]
Epoch 5/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 20.96it/s]


Epoch 5/5, Train Loss: 0.0134, Val Loss: 0.0050, LR: 0.001000
New best model saved with validation loss: 0.0050

Fold 2/5


Epoch 1/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.36it/s]
Epoch 1/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 21.43it/s]


Epoch 1/5, Train Loss: 0.0494, Val Loss: 0.0070, LR: 0.001000
New best model saved with validation loss: 0.0070


Epoch 2/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 21.65it/s]
Epoch 2/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 20.80it/s]


Epoch 2/5, Train Loss: 0.0229, Val Loss: 0.0064, LR: 0.001000
New best model saved with validation loss: 0.0064


Epoch 3/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 19.12it/s]
Epoch 3/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 20.41it/s]


Epoch 3/5, Train Loss: 0.0188, Val Loss: 0.0057, LR: 0.001000
New best model saved with validation loss: 0.0057


Epoch 4/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.64it/s]
Epoch 4/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 20.14it/s]


Epoch 4/5, Train Loss: 0.0162, Val Loss: 0.0051, LR: 0.001000
New best model saved with validation loss: 0.0051


Epoch 5/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.63it/s]
Epoch 5/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 22.09it/s]


Epoch 5/5, Train Loss: 0.0140, Val Loss: 0.0050, LR: 0.001000
New best model saved with validation loss: 0.0050

Fold 3/5


Epoch 1/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.25it/s]
Epoch 1/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 21.51it/s]


Epoch 1/5, Train Loss: 0.0485, Val Loss: 0.0063, LR: 0.001000
New best model saved with validation loss: 0.0063


Epoch 2/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 19.92it/s]
Epoch 2/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 19.06it/s]


Epoch 2/5, Train Loss: 0.0222, Val Loss: 0.0055, LR: 0.001000
New best model saved with validation loss: 0.0055


Epoch 3/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.08it/s]
Epoch 3/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 20.88it/s]


Epoch 3/5, Train Loss: 0.0178, Val Loss: 0.0053, LR: 0.001000
New best model saved with validation loss: 0.0053


Epoch 4/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 19.26it/s]
Epoch 4/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 22.45it/s]


Epoch 4/5, Train Loss: 0.0151, Val Loss: 0.0051, LR: 0.001000
New best model saved with validation loss: 0.0051


Epoch 5/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 19.41it/s]
Epoch 5/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 21.44it/s]


Epoch 5/5, Train Loss: 0.0132, Val Loss: 0.0049, LR: 0.001000
New best model saved with validation loss: 0.0049

Fold 4/5


Epoch 1/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.27it/s]
Epoch 1/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 22.23it/s]


Epoch 1/5, Train Loss: 0.0491, Val Loss: 0.0065, LR: 0.001000
New best model saved with validation loss: 0.0065


Epoch 2/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.22it/s]
Epoch 2/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 21.47it/s]


Epoch 2/5, Train Loss: 0.0226, Val Loss: 0.0058, LR: 0.001000
New best model saved with validation loss: 0.0058


Epoch 3/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 19.20it/s]
Epoch 3/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 21.00it/s]


Epoch 3/5, Train Loss: 0.0180, Val Loss: 0.0054, LR: 0.001000
New best model saved with validation loss: 0.0054


Epoch 4/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.99it/s]
Epoch 4/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 22.94it/s]


Epoch 4/5, Train Loss: 0.0154, Val Loss: 0.0051, LR: 0.001000
New best model saved with validation loss: 0.0051


Epoch 5/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.11it/s]
Epoch 5/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 19.32it/s]


Epoch 5/5, Train Loss: 0.0134, Val Loss: 0.0045, LR: 0.001000
New best model saved with validation loss: 0.0045

Fold 5/5


Epoch 1/5 (Train): 100%|██████████| 200/200 [00:09<00:00, 20.03it/s]
Epoch 1/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 20.94it/s]


Epoch 1/5, Train Loss: 0.0495, Val Loss: 0.0066, LR: 0.001000
New best model saved with validation loss: 0.0066


Epoch 2/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 19.54it/s]
Epoch 2/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 19.61it/s]


Epoch 2/5, Train Loss: 0.0236, Val Loss: 0.0057, LR: 0.001000
New best model saved with validation loss: 0.0057


Epoch 3/5 (Train): 100%|██████████| 200/200 [00:11<00:00, 17.77it/s]
Epoch 3/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 22.61it/s]


Epoch 3/5, Train Loss: 0.0180, Val Loss: 0.0051, LR: 0.001000
New best model saved with validation loss: 0.0051


Epoch 4/5 (Train): 100%|██████████| 200/200 [00:11<00:00, 17.29it/s]
Epoch 4/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 20.07it/s]


Epoch 4/5, Train Loss: 0.0155, Val Loss: 0.0050, LR: 0.001000
New best model saved with validation loss: 0.0050


Epoch 5/5 (Train): 100%|██████████| 200/200 [00:10<00:00, 18.89it/s]
Epoch 5/5 (Val): 100%|██████████| 50/50 [00:02<00:00, 22.54it/s]


Epoch 5/5, Train Loss: 0.0136, Val Loss: 0.0046, LR: 0.001000
New best model saved with validation loss: 0.0046

Average CV loss for output_dim=128, batch_size=256: 0.0048


--------------------------------------------------
Training with output_dim=128, batch_size=512
--------------------------------------------------

Fold 1/5


Epoch 1/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 12.62it/s]
Epoch 1/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 15.28it/s]


Epoch 1/5, Train Loss: 0.0600, Val Loss: 0.0070, LR: 0.001000
New best model saved with validation loss: 0.0070


Epoch 2/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.60it/s]
Epoch 2/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.38it/s]


Epoch 2/5, Train Loss: 0.0239, Val Loss: 0.0059, LR: 0.001000
New best model saved with validation loss: 0.0059


Epoch 3/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.71it/s]
Epoch 3/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 15.26it/s]


Epoch 3/5, Train Loss: 0.0190, Val Loss: 0.0053, LR: 0.001000
New best model saved with validation loss: 0.0053


Epoch 4/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.73it/s]
Epoch 4/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.83it/s]


Epoch 4/5, Train Loss: 0.0159, Val Loss: 0.0051, LR: 0.001000
New best model saved with validation loss: 0.0051


Epoch 5/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.32it/s]
Epoch 5/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.27it/s]


Epoch 5/5, Train Loss: 0.0142, Val Loss: 0.0051, LR: 0.001000
New best model saved with validation loss: 0.0051

Fold 2/5


Epoch 1/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.56it/s]
Epoch 1/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 14.87it/s]


Epoch 1/5, Train Loss: 0.0616, Val Loss: 0.0071, LR: 0.001000
New best model saved with validation loss: 0.0071


Epoch 2/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.86it/s]
Epoch 2/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 15.81it/s]


Epoch 2/5, Train Loss: 0.0243, Val Loss: 0.0058, LR: 0.001000
New best model saved with validation loss: 0.0058


Epoch 3/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 12.66it/s]
Epoch 3/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.28it/s]


Epoch 3/5, Train Loss: 0.0187, Val Loss: 0.0051, LR: 0.001000
New best model saved with validation loss: 0.0051


Epoch 4/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.23it/s]
Epoch 4/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 14.55it/s]


Epoch 4/5, Train Loss: 0.0161, Val Loss: 0.0050, LR: 0.001000
New best model saved with validation loss: 0.0050


Epoch 5/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.19it/s]
Epoch 5/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 13.39it/s]


Epoch 5/5, Train Loss: 0.0140, Val Loss: 0.0048, LR: 0.001000
New best model saved with validation loss: 0.0048

Fold 3/5


Epoch 1/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 14.16it/s]
Epoch 1/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.43it/s]


Epoch 1/5, Train Loss: 0.0605, Val Loss: 0.0074, LR: 0.001000
New best model saved with validation loss: 0.0074


Epoch 2/5 (Train): 100%|██████████| 100/100 [00:08<00:00, 12.38it/s]
Epoch 2/5 (Val): 100%|██████████| 25/25 [00:02<00:00, 10.88it/s]


Epoch 2/5, Train Loss: 0.0244, Val Loss: 0.0058, LR: 0.001000
New best model saved with validation loss: 0.0058


Epoch 3/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.76it/s]
Epoch 3/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.20it/s]


Epoch 3/5, Train Loss: 0.0187, Val Loss: 0.0054, LR: 0.001000
New best model saved with validation loss: 0.0054


Epoch 4/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.74it/s]
Epoch 4/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 14.29it/s]


Epoch 4/5, Train Loss: 0.0160, Val Loss: 0.0050, LR: 0.001000
New best model saved with validation loss: 0.0050


Epoch 5/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.59it/s]
Epoch 5/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.37it/s]


Epoch 5/5, Train Loss: 0.0139, Val Loss: 0.0052, LR: 0.001000

Fold 4/5


Epoch 1/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 14.06it/s]
Epoch 1/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 15.94it/s]


Epoch 1/5, Train Loss: 0.0636, Val Loss: 0.0076, LR: 0.001000
New best model saved with validation loss: 0.0076


Epoch 2/5 (Train): 100%|██████████| 100/100 [00:08<00:00, 12.46it/s]
Epoch 2/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.83it/s]


Epoch 2/5, Train Loss: 0.0251, Val Loss: 0.0058, LR: 0.001000
New best model saved with validation loss: 0.0058


Epoch 3/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.41it/s]
Epoch 3/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.68it/s]


Epoch 3/5, Train Loss: 0.0189, Val Loss: 0.0053, LR: 0.001000
New best model saved with validation loss: 0.0053


Epoch 4/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.71it/s]
Epoch 4/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 15.96it/s]


Epoch 4/5, Train Loss: 0.0158, Val Loss: 0.0050, LR: 0.001000
New best model saved with validation loss: 0.0050


Epoch 5/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.39it/s]
Epoch 5/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.38it/s]


Epoch 5/5, Train Loss: 0.0139, Val Loss: 0.0052, LR: 0.001000

Fold 5/5


Epoch 1/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.97it/s]
Epoch 1/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.90it/s]


Epoch 1/5, Train Loss: 0.0609, Val Loss: 0.0077, LR: 0.001000
New best model saved with validation loss: 0.0077


Epoch 2/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.22it/s]
Epoch 2/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 15.78it/s]


Epoch 2/5, Train Loss: 0.0243, Val Loss: 0.0056, LR: 0.001000
New best model saved with validation loss: 0.0056


Epoch 3/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 12.56it/s]
Epoch 3/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 16.08it/s]


Epoch 3/5, Train Loss: 0.0191, Val Loss: 0.0053, LR: 0.001000
New best model saved with validation loss: 0.0053


Epoch 4/5 (Train): 100%|██████████| 100/100 [00:06<00:00, 14.38it/s]
Epoch 4/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 15.51it/s]


Epoch 4/5, Train Loss: 0.0160, Val Loss: 0.0049, LR: 0.001000
New best model saved with validation loss: 0.0049


Epoch 5/5 (Train): 100%|██████████| 100/100 [00:07<00:00, 13.58it/s]
Epoch 5/5 (Val): 100%|██████████| 25/25 [00:01<00:00, 15.11it/s]


Epoch 5/5, Train Loss: 0.0137, Val Loss: 0.0050, LR: 0.001000

Average CV loss for output_dim=128, batch_size=512: 0.0050


Best hyperparameters:
Output dimension: 128
Batch size: 256
Average CV loss: 0.0048


Training final model with best hyperparameters...


Epoch 1/5 (Train): 100%|██████████| 250/250 [00:13<00:00, 18.83it/s]
Epoch 1/5 (Val): 100%|██████████| 63/63 [00:02<00:00, 21.38it/s]


Epoch 1/5, Train Loss: 0.0535, Val Loss: 0.0325, LR: 0.001000
New best model saved with validation loss: 0.0325


Epoch 2/5 (Train): 100%|██████████| 250/250 [00:13<00:00, 18.35it/s]
Epoch 2/5 (Val): 100%|██████████| 63/63 [00:02<00:00, 21.64it/s]


Epoch 2/5, Train Loss: 0.0262, Val Loss: 0.0286, LR: 0.001000
New best model saved with validation loss: 0.0286


Epoch 3/5 (Train): 100%|██████████| 250/250 [00:15<00:00, 16.39it/s]
Epoch 3/5 (Val): 100%|██████████| 63/63 [00:02<00:00, 24.50it/s]


Epoch 3/5, Train Loss: 0.0216, Val Loss: 0.0267, LR: 0.001000
New best model saved with validation loss: 0.0267


Epoch 4/5 (Train): 100%|██████████| 250/250 [00:13<00:00, 18.09it/s]
Epoch 4/5 (Val): 100%|██████████| 63/63 [00:02<00:00, 22.41it/s]


Epoch 4/5, Train Loss: 0.0184, Val Loss: 0.0268, LR: 0.001000


Epoch 5/5 (Train): 100%|██████████| 250/250 [00:13<00:00, 18.94it/s]
Epoch 5/5 (Val): 100%|██████████| 63/63 [00:02<00:00, 25.55it/s]

Epoch 5/5, Train Loss: 0.0167, Val Loss: 0.0261, LR: 0.001000
New best model saved with validation loss: 0.0261
Best output dimension: 128
Best batch size: 256
Best validation loss: 0.0048





In [20]:
# Code to upload final model to wandb
import wandb
import os
from dotenv import load_dotenv

# Load your API key from config.txt
def load_api_key_from_config(config_path="config.txt"):
    try:
        with open(config_path, "r") as f:
            first_line = f.readline().strip()
            if "=" in first_line:
                api_key = first_line.split("=")[1].strip()
            else:
                api_key = first_line
        return api_key
    except FileNotFoundError:
        print(f"Config file not found at {config_path}")
        return None

# Set up wandb
api_key = load_api_key_from_config()
if api_key:
    os.environ["WANDB_API_KEY"] = api_key
    wandb.login()
    print("Successfully logged in to Weights & Biases")
else:
    print("Failed to load API key")
    
# Initialize a new wandb run
run = wandb.init(
    project="twin-tower-model",
    name="final-model",
    config={
        "output_dim": best_params["output_dim"],
        "batch_size": best_params["batch_size"],
        "architecture": "Twin Tower Network",
        "dataset": "MS MARCO"
    }
)

# Upload the final model
final_model_path = "checkpoints/final_model/final_model.pt"
model_artifact = wandb.Artifact(
    name="twin-tower-final-model", 
    type="model",
    description="Twin Tower model trained on full training data with optimal hyperparameters"
)
model_artifact.add_file(final_model_path)
wandb.log_artifact(model_artifact)

wandb.finish()
print(f"Final model uploaded to Weights & Biases project: {run.project}")

Successfully logged in to Weights & Biases


Final model uploaded to Weights & Biases project: twin-tower-model
