In [1]:
import torch
import pandas as pd
import numpy as np
import datasets
from load_models_and_data import load_vocabulary, load_embeddings, text_to_embeddings, calc_cosine_sim, calculate_embeddings, create_packed_batch
from tqdm import tqdm
tqdm.pandas()
#from TwoTowerNN import QryTower, DocTower, TripletEmbeddingDataset, run_hyperparameter_tuning
from TwinTowerGRU import QryTower, DocTower, EmbeddingTripletDataset, run_hyperparameter_tuning, GRUTwinTowerModel
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader,  SubsetRandomSampler
from sklearn.model_selection import KFold, train_test_split
import os
import wandb
from dotenv import load_dotenv
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


API key loaded successfully


[34m[1mwandb[0m: Currently logged in as: [33mnnamdi-odozi[0m ([33mnnamdi-odozi-ave-actuaries[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
from datasets import load_dataset

# Load the soft-negative triplet dataset from Hugging Face.
# NOTE(review): the hard-negative load below is commented out, so `ds_hard_neg`
# is never defined; any later cell that relies on hard negatives needs it restored.
ds_soft_neg = load_dataset("cocoritzy/week_2_triplet_dataset_soft_negatives")
#ds_hard_neg = load_dataset("cocoritzy/week_2_triplet_dataset_hard_negatives")


In [3]:
# Paths to the embedding matrix and the matching id-to-word vocabulary file.
embeddings_path = "./downloaded_model/glove_embeddings.pt" #set this to either own-trained cbow ones or to glove pre-trained ones
vocab_path = "./downloaded_model/glove_ids_to_words.csv"

# Load embeddings and vocabulary
print("Loading embeddings and vocabulary...")
embeddings = load_embeddings(embeddings_path)
word_to_idx = load_vocabulary(vocab_path)

print(f"Loaded embeddings with shape: {embeddings.shape}")
print(f"Loaded vocabulary with {len(word_to_idx)} tokens")

# Smoke test: embed one short sentence as a query.
# text_to_embeddings returns a (tensor, length) pair; the recorded output shows a
# [26, 100] tensor with length 5, i.e. the result appears to be padded beyond the
# real token count — presumably to a fixed query length; confirm in load_models_and_data.
sample_text = "This is a test sentence"
embeddings_result, length = text_to_embeddings(sample_text, word_to_idx, embeddings, is_query=True)
print(f"Embedded text shape: {embeddings_result.shape}")

# Testing - Set numpy print options
# NOTE: set_printoptions changes numpy's printing for the rest of the kernel session.
np.set_printoptions(precision=4, suppress=True, threshold=10)  # threshold limits number of elements shown
numpy_array = embeddings_result.detach().numpy()
print("Embedding array with custom formatting:")
print(numpy_array)
print("Length is:", length)


Loading embeddings and vocabulary...
Loaded embeddings with shape: torch.Size([400000, 100])
Loaded vocabulary with 399998 tokens
Embedded text shape: torch.Size([26, 100])
Embedding array with custom formatting:
[[ 0.2616  0.4472 -0.0968 ... -0.4503  0.4952 -0.203 ]
 [ 0.1372 -0.5429  0.1942 ... -0.5206  0.2543 -0.2376]
 [-0.3046 -0.2365  0.1758 ... -0.8456 -0.0354  0.1704]
 ...
 [ 0.      0.      0.     ...  0.      0.      0.    ]
 [ 0.      0.      0.     ...  0.      0.      0.    ]
 [ 0.      0.      0.     ...  0.      0.      0.    ]]
Length is: 5


In [None]:
# Edge case: embed an empty string to see what tensor and length come back
# when there are no tokens at all.
sample_text = ""
embeddings_result, length = text_to_embeddings(sample_text, word_to_idx, embeddings, is_query=True)
print(f"Embedded text shape: {embeddings_result.shape}")

np.set_printoptions(precision=4, suppress=True, threshold=10)  # threshold limits number of elements shown
numpy_array = embeddings_result.detach().numpy()
print("Embedding array with custom formatting:")
print(numpy_array)
print("Length is:", length)


In [None]:
# Materialise the 'train' split of the soft-negative dataset as a pandas DataFrame.
# The hard-negative counterpart is disabled, so `df_hard_neg` does not exist.
df_soft_neg  = pd.DataFrame(ds_soft_neg['train'])
#df_hard_neg  = pd.DataFrame(ds_hard_neg['train'])

In [None]:
# Embed the query, positive passage and negative passage of the first triplet.
# NOTE: `length` is rebound by each call, so only the negative passage's length
# is left in it after this cell.
embedded_query, length = text_to_embeddings(df_soft_neg['query'][0], word_to_idx, embeddings, is_query=True)
embedded_positive, length = text_to_embeddings(df_soft_neg['positive_passage'][0], word_to_idx, embeddings, is_query=False)
embedded_negative, length = text_to_embeddings(df_soft_neg['negative_passage'][0], word_to_idx, embeddings, is_query=False)

print(embedded_positive.shape)
print(embedded_negative.shape)

In [None]:
# Collapse each token matrix to a single vector by averaging over dim 0.
# a = query, b = positive passage, c = negative passage.
# NOTE(review): the mean runs over every row, including any padding rows
# (which look like zeros in the earlier printout) — confirm this is intended.
a = embedded_query.mean(dim=0)
b = embedded_positive.mean(dim=0)
c = embedded_negative.mean(dim=0)
a.shape


In [None]:
import torch.nn.functional as F

# a = query, b = positive passage, c = negative passage (mean-pooled vectors).
# The value reported as "query and positive passage" must use `b`; the negative
# similarity is printed alongside it so the two can be compared directly.
cosine_similarity = F.cosine_similarity(a, b, dim=0)
cosine_similarity_neg = F.cosine_similarity(a, c, dim=0)
print(f"Cosine similarity between query and positive passage: {cosine_similarity.item()}")
print(f"Cosine similarity between query and negative passage: {cosine_similarity_neg.item()}")

In [None]:

# # Process the dataframe using apply just for first five rows
# print("Calculating similarities... This may take a while depending on dataframe size.")
# similarities = df_soft_neg[0:5].progress_apply(
#     lambda row: calculate_similarities(row, word_to_idx, embeddings), 
#     axis=1
# )

# # Join the similarities to the dataframe
# df_soft_neg_ext = pd.concat([df_soft_neg[0:5], similarities], axis=1)

# # Show a sample of the results
# #print(df_soft_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].head())
#print(df_soft_neg_ext.head())
#print(df_soft_neg_ext.columns)

In [None]:

# Run calculate_embeddings on every row of the soft-negative frame
# (progress_apply comes from tqdm.pandas() and shows a progress bar).
print("Calculating embeddings... This may take a while depending on dataframe size.")
embeddings_padded = df_soft_neg.progress_apply(
    lambda row: calculate_embeddings(row, word_to_idx, embeddings), 
    axis=1
)

# Join the per-row embedding results to the original columns, side by side
df_soft_neg_ext = pd.concat([df_soft_neg, embeddings_padded], axis=1)
print(df_soft_neg_ext.head())
# Show a sample of the results
#print(df_soft_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].head())

#print(df_soft_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].mean())

# Calculate how often the positive passage is ranked higher than negative
#higher_count = (df_soft_neg_ext['query_pos_sim'] > df_soft_neg_ext['query_neg_sim']).sum()
#total = len(df_soft_neg_ext)
#print(f"\nPositive passage ranked higher than negative: {higher_count} out of {total} ({higher_count/total:.2%})")



In [None]:
# Display the first row of the extended frame to inspect the added columns.
df_soft_neg_ext[0:1]

In [None]:
# The hard-negative loading lines earlier in the notebook are commented out, so on
# a fresh kernel `df_hard_neg` does not exist. Build it here when it is missing so
# this cell does not fail with a NameError under Restart & Run All.
if "df_hard_neg" not in globals():
    ds_hard_neg = datasets.load_dataset("cocoritzy/week_2_triplet_dataset_hard_negatives")
    df_hard_neg = pd.DataFrame(ds_hard_neg['train'])

# Run calculate_embeddings on every row of the hard-negative frame
# (progress_apply comes from tqdm.pandas() and shows a progress bar).
print("Calculating embeddings... This may take a while depending on dataframe size.")
embeddings_padded = df_hard_neg.progress_apply(
    lambda row: calculate_embeddings(row, word_to_idx, embeddings),
    axis=1
)

# Join the per-row embedding results to the original columns, side by side
df_hard_neg_ext = pd.concat([df_hard_neg, embeddings_padded], axis=1)
print(df_hard_neg_ext.head())
# Show a sample of the results
#print(df_hard_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].head())

#print(df_hard_neg_ext[['query_pos_sim', 'query_neg_sim', 'pos_neg_sim']].mean())

# Calculate how often the positive passage is ranked higher than negative
#higher_count = (df_hard_neg_ext['query_pos_sim'] > df_hard_neg_ext['query_neg_sim']).sum()
#total = len(df_hard_neg_ext)
#print(f"\nPositive passage ranked higher than negative: {higher_count} out of {total} ({higher_count/total:.2%})")



In [None]:
# Stack the soft- and hard-negative frames row-wise into one training frame.
# NOTE(review): pd.concat keeps each frame's own index by default, so index labels
# repeat in the result — confirm downstream code does not rely on a unique index.
df_all_neg_ext = pd.concat([df_soft_neg_ext, df_hard_neg_ext])
df_all_neg_ext.head()

In [None]:
# Cache the embedded frame as a pickle so the embedding step does not have to be
# re-run; the load cell below reads the same path. Only the soft-negative frame is
# saved — the hard-negative and combined saves are disabled.
df_soft_neg_ext.to_pickle("downloaded_model/df_soft_neg_ext.pkl")
#df_hard_neg_ext.to_pickle("downloaded_model/df_hard_neg_ext.pkl")
#df_all_neg_ext.to_pickle("downloaded_model/df_all_neg_ext.pkl")

In [4]:
def load_df_if_exists(file_path):
    """Read a pickled DataFrame from ``file_path`` if the file is present.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        The object loaded by ``pd.read_pickle``, or ``None`` when nothing exists
        at ``file_path`` (a "File not found" notice is printed in that case).
    """
    # Guard clause: report the missing file and bail out early.
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return None
    return pd.read_pickle(file_path)

# Load the cached soft-negative frame written by the save cell.
# NOTE: load_df_if_exists returns None when the pickle is missing, in which case
# every later cell that uses df_soft_neg_ext will fail — check the printed message.
df_soft_neg_ext = load_df_if_exists("downloaded_model/df_soft_neg_ext.pkl")
#df_hard_neg_ext = load_df_if_exists("downloaded_model/df_hard_neg_ext.pkl")
#df_all_neg_ext = load_df_if_exists("downloaded_model/df_all_neg_ext.pkl")


In [None]:
# Quick look at the first rows of the loaded frame.
df_soft_neg_ext.head()

In [None]:
#1. Create packed sequences for RNN processing
#packed_queries, packed_positives, packed_negatives = create_packed_batch(df_all_neg_ext)



In [None]:
# # 2. Feed packed sequences to your RNN models
# query_outputs, query_hidden =your_query_rnn(packed_queries)
# pos_outputs, pos_hidden = your_document_rnn(packed_positives)
# neg_outputs, neg_hidden = your_document_rnn(packed_negatives)

In [5]:
torch.cuda.empty_cache()  # Clear CUDA cache if using GPU


# Hyperparameter search over the listed values. The recorded output shows four
# training runs (2 GRU hidden sizes x 2 learning rates, 10 epochs each) followed by
# a final retrain with the best setting and a saved checkpoint under checkpoints/.
# NOTE(review): the return value is discarded here; the recorded output also ends
# with a RuntimeError raised after the final model was saved — worth investigating
# inside run_hyperparameter_tuning.
run_hyperparameter_tuning(df=df_soft_neg_ext, output_dims=[100], batch_sizes=[512], gru_hidden_dims=[100, 200], 
                         num_layers=[1], dropouts=[0.1], learning_rates=[0.0005, 1e-3], 
                         epochs=10, log_wandb=True)

Data splits: Train=47822 | Validation=15941 | Test=15941






--------------------------------------------------------------------------------
Training with: output_dim=100, batch_size=512, gru_hidden_dim=100, num_layers=1, dropout=0.1, lr=0.0005
--------------------------------------------------------------------------------


Epoch 1/10 (Train): 100%|██████████| 94/94 [00:11<00:00,  8.54it/s]
Epoch 1/10 (Val): 100%|██████████| 32/32 [00:05<00:00,  5.85it/s]


Epoch 1/10, Train Loss: 0.1745, Val Loss: 0.1494, LR: 0.000500
New best model saved with validation loss: 0.1494


Epoch 2/10 (Train): 100%|██████████| 94/94 [00:09<00:00,  9.58it/s]
Epoch 2/10 (Val): 100%|██████████| 32/32 [00:05<00:00,  5.45it/s]


Epoch 2/10, Train Loss: 0.1357, Val Loss: 0.1259, LR: 0.000500
New best model saved with validation loss: 0.1259


Epoch 3/10 (Train): 100%|██████████| 94/94 [00:09<00:00,  9.56it/s]
Epoch 3/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.82it/s]


Epoch 3/10, Train Loss: 0.1181, Val Loss: 0.1142, LR: 0.000500
New best model saved with validation loss: 0.1142


Epoch 4/10 (Train): 100%|██████████| 94/94 [00:11<00:00,  8.03it/s]
Epoch 4/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.99it/s]


Epoch 4/10, Train Loss: 0.1040, Val Loss: 0.1029, LR: 0.000500
New best model saved with validation loss: 0.1029


Epoch 5/10 (Train): 100%|██████████| 94/94 [00:08<00:00, 10.83it/s]
Epoch 5/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.14it/s]


Epoch 5/10, Train Loss: 0.0907, Val Loss: 0.0944, LR: 0.000500
New best model saved with validation loss: 0.0944


Epoch 6/10 (Train): 100%|██████████| 94/94 [00:09<00:00, 10.13it/s]
Epoch 6/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.79it/s]


Epoch 6/10, Train Loss: 0.0800, Val Loss: 0.0883, LR: 0.000500
New best model saved with validation loss: 0.0883


Epoch 7/10 (Train): 100%|██████████| 94/94 [00:09<00:00,  9.92it/s]
Epoch 7/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.30it/s]


Epoch 7/10, Train Loss: 0.0713, Val Loss: 0.0856, LR: 0.000500
New best model saved with validation loss: 0.0856


Epoch 8/10 (Train): 100%|██████████| 94/94 [00:09<00:00,  9.96it/s]
Epoch 8/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.79it/s]


Epoch 8/10, Train Loss: 0.0649, Val Loss: 0.0836, LR: 0.000500
New best model saved with validation loss: 0.0836


Epoch 9/10 (Train): 100%|██████████| 94/94 [00:08<00:00, 10.48it/s]
Epoch 9/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.69it/s]


Epoch 9/10, Train Loss: 0.0589, Val Loss: 0.0805, LR: 0.000500
New best model saved with validation loss: 0.0805


Epoch 10/10 (Train): 100%|██████████| 94/94 [00:09<00:00, 10.32it/s]
Epoch 10/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.80it/s]

Epoch 10/10, Train Loss: 0.0517, Val Loss: 0.0808, LR: 0.000500





0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▅▄▃▃▂▂▁▁
val_loss,█▆▄▃▂▂▂▁▁▁

0,1
epoch,10.0
learning_rate,0.0005
train_loss,0.05174
val_loss,0.08082






--------------------------------------------------------------------------------
Training with: output_dim=100, batch_size=512, gru_hidden_dim=100, num_layers=1, dropout=0.1, lr=0.001
--------------------------------------------------------------------------------


Epoch 1/10 (Train): 100%|██████████| 94/94 [00:09<00:00,  9.95it/s]
Epoch 1/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.61it/s]


Epoch 1/10, Train Loss: 0.1579, Val Loss: 0.1384, LR: 0.001000
New best model saved with validation loss: 0.1384


Epoch 2/10 (Train): 100%|██████████| 94/94 [00:09<00:00, 10.07it/s]
Epoch 2/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.09it/s]


Epoch 2/10, Train Loss: 0.1205, Val Loss: 0.1132, LR: 0.001000
New best model saved with validation loss: 0.1132


Epoch 3/10 (Train): 100%|██████████| 94/94 [00:11<00:00,  8.47it/s]
Epoch 3/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  6.81it/s]


Epoch 3/10, Train Loss: 0.1003, Val Loss: 0.0987, LR: 0.001000
New best model saved with validation loss: 0.0987


Epoch 4/10 (Train): 100%|██████████| 94/94 [00:11<00:00,  8.41it/s]
Epoch 4/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  6.83it/s]


Epoch 4/10, Train Loss: 0.0865, Val Loss: 0.0958, LR: 0.001000
New best model saved with validation loss: 0.0958


Epoch 5/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.56it/s]
Epoch 5/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  6.70it/s]


Epoch 5/10, Train Loss: 0.0755, Val Loss: 0.0841, LR: 0.001000
New best model saved with validation loss: 0.0841


Epoch 6/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.61it/s]
Epoch 6/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  6.63it/s]


Epoch 6/10, Train Loss: 0.0656, Val Loss: 0.0802, LR: 0.001000
New best model saved with validation loss: 0.0802


Epoch 7/10 (Train): 100%|██████████| 94/94 [00:11<00:00,  8.47it/s]
Epoch 7/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  6.66it/s]


Epoch 7/10, Train Loss: 0.0562, Val Loss: 0.0779, LR: 0.001000
New best model saved with validation loss: 0.0779


Epoch 8/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.69it/s]
Epoch 8/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  6.59it/s]


Epoch 8/10, Train Loss: 0.0493, Val Loss: 0.0762, LR: 0.001000
New best model saved with validation loss: 0.0762


Epoch 9/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.90it/s]
Epoch 9/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  6.67it/s]


Epoch 9/10, Train Loss: 0.0430, Val Loss: 0.0762, LR: 0.001000


Epoch 10/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.83it/s]
Epoch 10/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.64it/s]

Epoch 10/10, Train Loss: 0.0372, Val Loss: 0.0754, LR: 0.001000
New best model saved with validation loss: 0.0754





0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▅▄▃▃▂▂▁▁
val_loss,█▅▄▃▂▂▁▁▁▁

0,1
epoch,10.0
learning_rate,0.001
train_loss,0.03718
val_loss,0.07535






--------------------------------------------------------------------------------
Training with: output_dim=100, batch_size=512, gru_hidden_dim=200, num_layers=1, dropout=0.1, lr=0.0005
--------------------------------------------------------------------------------


Epoch 1/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.94it/s]
Epoch 1/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.74it/s]


Epoch 1/10, Train Loss: 0.1650, Val Loss: 0.1371, LR: 0.000500
New best model saved with validation loss: 0.1371


Epoch 2/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.97it/s]
Epoch 2/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.04it/s]


Epoch 2/10, Train Loss: 0.1289, Val Loss: 0.1226, LR: 0.000500
New best model saved with validation loss: 0.1226


Epoch 3/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.84it/s]
Epoch 3/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.12it/s]


Epoch 3/10, Train Loss: 0.1093, Val Loss: 0.1035, LR: 0.000500
New best model saved with validation loss: 0.1035


Epoch 4/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.72it/s]
Epoch 4/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.17it/s]


Epoch 4/10, Train Loss: 0.0943, Val Loss: 0.0963, LR: 0.000500
New best model saved with validation loss: 0.0963


Epoch 5/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.97it/s]
Epoch 5/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.28it/s]


Epoch 5/10, Train Loss: 0.0832, Val Loss: 0.0894, LR: 0.000500
New best model saved with validation loss: 0.0894


Epoch 6/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.95it/s]
Epoch 6/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.18it/s]


Epoch 6/10, Train Loss: 0.0740, Val Loss: 0.0869, LR: 0.000500
New best model saved with validation loss: 0.0869


Epoch 7/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.58it/s]
Epoch 7/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.95it/s]


Epoch 7/10, Train Loss: 0.0651, Val Loss: 0.0834, LR: 0.000500
New best model saved with validation loss: 0.0834


Epoch 8/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.93it/s]
Epoch 8/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.07it/s]


Epoch 8/10, Train Loss: 0.0574, Val Loss: 0.0811, LR: 0.000500
New best model saved with validation loss: 0.0811


Epoch 9/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.97it/s]
Epoch 9/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.82it/s]


Epoch 9/10, Train Loss: 0.0489, Val Loss: 0.0792, LR: 0.000500
New best model saved with validation loss: 0.0792


Epoch 10/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.86it/s]
Epoch 10/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.85it/s]

Epoch 10/10, Train Loss: 0.0429, Val Loss: 0.0759, LR: 0.000500
New best model saved with validation loss: 0.0759





0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▅▄▃▃▂▂▁▁
val_loss,█▆▄▃▃▂▂▂▁▁

0,1
epoch,10.0
learning_rate,0.0005
train_loss,0.0429
val_loss,0.07592






--------------------------------------------------------------------------------
Training with: output_dim=100, batch_size=512, gru_hidden_dim=200, num_layers=1, dropout=0.1, lr=0.001
--------------------------------------------------------------------------------


Epoch 1/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.91it/s]
Epoch 1/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.75it/s]


Epoch 1/10, Train Loss: 0.1542, Val Loss: 0.1321, LR: 0.001000
New best model saved with validation loss: 0.1321


Epoch 2/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.84it/s]
Epoch 2/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.95it/s]


Epoch 2/10, Train Loss: 0.1194, Val Loss: 0.1128, LR: 0.001000
New best model saved with validation loss: 0.1128


Epoch 3/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  9.07it/s]
Epoch 3/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.90it/s]


Epoch 3/10, Train Loss: 0.0999, Val Loss: 0.1048, LR: 0.001000
New best model saved with validation loss: 0.1048


Epoch 4/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.61it/s]
Epoch 4/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.83it/s]


Epoch 4/10, Train Loss: 0.0848, Val Loss: 0.0905, LR: 0.001000
New best model saved with validation loss: 0.0905


Epoch 5/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.92it/s]
Epoch 5/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.89it/s]


Epoch 5/10, Train Loss: 0.0736, Val Loss: 0.0873, LR: 0.001000
New best model saved with validation loss: 0.0873


Epoch 6/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.83it/s]
Epoch 6/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.74it/s]


Epoch 6/10, Train Loss: 0.0642, Val Loss: 0.0802, LR: 0.001000
New best model saved with validation loss: 0.0802


Epoch 7/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.73it/s]
Epoch 7/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.12it/s]


Epoch 7/10, Train Loss: 0.0570, Val Loss: 0.0854, LR: 0.001000


Epoch 8/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  9.04it/s]
Epoch 8/10 (Val): 100%|██████████| 32/32 [00:03<00:00,  8.28it/s]


Epoch 8/10, Train Loss: 0.0508, Val Loss: 0.0794, LR: 0.001000
New best model saved with validation loss: 0.0794


Epoch 9/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.95it/s]
Epoch 9/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.89it/s]


Epoch 9/10, Train Loss: 0.0438, Val Loss: 0.0781, LR: 0.001000
New best model saved with validation loss: 0.0781


Epoch 10/10 (Train): 100%|██████████| 94/94 [00:10<00:00,  8.96it/s]
Epoch 10/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.95it/s]

Epoch 10/10, Train Loss: 0.0371, Val Loss: 0.0808, LR: 0.001000





0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,▁▁▁▁▁▁▁▁▁▁
train_loss,█▆▅▄▃▃▂▂▁▁
val_loss,█▆▄▃▂▁▂▁▁▁

0,1
epoch,10.0
learning_rate,0.001
train_loss,0.0371
val_loss,0.0808




Best hyperparameters:
Output dimension: 100
Batch size: 512
GRU hidden dimension: 100
Number of GRU layers: 1
Dropout: 0.1
Learning rate: 0.001
Validation Loss: 0.0754


Training final model with best hyperparameters...


Epoch 1/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 11.42it/s]
Epoch 1/10 (Val): 100%|██████████| 32/32 [00:05<00:00,  6.32it/s]


Epoch 1/10, Train Loss: 0.1585, Val Loss: 0.1308, LR: 0.001000
New best model saved with validation loss: 0.1308


Epoch 2/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 11.75it/s]
Epoch 2/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.88it/s]


Epoch 2/10, Train Loss: 0.1218, Val Loss: 0.1138, LR: 0.001000
New best model saved with validation loss: 0.1138


Epoch 3/10 (Train): 100%|██████████| 125/125 [00:11<00:00, 11.33it/s]
Epoch 3/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.75it/s]


Epoch 3/10, Train Loss: 0.1045, Val Loss: 0.0998, LR: 0.001000
New best model saved with validation loss: 0.0998


Epoch 4/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 11.93it/s]
Epoch 4/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.64it/s]


Epoch 4/10, Train Loss: 0.0898, Val Loss: 0.0861, LR: 0.001000
New best model saved with validation loss: 0.0861


Epoch 5/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 11.82it/s]
Epoch 5/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.78it/s]


Epoch 5/10, Train Loss: 0.0785, Val Loss: 0.0839, LR: 0.001000
New best model saved with validation loss: 0.0839


Epoch 6/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 11.83it/s]
Epoch 6/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.59it/s]


Epoch 6/10, Train Loss: 0.0699, Val Loss: 0.0753, LR: 0.001000
New best model saved with validation loss: 0.0753


Epoch 7/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 11.80it/s]
Epoch 7/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.90it/s]


Epoch 7/10, Train Loss: 0.0635, Val Loss: 0.0714, LR: 0.001000
New best model saved with validation loss: 0.0714


Epoch 8/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 11.73it/s]
Epoch 8/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.54it/s]


Epoch 8/10, Train Loss: 0.0575, Val Loss: 0.0705, LR: 0.001000
New best model saved with validation loss: 0.0705


Epoch 9/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 11.54it/s]
Epoch 9/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.55it/s]


Epoch 9/10, Train Loss: 0.0533, Val Loss: 0.0690, LR: 0.001000
New best model saved with validation loss: 0.0690


Epoch 10/10 (Train): 100%|██████████| 125/125 [00:10<00:00, 12.07it/s]
Epoch 10/10 (Val): 100%|██████████| 32/32 [00:04<00:00,  7.65it/s]


Epoch 10/10, Train Loss: 0.0495, Val Loss: 0.0662, LR: 0.001000
New best model saved with validation loss: 0.0662
Final model saved at: checkpoints/final_gru_model_20250425-151105/final_gru_model_20250425-151105.pt




Torch IR graph at exception: graph(%embeddings.1 : Float(1, 26, 100, strides=[2600, 100, 1], requires_grad=0, device=cuda:0),
      %1 : Long(1, strides=[1], requires_grad=0, device=cuda:0),
      %embeddings : Float(1, 201, 100, strides=[20100, 100, 1], requires_grad=0, device=cuda:0),
      %3 : Long(1, strides=[1], requires_grad=0, device=cuda:0),
      %query_encoder.gru.weight_ih_l0 : Float(300, 100, strides=[100, 1], requires_grad=1, device=cuda:0),
      %query_encoder.gru.weight_hh_l0 : Float(300, 100, strides=[100, 1], requires_grad=1, device=cuda:0),
      %query_encoder.gru.bias_ih_l0 : Float(300, strides=[1], requires_grad=1, device=cuda:0),
      %query_encoder.gru.bias_hh_l0 : Float(300, strides=[1], requires_grad=1, device=cuda:0),
      %query_encoder.gru.weight_ih_l0_reverse : Float(300, 100, strides=[100, 1], requires_grad=1, device=cuda:0),
      %query_encoder.gru.weight_hh_l0_reverse : Float(300, 100, strides=[100, 1], requires_grad=1, device=cuda:0),
      %query_

RuntimeError: vector::_M_range_check: __n (which is 0) >= this->size() (which is 0)

### Twin Tower Network

In [None]:
# Load a locally saved final checkpoint.
# NOTE(review): the run id in this path is hard-coded and differs from the checkpoint
# written by the training output recorded above (20250425-151105) — confirm which
# run is meant to be loaded.
model_path = os.path.join("checkpoints", "final_gru_model_20250424-152045", "final_gru_model_20250424-152045.pt")
print(f"Loading model from: {model_path}")
    
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create model instance
# NOTE(review): these sizes presumably have to match the ones the checkpoint was
# trained with for load_state_dict to succeed — confirm against the run config.
model = GRUTwinTowerModel(embedding_dim=100, gru_hidden_dim=100, output_dim=100, 
                         num_layers=1, dropout=0.1)

# Load the checkpoint and extract the model state dict
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
checkpoint = torch.load(model_path, map_location=device)
# The weights are nested under the "model_state_dict" key of the checkpoint
model.load_state_dict(checkpoint["model_state_dict"])

# Move the model to the target device and switch it to evaluation mode
model.to(device).eval()

print("Model loaded successfully!")

In [None]:
# Load the pre-trained model from a Weights & Biases artifact into `model2`.
# The artifact is fetched through wandb.Api() (the wandb.init() call is not used).
api = wandb.Api()
artifact = api.artifact("nnamdi-odozi-ave-actuaries/gru-twin-tower-model/final_gru_model_20250424-174424:v0")

#https://wandb.ai/nnamdi-odozi-ave-actuaries/gru-twin-tower-model/artifacts/model/final_gru_model_20250424-174424/v0/files/final_gru_model_20250424-174424.pt
artifact_dir = artifact.download()

# Find the model file; sort the names so the pick is deterministic when the
# artifact directory holds more than one checkpoint.
model_files = sorted(f for f in os.listdir(artifact_dir) if f.endswith(('.pt', '.pth')))
if not model_files:
    raise FileNotFoundError(f"No model files found in {artifact_dir}")

model_path = os.path.join(artifact_dir, model_files[0])
print(f"Found model at: {model_path}")

# Load the checkpoint onto the available device.
# NOTE: torch.load unpickles the file, so only load artifacts from a trusted project.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
checkpoint = torch.load(model_path, map_location=device)

# Create model with correct dimensions
model2 = GRUTwinTowerModel(
    embedding_dim=100,
    gru_hidden_dim=100,  # Use 100 as seen in your model print
    output_dim=100,
    num_layers=1,
    dropout=0.1
)

# Load state dict (handle both formats: nested under "model_state_dict" or bare)
if "model_state_dict" in checkpoint:
    model2.load_state_dict(checkpoint["model_state_dict"])
else:
    model2.load_state_dict(checkpoint)

# Move model2 itself to the device. Assigning `model.to(device)` here would throw
# away the weights just loaded and silently alias the other cell's `model`
# (or raise NameError if that cell had not been run).
model2 = model2.to(device)
model2.eval()
print("Model loaded successfully!")

In [None]:
# Print the module structure of the locally loaded model.
print(model)

In [None]:
# Sanity check: score the (query, positive passage) pairs of three consecutive rows.
df_slice = df_soft_neg_ext[0:3]  # any 3 consecutive rows will do

# Inference only, so gradient tracking is switched off for the whole forward pass.
with torch.no_grad():
    # Sequence lengths as tensors on the target device
    query_lens = torch.tensor(df_slice['query_length'].tolist(), device=device)
    pos_lens = torch.tensor(df_slice['pos_length'].tolist(), device=device)

    # Per-row embedding tensors stacked into one batch per column
    query_embs = torch.stack(df_slice['query_emb'].tolist()).to(device)
    pos_embs = torch.stack(df_slice['pos_emb'].tolist()).to(device)

    # One forward pass yields the query and document vectors for every row
    query_vecs, doc_vecs = model(query_embs, query_lens, pos_embs, pos_lens)

    # Row-wise cosine similarity between each query and its positive passage
    sims = F.cosine_similarity(query_vecs, doc_vecs, dim=1)

print("Similarities:", sims.cpu().numpy())

In [None]:
# Display the rows that were just scored.
df_slice

In [None]:
# Same check on a larger slice: the first 512 (query, positive passage) pairs.
df_slice = df_soft_neg_ext[0:512]  # Doing more rows

# Inference only, so gradient tracking is switched off for the whole forward pass.
with torch.no_grad():
    # Sequence lengths as tensors on the target device
    query_lens = torch.tensor(df_slice['query_length'].tolist(), device=device)
    pos_lens = torch.tensor(df_slice['pos_length'].tolist(), device=device)

    # Per-row embedding tensors stacked into one batch per column
    query_embs = torch.stack(df_slice['query_emb'].tolist()).to(device)
    pos_embs = torch.stack(df_slice['pos_emb'].tolist()).to(device)

    # One forward pass yields the query and document vectors for every row
    query_vecs, doc_vecs = model(query_embs, query_lens, pos_embs, pos_lens)

    # Row-wise cosine similarity between each query and its positive passage
    sims = F.cosine_similarity(query_vecs, doc_vecs, dim=1)

print("Similarities:", sims.cpu().numpy())
# The cell's displayed value: average similarity across the slice
sims.cpu().numpy().mean()

In [None]:
# Print the module structure of the loaded model (repeats an earlier cell).
print(model)

In [None]:
# Hand-written test pair: the query and the document are the same text.
query_test = "This is RBA"
doc_test = "This is RBA"
# Whitespace token counts.
# NOTE(review): q_l and d_l are overwritten in the next cell by the lengths
# returned from text_to_embeddings, so these two values are never used.
q_l = len(query_test.split())
d_l = len(doc_test.split())
 

In [None]:
# Embed the test pair; is_query selects query vs. document handling inside
# text_to_embeddings. q_l / d_l now hold the lengths returned by that function.
query_emb, q_l = text_to_embeddings(query_test, word_to_idx, embeddings, is_query=True)
doc_emb, d_l = text_to_embeddings(doc_test, word_to_idx, embeddings, is_query=False)
print(query_emb.shape)
print(doc_emb)
print(q_l, d_l)

In [None]:
# Mean-pool each token matrix over dim 0 to get one vector per text
# (an untrained baseline to compare the model's similarity against).
q = query_emb.mean(dim=0)
d = doc_emb.mean(dim=0)

In [None]:
# Cosine similarity between the two mean-pooled vectors (q and d are 1-D, hence dim=0).
# NOTE: this rebinds `sims`, replacing the model similarities from the earlier cell.
sims = torch.nn.functional.cosine_similarity(q, d, dim=0)
    
print("Similarities:", sims.cpu().numpy())

In [None]:
# 1. Make sure tensors are on the right device
# `device` is rebound to wherever the model's parameters currently live.
device = next(model.parameters()).device
query_emb = query_emb.to(device)  # Shape should be [seq_length, embedding_dim]
q_l = torch.tensor([q_l], device=device)  # Single value for sequence length

# 2. Add batch dimension for model processing
# NOTE(review): this cell rebinds query_emb and q_l in place, so it is not
# idempotent — running it twice adds a second batch dimension and re-wraps q_l.
# Re-run the embedding cell above before re-running this one.
query_emb = query_emb.unsqueeze(0)  # Shape becomes [1, seq_length, embedding_dim]


In [None]:
# 3. Query-only inference using just dataframe columns 
#query_row = df_soft_neg_ext[0]  # Use any row
#test_query_emb = query_row['query_emb'].unsqueeze(0).to(device)
#test_query_len = torch.tensor([query_row['query_length']]).to(device)

# Just run through query encoder and tower
# Uses query_emb (with batch dimension) and q_l (length tensor) prepared in the previous cell.
with torch.no_grad():
    # Encode the sequence, then project it through the query tower
    query_encoded = model.query_encoder(query_emb, q_l)
    query_vector = model.query_tower(query_encoded)
    # L2-normalise each row so the vector has unit length
    query_vector = torch.nn.functional.normalize(query_vector, p=2, dim=1) #I don't think this is needed, but let's keep it for now

print("Query vector shape:", query_vector.shape)
# First five components of the single query vector
print("Values:", query_vector[0, :5].cpu().numpy())

In [None]:
def evaluate_model(qryTower, docTower, dataloader, device):
    """Measure how often a query is more similar to its positive than to its negative.

    Both towers are put in eval mode and every batch is scored without gradient
    tracking. A sample counts as correct when the cosine similarity between the
    encoded query and the encoded positive is strictly greater than the one
    between the encoded query and the encoded negative.

    Args:
        qryTower: Module applied to the query batch.
        docTower: Module applied to the positive and the negative batch.
        dataloader: Iterable of dict batches with 'query', 'positive' and
            'negative' tensors whose first dimension is the batch size.
        device: Device the batch tensors are moved to before the forward pass.
            The towers are not moved; they are expected to be on it already.

    Returns:
        Accuracy as a float in [0, 1]; 0.0 when the dataloader yields no samples.
    """
    qryTower.eval()
    docTower.eval()

    total = 0
    correct = 0

    # Evaluation only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for batch in dataloader:
            # Move the batch to the requested device
            query_emb = batch['query'].to(device)
            pos_emb = batch['positive'].to(device)
            neg_emb = batch['negative'].to(device)

            # Forward pass through towers
            query_encoded = qryTower(query_emb)
            pos_encoded = docTower(pos_emb)
            neg_encoded = docTower(neg_emb)

            # Calculate similarities
            pos_sim = torch.nn.functional.cosine_similarity(query_encoded, pos_encoded)
            neg_sim = torch.nn.functional.cosine_similarity(query_encoded, neg_encoded)

            correct += (pos_sim > neg_sim).sum().item()
            total += query_emb.size(0)

    # An empty dataloader would otherwise divide by zero.
    if total == 0:
        print("Eval Accuracy (query closer to pos than neg): no samples to evaluate")
        return 0.0

    acc = correct / total
    print(f"Eval Accuracy (query closer to pos than neg): {acc:.4f}")
    return acc



# NOTE(review): leftover accumulator — the only visible reference to total_loss is
# the commented-out print in the next cell; consider removing it.
total_loss = 0
    

In [None]:
#print(f"Epoch {epoch+1}, Avg Loss: {total_loss / len(dataloader):.4f}")
# NOTE(review): final_qry_tower, final_doc_tower and dataloader are not defined in
# any visible cell of this notebook, so this call depends on leftover kernel state
# and will presumably raise NameError on a fresh kernel.
# evaluate_model also calls each tower with a single tensor, whereas the GRU model's
# query encoder above is called with (embeddings, lengths) — confirm which towers
# this is meant to evaluate.
evaluate_model(final_qry_tower, final_doc_tower, dataloader, device)

In [None]:
# text_to_embeddings returns an (embedding_tensor, length) pair, as every other call
# in this notebook shows, so the pair must be unpacked before tensor methods are used.
# is_query is passed explicitly, matching the other query/document calls.
query_emb, _ = text_to_embeddings("What is RBA", word_to_idx, embeddings, is_query=True)
pos_emb, _ = text_to_embeddings("What is RBA", word_to_idx, embeddings, is_query=False)

# Ensure tensors have at least two dimensions before applying mean
if query_emb.dim() == 1:
    query_emb = query_emb.unsqueeze(0)
if pos_emb.dim() == 1:
    pos_emb = pos_emb.unsqueeze(0)

# Mean-pool over the token axis to get one vector per text
query_emb = query_emb.mean(dim=0)
pos_emb = pos_emb.mean(dim=0)

# Identical texts, so the baseline similarity is expected to be close to 1
print(torch.nn.functional.cosine_similarity(query_emb, pos_emb, dim=0))

