In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports all
# modules before each cell executes. This lets edits to the project's
# local `model` package take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

## Imports

In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from model.Train import Trainer
from model.Loss import LSmoothing
from torch.utils.data import DataLoader
from model.Bert import BertForQuestionPairClassification
from model.DataManager import QuoraDataset
from transformers import BertTokenizer
from model.NegativeSampling import RandomSampling

## Constants

## Loading Data

In [2]:
# --- Configuration for this cell -------------------------------------------
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

path = "data/quora-question-pairs/train.csv"
bs = 32                 # batch size shared by the three DataLoaders
bm25_sampling = True    # True: BM25-based negatives, False: random negatives
n_negatives = 9         # `k` handed to the negative sampler
max_length = 128        # `max_length` handed to QuoraDataset

# --- Load and split ----------------------------------------------------------
# The second argument is presumably a row cap for quick experiments
# (TODO confirm against QuoraDataset.load_data).
data = QuoraDataset.load_data(path, 1000)
# String copy of the row index, stored as a column named "global_docno".
data['global_docno'] = data.index.astype(str)
train_data, val_data, test_data = QuoraDataset.split_data(data)

# --- Negative sampling -------------------------------------------------------
if bm25_sampling:
    # BM25Sampling was used here without ever being imported, so a fresh
    # "Restart & Run All" failed with NameError. The import is scoped to this
    # branch so the random-sampling path never depends on it.
    # NOTE(review): assumed to live next to RandomSampling in
    # model.NegativeSampling -- confirm the module path.
    from model.NegativeSampling import BM25Sampling

    # One index per split, built before sampling (same order as before).
    index_ref_tr = QuoraDataset.index_data(train_data, type_df="train_5")
    index_ref_val = QuoraDataset.index_data(val_data, type_df="val_5")
    index_ref_test = QuoraDataset.index_data(test_data, type_df="test_5")
    train_data = BM25Sampling.sample(index_ref_tr, train_data, k=n_negatives).sort_values(by="question1")
    val_data = BM25Sampling.sample(index_ref_val, val_data, k=n_negatives).sort_values(by="question1")
    test_data = BM25Sampling.sample(index_ref_test, test_data, k=n_negatives).sort_values(by="question1")
else:
    train_data = RandomSampling.sample(train_data, k=n_negatives).sort_values(by="question1")
    val_data = RandomSampling.sample(val_data, k=n_negatives).sort_values(by="question1")
    test_data = RandomSampling.sample(test_data, k=n_negatives).sort_values(by="question1")

# --- Datasets and loaders ----------------------------------------------------
# Only the training loader shuffles; validation and test keep the
# question1-sorted order produced above.
train_dataset = QuoraDataset(train_data, tokenizer, max_length=max_length)
train_loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
val_dataset = QuoraDataset(val_data, tokenizer, max_length=max_length)
val_loader = DataLoader(val_dataset, batch_size=bs, shuffle=False)
test_dataset = QuoraDataset(test_data, tokenizer, max_length=max_length)
test_loader = DataLoader(test_dataset, batch_size=bs, shuffle=False)

In [None]:
# Use the GPU when torch can see one, otherwise run on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
epochs = 10
learning_rate = 1e-4

# Build the classifier and place it on the selected device in one step.
model = BertForQuestionPairClassification().to(device)

# The optimizer is passed as a class, not an instance; presumably Trainer
# instantiates it inside fit() with `learning_rate` (TODO confirm).
optimizer = torch.optim.AdamW
loss = LSmoothing()
trainer = Trainer()

# Fluent configuration followed by the training run. The third loader slot is
# left as None (presumably the test loader -- verify against Trainer.set_loader).
history = (
    trainer.set_model(model)
    .set_loader(train_loader, val_loader, None)
    .set_loss_fn(loss)
    .set_optimizer(optimizer)
    .fit(learning_rate, epochs, CL=False)
)

: 

In [None]:
# Loss curves returned by the trainer's fit() call
# (presumably one value per epoch, matching the x-axis label below).
train_loss = history['training']['loss']
val_loss = history['validation']['loss']

plt.style.use('ggplot')
plt.figure(figsize=(15,10))
plt.plot(train_loss, label='train loss')
plt.plot(val_loss, label='val loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss')
# Single legend call, placed after both labelled curves exist
# (the original called plt.legend() twice).
plt.legend()
plt.show()

In [None]:
def precision_at_k(all_scores, all_targets, K):
    """Return the share of relevant items among the K highest-scored candidates.

    Args:
        all_scores: 1-D array-like of scores, one per candidate; a higher
            score ranks earlier.
        all_targets: 1-D array-like of relevance labels aligned with
            ``all_scores`` (the caller in this notebook passes 0/1 labels).
        K: Cut-off rank; must be at least 1.

    Returns:
        float: mean of the targets of the top-``K`` candidates. If fewer than
        ``K`` candidates exist, the mean is taken over those available.
        ``0.0`` when there are no candidates at all.

    Raises:
        ValueError: if ``K`` is smaller than 1 or the two inputs differ in shape.
    """
    if K < 1:
        # A zero K would yield NaN and a negative K would silently drop items
        # from the tail of the ranking instead of truncating it.
        raise ValueError(f"K must be a positive integer, got {K}")

    # Accept plain Python lists as well as numpy arrays.
    scores = np.asarray(all_scores)
    targets = np.asarray(all_targets)
    if scores.shape != targets.shape:
        raise ValueError(
            f"scores and targets must have the same shape, got {scores.shape} and {targets.shape}"
        )
    if scores.size == 0:
        # Nothing retrieved: report 0 rather than the NaN np.mean would give.
        return 0.0

    # Ascending argsort reversed gives candidate indices from best to worst.
    top_k_indices = np.argsort(scores)[::-1][:K]
    return float(np.mean(targets[top_k_indices]))

def recall_at_k(all_scores, all_targets, K, num_relevant):
    """Return the share of all relevant items that appear in the top K.

    Args:
        all_scores: 1-D array-like of scores, one per candidate; a higher
            score ranks earlier.
        all_targets: 1-D array-like of relevance labels aligned with
            ``all_scores`` (the caller in this notebook passes 0/1 labels).
        K: Cut-off rank; must be at least 1.
        num_relevant: Total number of relevant items for the query; used as
            the denominator.

    Returns:
        float: sum of the targets of the top-``K`` candidates divided by
        ``num_relevant``. ``0.0`` when ``num_relevant`` is 0 or there are no
        candidates.

    Raises:
        ValueError: if ``K`` is smaller than 1 or the two inputs differ in shape.
    """
    if K < 1:
        # A negative K would silently drop items from the tail of the ranking.
        raise ValueError(f"K must be a positive integer, got {K}")

    # Accept plain Python lists as well as numpy arrays.
    scores = np.asarray(all_scores)
    targets = np.asarray(all_targets)
    if scores.shape != targets.shape:
        raise ValueError(
            f"scores and targets must have the same shape, got {scores.shape} and {targets.shape}"
        )
    if num_relevant == 0 or scores.size == 0:
        # Nothing to recall: avoid dividing by zero.
        return 0.0

    # Ascending argsort reversed gives candidate indices from best to worst.
    top_k_indices = np.argsort(scores)[::-1][:K]
    return float(np.sum(targets[top_k_indices]) / num_relevant)

def evaluate_ranking_model(model, data_loader, K, device):
    """Average precision@K and recall@K of ``model`` over the batches of a loader.

    Each batch is scored as if it were the candidate list of a single query:
    the softmax probability of class index 1 is the ranking score and the
    batch labels are the relevance targets. Batches whose labels sum to zero
    are skipped because recall is undefined for them.

    NOTE(review): the one-query-per-batch assumption only holds if the loader's
    batch size matches the number of candidates per query. This notebook uses
    a batch size of 32 and samples with k=9, so the two may not line up --
    confirm.

    Args:
        model: Callable module invoked as ``model(**inputs)``; its output is
            indexed as ``[batch, class]`` with at least two classes.
        data_loader: Iterable yielding ``(inputs, labels)`` where ``inputs`` is
            a dict of tensors and ``labels`` is a tensor.
        K: Cut-off rank for both metrics; must be at least 1.
        device: Device on which the model and its inputs are placed.

    Returns:
        dict: ``{"precision_at_k": float, "recall_at_k": float}``, averaged
        over the evaluated batches. Both values are ``nan`` when no batch
        contained a positive label.

    Raises:
        ValueError: if ``K`` is smaller than 1.
    """
    if K < 1:
        raise ValueError(f"K must be a positive integer, got {K}")

    # The model is left in eval mode on `device`, as before.
    model.eval()
    model.to(device)
    precisions = []
    recalls = []

    with torch.no_grad():
        for inputs, labels in data_loader:
            # Only the model inputs need to be on the device; the labels are
            # consumed on the host, so they are no longer moved there and back.
            inputs = {k: v.to(device) for k, v in inputs.items()}

            outputs = model(**inputs)
            # Probability assigned to class index 1 ("similar").
            scores = torch.softmax(outputs, dim=1)[:, 1].cpu().numpy()
            targets = labels.cpu().numpy()

            # Number of relevant candidates in this batch.
            num_duplicate = np.sum(targets)
            if num_duplicate == 0:
                continue  # recall would divide by zero

            precisions.append(precision_at_k(scores, targets, K))
            recalls.append(recall_at_k(scores, targets, K, num_duplicate))

    if not precisions:
        # No evaluable batch: return NaN explicitly rather than letting
        # np.mean([]) emit a RuntimeWarning.
        undefined = float("nan")
        return {"precision_at_k": undefined, "recall_at_k": undefined}

    # Average over all evaluated batches.
    return {
        "precision_at_k": float(np.mean(precisions)),
        "recall_at_k": float(np.mean(recalls)),
    }

# Score the validation loader with a cut-off of 5 and show the averaged metrics.
metrics = evaluate_ranking_model(
    model=model,
    data_loader=val_loader,
    K=5,
    device=device,
)
print("Ranking Metrics:", metrics)