#**An innovative solution inspired by state-of-the-art research: We fine-tuned three transformers and built a meta-learner on top to aggregate their probability predictions and determine the final class.**

In [None]:
#Necessary imports

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score
import torch

#Empty the cuda cache to be sure we have enough space to load the transformer models
torch.cuda.empty_cache()




In [None]:
#Mounting to google drive to be able to save and load the models there
from google.colab import drive
import os

drive.mount('/content/drive')
SAVE_PATH = "/content/drive/MyDrive/models_nli"
os.makedirs(SAVE_PATH, exist_ok=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


**Preprocessing the data**

In [None]:
def clean_text(text):
    """Clean text: normalize spaces, remove misplaced punctuation, fix contractions."""
    text = str(text).strip().lower()

    # Fix spaces around punctuation (keep punctuation but standardize spacing)
    text = re.sub(r'\s+([?.!,"])', r'\1', text)  # Removes spaces before punctuation
    text = re.sub(r'([?.!,"])', r'\1 ', text)  # Ensures one space after punctuation

    # Normalize quotes (remove extra surrounding quotes)
    text = re.sub(r'^"|"$', '', text)

    # Handle common contractions
    text = re.sub(r"\bd'you\b", "do you", text)
    text = re.sub(r"\b'cause\b", "because", text)
    text = re.sub(r"\bi'm\b", "i am", text)
    text = re.sub(r"\bain't\b", "is not", text)

    return text



In [None]:
#Train set for finetunign the transformers
# Load training dataset
df_train = pd.read_csv("train.csv", quotechar='"', delimiter=",", encoding="utf-8")
df_train.columns = ["premise", "hypothesis", "label"]  # Ensure correct column names
df_train.dropna(inplace=True)  # Remove missing values
print(df_train.head())

                                             premise  \
0  yeah i don't know cut California in half or so...   
1                      actual names will not be used   
2          The film was directed by Randall Wallace.   
3   "How d'you know he'll sign me on?"Anse studie...   
4  In the light of the candles his cheeks looked ...   

                                          hypothesis  label  
0  Yeah. I'm not sure how to make that fit. Maybe...      1  
1  For the sake of privacy, actual names are not ...      1  
2  The film was directed by Randall Wallace and s...      1  
3       Anse looked at himself in a cracked mirror.       1  
4  Drew regarded his best friend and noted that i...      1  


In [None]:
# Apply text cleaning
df_train["premise"] = df_train["premise"].apply(clean_text)
df_train["hypothesis"] = df_train["hypothesis"].apply(clean_text)

df_train["label"] = df_train["label"].astype(int)

# Print first few samples
print("Training Dataset Sample:")
print(df_train.head())

Training Dataset Sample:
                                             premise  \
0  yeah i don't know cut california in half or so...   
1                      actual names will not be used   
2         the film was directed by randall wallace.    
3   how do you know he'll sign me on? " anse stud...   
4  in the light of the candles his cheeks looked ...   

                                          hypothesis  label  
0  yeah.  i am not sure how to make that fit.  ma...      1  
1  for the sake of privacy,  actual names are not...      1  
2  the film was directed by randall wallace and s...      1  
3       anse looked at himself in a cracked mirror.       1  
4  drew regarded his best friend and noted that i...      1  


In [None]:
#Validation set for finetuning the transformers
# Load validation dataset
df_val = pd.read_csv("dev.csv", quotechar='"', delimiter=",", encoding="utf-8")
df_val.columns = ["premise", "hypothesis", "label"]  # Ensure correct column names
df_val.dropna(inplace=True)  # Remove missing values

# Apply text cleaning
df_val["premise"] = df_val["premise"].apply(clean_text)
df_val["hypothesis"] = df_val["hypothesis"].apply(clean_text)

df_val["label"] = df_val["label"].astype(int)

# Print first few samples
print("Validation Dataset Sample:")
print(df_val.head())

Validation Dataset Sample:
                                             premise  \
0  by starting at the soft underbelly,  the 16, 0...   
1  the class had broken into a light sweat,  but ...   
2  samson had his famous haircut here,  but he wo...   
3  a man with a black shirt holds a baby while a ...   
4  i know that many of you are interested in addr...   

                                          hypothesis  label  
0  general nelson a.  miles had 30, 000 troops in...      0  
1        the class grew more tense as time went on.       1  
2  it was unknown where exactly within the town s...      1  
3  a darkly dressed man passes a crying baby to a...      0  
4                     the problems must be addressed      1  


In [None]:
#Training and validation set for the meta learner
# Split into 80% training and 20% validation
import pandas as pd
from sklearn.model_selection import train_test_split
df_meta_train, df_meta_val = train_test_split(df_val, test_size=0.2, random_state=42, stratify=df_val["label"])


**Dataset creation function and loading the needed tokenizers**

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.utils.data import Dataset, DataLoader

class NliDataset(Dataset):
  """Dataset for the NLI task"""
  def __init__(self, premises, hypotheses, labels, tokenizer, max_lenth = 124):
    self.premises = premises
    self.hypotheses = hypotheses
    self.labels = labels
    self.tokenizer = tokenizer
    self.max_length = max_lenth

  def __len__(self):
    return len(self.premises)

  def __getitem__(self, idx):
      encoding = self.tokenizer(self.premises[idx], self.hypotheses[idx], padding='max_length',truncation=True, max_length=self.max_length, return_tensors='pt')
      return {
          'input_ids': encoding['input_ids'].squeeze(0),
          'attention_mask': encoding['attention_mask'].squeeze(0),
          'label': torch.tensor(self.labels[idx], dtype=torch.long)
      }

# Initialize tokenizers for different models
bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
# deberta_tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
albert_tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")




#**-----------------Transformers finetuning---------------------------**

In [None]:
#Create datasets for the transformers
bert_t_train_dataset = NliDataset(df_train["premise"].tolist(), df_train["hypothesis"].tolist(), df_train["label"].tolist(), bert_tokenizer)
bert_t_val_dataset = NliDataset(df_val["premise"].tolist(), df_val["hypothesis"].tolist(), df_val["label"].tolist(), bert_tokenizer)

roberta_t_train_dataset = NliDataset(df_train["premise"].tolist(), df_train["hypothesis"].tolist(), df_train["label"].tolist(), roberta_tokenizer)
roberta_t_val_dataset = NliDataset(df_val["premise"].tolist(), df_val["hypothesis"].tolist(), df_val["label"].tolist(), roberta_tokenizer)


# deberta_train_dataset = NliDataset(df_train["premise"].tolist(), df_train["hypothesis"].tolist(), df_train["label"].tolist(), deberta_tokenizer)
# deberta_val_dataset = NliDataset(df_val["premise"].tolist(), df_val["hypothesis"].tolist(), df_val["label"].tolist(), deberta_tokenizer)

albert_t_train_dataset = NliDataset(df_train["premise"].tolist(), df_train["hypothesis"].tolist(), df_train["label"].tolist(), albert_tokenizer)
albert_t_val_dataset = NliDataset(df_val["premise"].tolist(), df_val["hypothesis"].tolist(), df_val["label"].tolist(), albert_tokenizer)


# Create DataLoaders for the transformers
bert_t_train_loader = DataLoader(bert_t_train_dataset, batch_size=64, shuffle=False)
bert_t_val_loader = DataLoader(bert_t_val_dataset, batch_size=64, shuffle=False)



roberta_t_train_loader = DataLoader(roberta_t_train_dataset, batch_size=64, shuffle=False)
roberta_t_val_loader = DataLoader(roberta_t_val_dataset, batch_size=64, shuffle=False)

# deberta_train_loader = DataLoader(deberta_train_dataset, batch_size=64, shuffle=True)
# deberta_val_loader = DataLoader(deberta_val_dataset, batch_size=64, shuffle=False)

albert_t_train_loader = DataLoader(albert_t_train_dataset, batch_size=64, shuffle=False)
albert_t_val_loader = DataLoader(albert_t_val_dataset, batch_size=64, shuffle=False)

In [None]:
import torch.nn as nn
import torch.optim as optim
from transformers import AutoModelForSequenceClassification, get_scheduler
from sklearn.metrics import accuracy_score

from sklearn.metrics import f1_score

def train_transformer(model_name, train_loader, val_loader, epochs=3, learning_rate=2e-5, use_focal_loss=False, patience=3):
    """Train a transformer model:
       - Gradient Clipping
       - Learning Rate Scheduler
       - Early Stopping
       - Weight Decay (Regularization)
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load model
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
    model.to(device)

    # Loss function
    if use_focal_loss:
        class FocalLoss(nn.Module):
            def __init__(self, gamma=2.0, alpha=0.25):
                super(FocalLoss, self).__init__()
                self.gamma = gamma
                self.alpha = alpha

            def forward(self, inputs, targets):
                ce_loss = nn.CrossEntropyLoss()(inputs, targets)
                pt = torch.exp(-ce_loss)
                focal_loss = (self.alpha * (1 - pt) ** self.gamma * ce_loss).mean()
                return focal_loss

        criterion = FocalLoss()
    else:
        criterion = nn.CrossEntropyLoss()

    # Optimizer with Weight Decay for regularization
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=1e-5)

    # Learning Rate Scheduler
    lr_scheduler = get_scheduler(
        "linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=len(train_loader) * epochs
    )

    # Early stopping setup
    best_val_loss = float("inf")
    early_stop_counter = 0

    # Training loop
    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for batch in train_loader:
            input_ids, attention_mask, labels = batch["input_ids"].to(device), batch["attention_mask"].to(device), batch["label"].to(device)

            optimizer.zero_grad()
            outputs = model(input_ids, attention_mask=attention_mask)
            loss = criterion(outputs.logits, labels)
            loss.backward()

            # Apply gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()
            lr_scheduler.step()  # Update learning rate

            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)

        # Validation phase
        model.eval()
        val_loss = 0
        all_preds, all_labels = [], []

        with torch.no_grad():
            for batch in val_loader:
                input_ids, attention_mask, labels = batch["input_ids"].to(device), batch["attention_mask"].to(device), batch["label"].to(device)
                outputs = model(input_ids, attention_mask=attention_mask)
                loss = criterion(outputs.logits, labels)
                val_loss += loss.item()

                preds = torch.argmax(outputs.logits, dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.cpu().numpy())

        avg_val_loss = val_loss / len(val_loader)
        val_acc = accuracy_score(all_labels, all_preds)

        print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.4f}")
        val_f1 = f1_score(all_labels, all_preds, average='weighted')
        print(f"Epoch {epoch+1} | Val Acc: {val_acc:.4f} | Val F1: {val_f1:.4f}")

        # Early stopping check
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), f"{model_name}_best.pth")  # Save the best model
        else:
            early_stop_counter += 1
            if early_stop_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}. No improvement in validation loss for {patience} consecutive epochs.")
                break

    return model


In [None]:
#WE RUN THIS CELL ONLY TO TRAIN THE TRANSFORMERS, ONCE WE HAVE TRAINED THEM WE RUN THE NEXT CELL JUST TO LOAD THEM FROM DRIVE

#Fine-tuning the transformers by calling our function and saving the models on the drive
bert_model = train_transformer("bert-base-uncased", bert_t_train_loader, bert_t_val_loader)
torch.save(bert_model.state_dict(), f"{SAVE_PATH}/bert_finetuned.pth")
roberta_model = train_transformer("roberta-base", roberta_t_train_loader, roberta_t_val_loader)
torch.save(roberta_model.state_dict(), f"{SAVE_PATH}/roberta_finetuned.pth")

# deberta_model = train_transformer("microsoft/deberta-v3-base", deberta_train_loader, deberta_val_loader, use_focal_loss=True)
# torch.save(deberta_model.state_dict(), f"{SAVE_PATH}/deberta_finetuned.pth")

albert_model = train_transformer("albert-base-v2", albert_t_train_loader, albert_t_val_loader, use_focal_loss=True)
torch.save(albert_model.state_dict(), f"{SAVE_PATH}/albert_finetuned.pth")

In [None]:
#LOADS THE FINETUNED TRANSFORMERS
import torch
from transformers import AutoModelForSequenceClassification

SAVE_PATH = "/content/drive/MyDrive/models_nli"

def load_finetuned_model(model_name, path):
    """Load a fine-tuned transformer model from saved weights."""
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)  # 2-class NLI task
    model.load_state_dict(torch.load(path, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu")))
    model.eval()
    return model

bert_model = load_finetuned_model("bert-base-uncased", f"{SAVE_PATH}/bert_finetuned.pth")
roberta_model = load_finetuned_model("roberta-base", f"{SAVE_PATH}/roberta_finetuned.pth")
albert_model = load_finetuned_model("albert-base-v2", f"{SAVE_PATH}/albert_finetuned.pth")


#**------------Meta learner------------**

In [None]:
#Definition of the meta model
import torch.nn as nn

class MetaLearner(nn.Module):
    def __init__(self, input_size, num_classes, hidden_size=128, dropout=0.2):
        super(MetaLearner, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size),  # First layer
            nn.ReLU(),  # Activation function
            nn.Dropout(dropout),  # Regularization
            nn.Linear(hidden_size, hidden_size // 2),  # Second layer
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 2, num_classes)  # Output layer
        )

    def forward(self, x):
        return self.model(x)


In [None]:
#Create datasets for the meta learner
bert_meta_train_dataset = NliDataset(df_meta_train["premise"].tolist(), df_meta_train["hypothesis"].tolist(), df_meta_train["label"].tolist(), bert_tokenizer)
bert_meta_val_dataset = NliDataset(df_meta_val["premise"].tolist(), df_meta_val["hypothesis"].tolist(), df_meta_val["label"].tolist(), bert_tokenizer)

roberta_meta_train_dataset = NliDataset(df_meta_train["premise"].tolist(), df_meta_train["hypothesis"].tolist(), df_meta_train["label"].tolist(), roberta_tokenizer)
roberta_meta_val_dataset = NliDataset(df_meta_val["premise"].tolist(), df_meta_val["hypothesis"].tolist(), df_meta_val["label"].tolist(), roberta_tokenizer)

# deberta_train_dataset = NliDataset(df_train["premise"].tolist(), df_train["hypothesis"].tolist(), df_train["label"].tolist(), deberta_tokenizer)
# deberta_val_dataset = NliDataset(df_val["premise"].tolist(), df_val["hypothesis"].tolist(), df_val["label"].tolist(), deberta_tokenizer)

albert_meta_train_dataset = NliDataset(df_meta_train["premise"].tolist(), df_meta_train["hypothesis"].tolist(), df_meta_train["label"].tolist(), albert_tokenizer)
albert_meta_val_dataset = NliDataset(df_meta_val["premise"].tolist(), df_meta_val["hypothesis"].tolist(), df_meta_val["label"].tolist(), albert_tokenizer)



# Create DataLoaders for the meta learner
bert_meta_train_loader = DataLoader(bert_meta_train_dataset, batch_size=32, shuffle=False)
bert_meta_val_loader = DataLoader(bert_meta_val_dataset, batch_size=32, shuffle=False)

roberta_meta_train_loader = DataLoader(roberta_meta_train_dataset, batch_size=32, shuffle=False)
roberta_meta_val_loader = DataLoader(roberta_meta_val_dataset, batch_size=32, shuffle=False)

# deberta_train_loader = DataLoader(deberta_train_dataset, batch_size=64, shuffle=True)
# deberta_val_loader = DataLoader(deberta_val_dataset, batch_size=64, shuffle=False)

albert_meta_train_loader = DataLoader(albert_meta_train_dataset, batch_size=32, shuffle=False)
albert_meta_val_loader = DataLoader(albert_meta_val_dataset, batch_size=32, shuffle=False)

In [None]:
import torch.nn.functional as F

def get_predictions(model, dataloader):
    """Get softmax probabilities from the trained transformers so we can use them as input for the metalearner"""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    all_preds = []
    with torch.no_grad():
        for batch in dataloader:
            input_ids, attention_mask = batch["input_ids"].to(device), batch["attention_mask"].to(device)
            outputs = model(input_ids, attention_mask=attention_mask)
            probs = F.softmax(outputs.logits, dim=1)  # Convert logits to probabilities
            all_preds.append(probs.cpu())

    return torch.cat(all_preds, dim=0)  # Stack all predictions

# Get softmax probabilities for the meta training set from the aready finetuned transformer models
bert_preds = get_predictions(bert_model, bert_meta_train_loader)
roberta_preds = get_predictions(roberta_model, roberta_meta_train_loader)
albert_preds = get_predictions(albert_model, albert_meta_train_loader)

# Combine predictions into a single tensor: shape [num_samples, num_models * num_classes], this is the input for the meta learner
meta_inputs = torch.cat([bert_preds, roberta_preds, albert_preds], dim=1)
meta_labels = torch.tensor(df_meta_train["label"].values)

# Create meta-learning dataset
meta_dataset = torch.utils.data.TensorDataset(meta_inputs, meta_labels)
meta_loader = DataLoader(meta_dataset, batch_size=64, shuffle=True)






In [None]:
# Get softmax probabilities for validation set for the meta learner
bert_val_preds = get_predictions(bert_model, bert_meta_val_loader)
roberta_val_preds = get_predictions(roberta_model, roberta_meta_val_loader)
albert_val_preds = get_predictions(albert_model, albert_meta_val_loader)

# Combine into meta-validation inputs
meta_val_inputs = torch.cat([bert_val_preds, roberta_val_preds, albert_val_preds], dim=1)
meta_val_labels = torch.tensor(df_meta_val["label"].values)

# Create validation meta-learning dataset
meta_val_dataset = torch.utils.data.TensorDataset(meta_val_inputs, meta_val_labels)
meta_val_loader = DataLoader(meta_val_dataset, batch_size=64, shuffle=False)


In [None]:
#Training fo the meta model
meta_model = MetaLearner(input_size=meta_inputs.shape[1], num_classes=len(torch.unique(meta_labels)))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
meta_model.to(device)
optimizer = torch.optim.Adam(meta_model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for epoch in range(10):  # It only overfits after 10 epochs
    meta_model.train()
    total_loss = 0
    for inputs, labels in meta_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = meta_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}, Loss: {total_loss/len(meta_loader)}")

#save the model for future use
torch.save(meta_model.state_dict(), f"{SAVE_PATH}/meta_model.pth")

Epoch 1, Loss: 0.39430740948985604
Epoch 2, Loss: 0.3222089970813078
Epoch 3, Loss: 0.32126476992579067
Epoch 4, Loss: 0.32209871946012275
Epoch 5, Loss: 0.3124090107048259
Epoch 6, Loss: 0.31059635281562803
Epoch 7, Loss: 0.3079418997554218
Epoch 8, Loss: 0.3103067455922856
Epoch 9, Loss: 0.30710761003634507
Epoch 10, Loss: 0.30447167543803944


In [None]:
#Evaluation of the meta model
meta_model.eval()
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in meta_val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = meta_model(inputs)
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

print(f"Meta-learning model accuracy: {correct / total:.4f}")


Meta-learning model accuracy: 0.8976


In [None]:
#Evaluation of the meta model
from sklearn.metrics import f1_score

meta_model.eval()
correct = 0
total = 0
all_preds = []
all_labels = []

with torch.no_grad():
    for inputs, labels in meta_val_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = meta_model(inputs)
        preds = torch.argmax(outputs, dim=1)

        # Store predictions & labels for F1-score
        all_preds.append(preds.cpu())
        all_labels.append(labels.cpu())

        # Calculate accuracy
        correct += (preds == labels).sum().item()
        total += labels.size(0)

# Convert lists to tensors
all_preds = torch.cat(all_preds).numpy()
all_labels = torch.cat(all_labels).numpy()

# Compute F1-score
f1 = f1_score(all_labels, all_preds, average="weighted")

print(f"Meta-learning model accuracy: {correct / total:.4f}")
print(f"Meta-learning model F1-score: {f1:.4f}")


# df_results = pd.DataFrame({"prediction": all_preds})
# df_results.to_csv("meta_model_predictions.csv", index=False)

# print("Predictions saved to meta_model_predictions.csv")

Meta-learning model accuracy: 0.8976
Meta-learning model F1-score: 0.8975
Predictions saved to meta_model_predictions.csv


#**Getting results for the test dataset with no 'labels' column**

In [None]:
# Load test dataset
df_test = pd.read_csv("test.csv", quotechar='"', delimiter=",", encoding="utf-8")
df_test.columns = ["premise", "hypothesis"]  # Ensure correct column names


# Apply text cleaning
df_test["premise"] = df_test["premise"].apply(clean_text)
df_test["hypothesis"] = df_test["hypothesis"].apply(clean_text)



# Print first few samples
print("Test Dataset Sample:")
print(df_test.head())
print(f"Test Dataset Shape: {df_test.shape}")


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.utils.data import Dataset, DataLoader

#Dataset for the test set with no 'labels' column
class NliTestDataset(Dataset):
  """Dataset for the NLI task"""
  def __init__(self, premises, hypotheses, tokenizer, max_lenth = 124):
    self.premises = premises
    self.hypotheses = hypotheses
    self.tokenizer = tokenizer
    self.max_length = max_lenth

  def __len__(self):
    return len(self.premises)

  def __getitem__(self, idx):
      encoding = self.tokenizer(self.premises[idx], self.hypotheses[idx], padding='max_length',truncation=True, max_length=self.max_length, return_tensors='pt')
      return {
          'input_ids': encoding['input_ids'].squeeze(0),
          'attention_mask': encoding['attention_mask'].squeeze(0)
      }

# Initialize tokenizers for different models
bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
# deberta_tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
albert_tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

#Create test datasets for each model
bert_test_dataset = NliTestDataset(df_test["premise"].tolist(), df_test["hypothesis"].tolist(), bert_tokenizer)
roberta_test_dataset = NliTestDataset(df_test["premise"].tolist(), df_test["hypothesis"].tolist(), roberta_tokenizer)
albert_test_dataset = NliTestDataset(df_test["premise"].tolist(), df_test["hypothesis"].tolist(), albert_tokenizer)

# Create Test DataLoaders
bert_test_loader = DataLoader(bert_test_dataset, batch_size=32, shuffle=False)
roberta_test_loader = DataLoader(roberta_test_dataset, batch_size=32, shuffle=False)
albert_test_loader = DataLoader(albert_test_dataset, batch_size=32, shuffle=False)

In [None]:
#Get predictions for the test data
bert_test_preds = get_predictions(bert_model, bert_test_loader)
roberta_test_preds = get_predictions(roberta_model, roberta_test_loader)
albert_test_preds = get_predictions(albert_model, albert_test_loader)

# Combine predictions into a single tensor: shape [num_samples, num_models * num_classes]
meta_inputs = torch.cat([bert_test_preds, roberta_test_preds, albert_test_preds], dim=1)

meta_test_dataset = torch.utils.data.TensorDataset(meta_inputs)
meta_test_loader = DataLoader(meta_test_dataset, batch_size=64, shuffle=False)


In [None]:
import torch
import pandas as pd

def get_meta_predictions(meta_model, test_loader, save_csv=False, csv_filename="meta_model_test_predictions.csv"):
    """
    Generates predictions for a given test dataset using the trained meta_model.

    Args:
    - meta_model: Trained PyTorch model
    - test_loader: DataLoader for test dataset
    - save_csv: Whether to save predictions as CSV (default: False)
    - csv_filename: Name of the CSV file if saving (default: "meta_model_test_predictions.csv")

    Returns:
    - all_preds: Numpy array containing the predictions
    """
    meta_model.eval()  # Set to evaluation mode
    all_preds = []

    with torch.no_grad():
        for inputs in test_loader:  # No labels needed for test data
            inputs = inputs[0].to(device)  # Extract inputs from DataLoader
            outputs = meta_model(inputs)
            preds = torch.argmax(outputs, dim=1)  # Get class predictions
            all_preds.append(preds.cpu())

    # Convert list of tensors to a single numpy array
    all_preds = torch.cat(all_preds).numpy()

    # Save predictions to CSV if required
    if save_csv:
        df_results = pd.DataFrame({"prediction": all_preds})
        df_results.to_csv(csv_filename, index=False)
        print(f"Predictions saved to {csv_filename}")

    return all_preds

all_preds=[]
all_preds = get_meta_predictions(meta_model, meta_test_loader, save_csv=True)


In [1]:
import torch
import transformers

print("Torch version:", torch.__version__)
print("Transformers version:", transformers.__version__)


Torch version: 2.6.0+cu124
Transformers version: 4.50.3


In [None]:
!pip install transformers==4.37.2 --force-reinstall

