# **Demo code for loading the saved transformer models and the meta-learner on top of them**

The training notebook already ends with code that produces results for a given file, but here we do it more clearly, without all the training code around it.
We assume the file we want predictions for does not have a 'labels' column.

In [1]:
#Necessary imports

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import re
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score
import torch

In [2]:
def clean_text(text):
    """Lower-case a value and run it through a fixed sequence of regex rewrites.

    The value is converted with ``str()``, stripped of surrounding whitespace
    and lower-cased. The substitutions listed below are then applied one after
    another, each one seeing the output of the previous one.

    Behaviour worth knowing (all follow directly from the patterns):
    - the second rule appends a space after *every* ``? . ! , "`` character,
      so a double space appears where one already followed and a trailing
      space appears when the text ends with punctuation;
    - the quote rule runs after that, so a closing quote that has just had a
      space appended no longer sits at the end of the string.

    NOTE(review): presumably this must stay identical to the preprocessing used
    when the saved models were trained -- confirm before changing any rule.

    Args:
        text: Any value; non-strings are stringified first.

    Returns:
        The cleaned string.
    """
    cleaned = str(text).strip().lower()

    # Ordered (pattern, replacement) pairs; the order is significant.
    substitutions = (
        (r'\s+([?.!,"])', r'\1'),      # drop whitespace in front of punctuation
        (r'([?.!,"])', r'\1 '),        # append a space after each punctuation mark
        (r'^"|"$', ''),                # strip a quote at the very start / very end
        (r"\bd'you\b", "do you"),      # contractions
        (r"\b'cause\b", "because"),
        (r"\bi'm\b", "i am"),
        (r"\bain't\b", "is not"),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned



In [3]:
# Path of the CSV file to get predictions for -- change it to your own file.
file_path = "test.csv"

# Read the CSV and name its columns premise / hypothesis.
# The column assignment raises unless the file has exactly two columns.
df = pd.read_csv(file_path, quotechar='"', sep=",", encoding="utf-8")
df.columns = ["premise", "hypothesis"]
#df.dropna(inplace=True)  # Remove missing values
print(df.head())

# Normalise both text columns with clean_text.
for text_column in ("premise", "hypothesis"):
    df[text_column] = df[text_column].apply(clean_text)

# Show the first rows again, now after cleaning.
# NOTE(review): the label below says "Training" although this frame is the
# file we want predictions for.
print("Training Dataset Sample:")
print(df.head())

                                             premise  \
0  Boy wearing red hat, blue jacket pushing plow ...   
1  A blond woman in a black shirt is standing beh...   
2  Three people in uniform are outdoors and are o...   
3  A person, in a striped blue shirt and pants, i...   
4  A man, woman, and child get their picture take...   

                        hypothesis  
0    The boy is surrounded by snow  
1           The woman is standing.  
2     Uniformed people are outside  
3            The person is running  
4  A family on vacation is posing.  
Training Dataset Sample:
                                             premise  \
0  boy wearing red hat,  blue jacket pushing plow...   
1  a blond woman in a black shirt is standing beh...   
2  three people in uniform are outdoors and are o...   
3  a person,  in a striped blue shirt and pants, ...   
4  a man,  woman,  and child get their picture ta...   

                         hypothesis  
0     the boy is surrounded by snow  
1  

In [4]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.utils.data import Dataset, DataLoader

#Dataset for the test set with no 'labels' column
class NliTestDataset(Dataset):
    """Unlabelled premise/hypothesis pairs, tokenized one pair at a time on access.

    Args:
        premises: Indexable collection of premise texts.
        hypotheses: Indexable collection of hypothesis texts, aligned with
            ``premises`` by position.
        tokenizer: Callable invoked as ``tokenizer(premise, hypothesis, ...)``
            whose result exposes ``'input_ids'`` and ``'attention_mask'``.
        max_lenth: Value passed to the tokenizer as ``max_length`` (the
            parameter name keeps its original spelling for compatibility).
    """

    def __init__(self, premises, hypotheses, tokenizer, max_lenth = 124):
        self.premises, self.hypotheses = premises, hypotheses
        self.tokenizer = tokenizer
        self.max_length = max_lenth

    def __len__(self):
        # The dataset size is taken from the premises only.
        return len(self.premises)

    def __getitem__(self, idx):
        premise, hypothesis = self.premises[idx], self.hypotheses[idx]
        encoded = self.tokenizer(
            premise,
            hypothesis,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt',
        )
        # squeeze(0) removes the leading size-1 axis (presumably the batch axis
        # added by return_tensors='pt') so the DataLoader can stack the items.
        return {
            field: encoded[field].squeeze(0)
            for field in ('input_ids', 'attention_mask')
        }

# One tokenizer per base model; each model gets its own tokenized view of the data.
bert_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
roberta_tokenizer = AutoTokenizer.from_pretrained("roberta-base")
# deberta_tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-base")
albert_tokenizer = AutoTokenizer.from_pretrained("albert-base-v2")

# Pull the two text columns out of the frame once and share them between datasets.
test_premises = df["premise"].tolist()
test_hypotheses = df["hypothesis"].tolist()

# Unlabelled test datasets, one per tokenizer.
bert_test_dataset = NliTestDataset(test_premises, test_hypotheses, bert_tokenizer)
roberta_test_dataset = NliTestDataset(test_premises, test_hypotheses, roberta_tokenizer)
albert_test_dataset = NliTestDataset(test_premises, test_hypotheses, albert_tokenizer)

# shuffle=False keeps every loader in the row order of df, so the per-model
# outputs stay aligned with each other and with the input file.
bert_test_loader = DataLoader(bert_test_dataset, shuffle=False, batch_size=32)
roberta_test_loader = DataLoader(roberta_test_dataset, shuffle=False, batch_size=32)
albert_test_loader = DataLoader(albert_test_dataset, shuffle=False, batch_size=32)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/684 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/760k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.31M [00:00<?, ?B/s]

In [5]:
import torch
from transformers import AutoModelForSequenceClassification

# Folder with the fine-tuned weights (presumably a mounted Google Drive in Colab).
# Only referenced by the commented-out loader calls later in this cell.
SAVE_PATH = "/content/drive/MyDrive/models_nli"

def load_finetuned_model(model_name, path, num_labels=2):
    """Build a sequence-classification model and load fine-tuned weights into it.

    Args:
        model_name: Hugging Face model identifier used to build the architecture
            (e.g. "bert-base-uncased").
        path: Path of a file containing the state dict of the fine-tuned model.
        num_labels: Size of the classification head. Defaults to 2, the value
            this notebook has always used.

    Returns:
        The model with the saved weights loaded, switched to eval mode. It is
        not moved to any device here.

    Raises:
        RuntimeError: From ``load_state_dict`` when the saved keys or shapes do
            not match the freshly built model.
    """
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)

    # The freshly built model is not moved to a GPU here, so map the checkpoint
    # to CPU as well instead of staging it on the GPU first.
    # NOTE(security): torch.load unpickles the file; only use checkpoints from
    # a source you trust (consider weights_only=True where the installed torch
    # version supports it).
    state_dict = torch.load(path, map_location="cpu")
    model.load_state_dict(state_dict)

    model.eval()
    return model

# Alternative: load the weights from SAVE_PATH instead of downloading them.
# bert_model = load_finetuned_model("bert-base-uncased", f"{SAVE_PATH}/bert_finetuned.pth")
# roberta_model = load_finetuned_model("roberta-base", f"{SAVE_PATH}/roberta_finetuned.pth")
# albert_model = load_finetuned_model("albert-base-v2", f"{SAVE_PATH}/albert_finetuned.pth")

# gdown is used below to fetch the shared checkpoint files by their Google Drive file id.
!pip install gdown

# Fine-tuned RoBERTa weights:
# https://drive.google.com/file/d/1-9vsFS7GrXtF-3aKZ3D3fnSOXUUy8LGS/view?usp=drive_link
# Each download must finish before the matching load_finetuned_model call reads the file.
!gdown --id 1-9vsFS7GrXtF-3aKZ3D3fnSOXUUy8LGS -O roberta_finetuned.pth
roberta_model = load_finetuned_model("roberta-base", "roberta_finetuned.pth")

# Fine-tuned BERT weights:
# https://drive.google.com/file/d/1--g4wl1gyOWCEHjfhzk1dIHVwF40YI4A/view?usp=drive_link
!gdown --id 1--g4wl1gyOWCEHjfhzk1dIHVwF40YI4A -O bert_finetuned.pth
bert_model = load_finetuned_model("bert-base-uncased", "bert_finetuned.pth")


# Fine-tuned ALBERT weights:
# https://drive.google.com/file/d/1-WoL-zxDotkggGn3kwsJcSWrWV3s1YWh/view?usp=sharing
!gdown --id 1-WoL-zxDotkggGn3kwsJcSWrWV3s1YWh -O albert_finetuned.pth
albert_model = load_finetuned_model("albert-base-v2", "albert_finetuned.pth")








Downloading...
From (original): https://drive.google.com/uc?id=1-9vsFS7GrXtF-3aKZ3D3fnSOXUUy8LGS
From (redirected): https://drive.google.com/uc?id=1-9vsFS7GrXtF-3aKZ3D3fnSOXUUy8LGS&confirm=t&uuid=5a048aa6-6aee-4c16-be17-87235118a2fe
To: /content/roberta_finetuned.pth
100% 499M/499M [00:08<00:00, 61.2MB/s]


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading...
From (original): https://drive.google.com/uc?id=1--g4wl1gyOWCEHjfhzk1dIHVwF40YI4A
From (redirected): https://drive.google.com/uc?id=1--g4wl1gyOWCEHjfhzk1dIHVwF40YI4A&confirm=t&uuid=e58f081f-38e4-4ef2-a464-d8e3b1d1c04c
To: /content/bert_finetuned.pth
100% 438M/438M [00:12<00:00, 34.6MB/s]


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Downloading...
From (original): https://drive.google.com/uc?id=1-WoL-zxDotkggGn3kwsJcSWrWV3s1YWh
From (redirected): https://drive.google.com/uc?id=1-WoL-zxDotkggGn3kwsJcSWrWV3s1YWh&confirm=t&uuid=d0d50260-d5f4-4533-80ad-af91401bcf9a
To: /content/albert_finetuned.pth
100% 46.8M/46.8M [00:04<00:00, 11.3MB/s]


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/47.4M [00:00<?, ?B/s]

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert-base-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
import torch.nn.functional as F

def get_predictions(model, dataloader):
    """Run ``model`` over ``dataloader`` and return softmax probabilities.

    The model is moved to CUDA when available (otherwise CPU) and put in eval
    mode; it is left on that device afterwards. Gradients are disabled for the
    forward passes. The probabilities are used as input for the meta-learner.

    Args:
        model: Module called as ``model(input_ids, attention_mask=...)`` whose
            result has a ``.logits`` attribute.
        dataloader: Iterable of batches with ``"input_ids"`` and
            ``"attention_mask"`` tensors.

    Returns:
        A CPU tensor with the per-batch probabilities (softmax over dim=1)
        concatenated along dim 0, in iteration order.
    """
    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(target)
    model.eval()

    def _batch_probabilities(batch):
        # Move one batch to the model's device, run it, and bring the result back to CPU.
        ids = batch["input_ids"].to(target)
        mask = batch["attention_mask"].to(target)
        logits = model(ids, attention_mask=mask).logits
        return F.softmax(logits, dim=1).cpu()

    with torch.no_grad():
        chunks = [_batch_probabilities(batch) for batch in dataloader]

    return torch.cat(chunks, dim=0)

# Get softmax probabilities for the test data from each fine-tuned transformer,
# each model reading the loader built with its own tokenizer.
bert_test_preds = get_predictions(bert_model, bert_test_loader)
roberta_test_preds = get_predictions(roberta_model, roberta_test_loader)
albert_test_preds = get_predictions(albert_model, albert_test_loader)

# Combine predictions into a single tensor: shape [num_samples, num_models * num_classes]
# Columns are laid out as bert, roberta, albert.
# NOTE(review): presumably this order must match the one used when the
# meta-learner was trained -- confirm before reordering.
meta_inputs = torch.cat([bert_test_preds, roberta_test_preds, albert_test_preds], dim=1)

# Wrap the stacked probabilities so the meta-learner can be fed in batches;
# shuffle=False keeps predictions in the same row order as the input file.
meta_test_dataset = torch.utils.data.TensorDataset(meta_inputs)
meta_test_loader = DataLoader(meta_test_dataset, batch_size=64, shuffle=False)

In [7]:
#Definition of the meta model
import torch.nn as nn

class MetaLearner(nn.Module):
    """Small feed-forward classifier stacked on top of the transformer outputs.

    Layout: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear,
    held in ``self.model`` (an ``nn.Sequential``), so the state-dict keys are
    ``model.0.*``, ``model.3.*`` and ``model.6.*``.

    Args:
        input_size: Number of input features.
        num_classes: Number of output logits.
        hidden_size: Width of the first hidden layer; the second hidden layer
            uses ``hidden_size // 2``.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_size, num_classes, hidden_size=128, dropout=0.2):
        super().__init__()
        narrow_size = hidden_size // 2
        stages = [
            # first hidden block
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            # second hidden block, half as wide
            nn.Linear(hidden_size, narrow_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            # output logits
            nn.Linear(narrow_size, num_classes),
        ]
        self.model = nn.Sequential(*stages)

    def forward(self, x):
        """Return the raw logits for ``x``."""
        logits = self.model(x)
        return logits

In [8]:
# Build the meta-learner with one input feature per column of meta_inputs
# and two output classes, then move it to the GPU when one is available.
meta_model = MetaLearner(input_size=meta_inputs.shape[1], num_classes=2)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
meta_model.to(device)

# Trained meta-learner weights:
# https://drive.google.com/file/d/1Xw1U7s7LB0fgGZy8l6_JZvcdXp2o7mx8/view?usp=sharing


# Download the weights file before loading it.
!gdown --id 1Xw1U7s7LB0fgGZy8l6_JZvcdXp2o7mx8 -O meta_model.pth

# NOTE(security): torch.load unpickles the downloaded file; only use it with
# checkpoints from a source you trust.
meta_model.load_state_dict(torch.load("meta_model.pth", map_location=device))

# Set to evaluation mode
meta_model.eval()


Downloading...
From: https://drive.google.com/uc?id=1Xw1U7s7LB0fgGZy8l6_JZvcdXp2o7mx8
To: /content/meta_model.pth
100% 39.9k/39.9k [00:00<00:00, 59.7MB/s]


MetaLearner(
  (model): Sequential(
    (0): Linear(in_features=6, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.2, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.2, inplace=False)
    (6): Linear(in_features=64, out_features=2, bias=True)
  )
)

In [9]:
import torch
import pandas as pd

def get_meta_predictions(meta_model, test_loader, save_csv=False, csv_filename="meta_model_test_predictions.csv"):
    """
    Generates class predictions for a test dataset using the trained meta_model.

    The inputs are moved to the device the model's parameters live on, so the
    function does not depend on any module-level ``device`` variable.

    Args:
    - meta_model: Trained PyTorch model
    - test_loader: Iterable of batches whose first element is the input tensor
      (e.g. a DataLoader over a TensorDataset); no labels are needed
    - save_csv: Whether to save predictions as CSV (default: False)
    - csv_filename: Name of the CSV file if saving (default: "meta_model_test_predictions.csv")

    Returns:
    - all_preds: Numpy array containing the predicted class index per sample
      (empty when test_loader yields no batches)
    """
    meta_model.eval()  # Set to evaluation mode

    # Follow the model's own device; fall back to CPU for a parameter-less model.
    first_param = next(meta_model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    batch_preds = []
    with torch.no_grad():
        for batch in test_loader:
            inputs = batch[0].to(model_device)  # batch is a tuple; element 0 holds the features
            outputs = meta_model(inputs)
            batch_preds.append(torch.argmax(outputs, dim=1).cpu())  # class with the highest logit

    # torch.cat raises on an empty list, so handle an empty loader explicitly.
    if batch_preds:
        all_preds = torch.cat(batch_preds).numpy()
    else:
        all_preds = torch.empty(0, dtype=torch.long).numpy()

    # Save predictions to CSV if required
    if save_csv:
        df_results = pd.DataFrame({"prediction": all_preds})
        df_results.to_csv(csv_filename, index=False)
        print(f"Predictions saved to {csv_filename}")

    return all_preds

# Run the meta-learner over the stacked transformer probabilities and save the
# predictions to the default CSV file. (The former `all_preds=[]` placeholder
# was overwritten immediately, so it has been dropped.)
all_preds = get_meta_predictions(meta_model, meta_test_loader, save_csv=True)


Predictions saved to meta_model_test_predictions.csv
