# Model Inference
This notebook loads the augmented questions and runs inference on every question variant with a configurable list of 20 Hugging Face language models (models that fail to load are skipped).
All model outputs (with metadata) are saved in `data/model_outputs.csv` for later analysis.


In [3]:
import pandas as pd
from tqdm import tqdm
from src.hf_model_inference import load_model_pipeline, query_model

# Load the augmented questions that every model will be queried with.
augmented_df = pd.read_csv('data/augmented_questions.csv')
print("Loaded augmented dataset:", augmented_df.shape)

# Fail fast: the inference loop reads these three columns from every row, so
# a malformed input file is reported here rather than after the slow
# model-loading step has already run.
required_columns = ('variant_question', 'original_question', 'error_count')
missing_columns = [col for col in required_columns if col not in augmented_df.columns]
if missing_columns:
    raise KeyError(
        f"data/augmented_questions.csv is missing required columns: {missing_columns}"
    )

# Hugging Face model identifiers to evaluate (20 entries; adjust or extend as
# desired). Related checkpoints share a line; the list order is the order in
# which the models are iterated when loading.
model_names = [
    "gpt2",
    "EleutherAI/gpt-neo-125M", "EleutherAI/gpt-neo-1.3B", "EleutherAI/gpt-neo-2.7B",
    "microsoft/DialoGPT-small", "microsoft/DialoGPT-medium", "microsoft/DialoGPT-large",
    "facebook/blenderbot-400M-distill", "facebook/blenderbot-90M",
    "gpt2-medium", "gpt2-large", "distilgpt2",
    "t5-small", "t5-base", "t5-large",
    "bigscience/bloom-560m", "bigscience/bloom-1b1", "bigscience/bloom-3b",
    "EleutherAI/gpt-j-6B",
    "ctrl",
]

# Load one pipeline per model name. A model that cannot be loaded is skipped
# (and reported) so that a single bad checkpoint does not abort the whole run.
# NOTE(review): every loaded pipeline stays referenced by `model_pipelines`,
# so all models are held at the same time; with checkpoints of the sizes
# listed this may exhaust memory -- confirm on the target machine.
model_pipelines = {}
failed_models = {}  # model name -> error message, used for the summary below
for m in model_names:
    try:
        print(f"Loading model: {m}")
        model_pipelines[m] = load_model_pipeline(m)
    except Exception as e:
        failed_models[m] = str(e)
        print(f"Failed to load model {m}: {e}")

# Summarise the outcome in one place; the per-model messages above are easy
# to miss among the loading output.
print(f"Loaded {len(model_pipelines)} of {len(model_names)} models")
if failed_models:
    print("Models skipped:", ", ".join(failed_models))

# With no pipelines the inference loop would produce no rows and an empty
# results file would be written; stop here instead.
if not model_pipelines:
    raise RuntimeError("No model pipelines could be loaded; aborting before inference.")

# Run inference: query every loaded model with every question variant and
# keep one record per (question variant, model) pair.
outputs = []
for idx, row in tqdm(augmented_df.iterrows(), total=len(augmented_df)):
    question_variant = row['variant_question']
    original_question = row['original_question']
    error_count = row['error_count']
    for model_name, pipe in model_pipelines.items():
        # A failing query must not abort the run: the error text is stored in
        # place of the output (as before), and `inference_failed` flags the
        # record so later analysis cannot mistake the message for an answer.
        try:
            output_text = query_model(pipe, question_variant)
            inference_failed = False
        except Exception as e:
            output_text = f"Error: {e}"
            inference_failed = True
        outputs.append({
            'original_question': original_question,
            'variant_question': question_variant,
            'error_count': error_count,
            'model_name': model_name,
            'model_output': output_text,
            'inference_failed': inference_failed,
        })

# Turn the collected records into a table and write it to disk without the
# index column.
outputs_df = pd.DataFrame(data=outputs)
outputs_df.to_csv(path_or_buf='data/model_outputs.csv', index=False)
print("Model outputs saved at data/model_outputs.csv")

AttributeError: partially initialized module 'pandas' has no attribute '_pandas_parser_CAPI' (most likely due to a circular import)