In [None]:
import torch
import pandas as pd
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
# Path passed to both from_pretrained calls below, so the tokenizer and the
# model weights are loaded from the same checkpoint directory.
trained_model = "./llama-student-phase1"
tokenizer = AutoTokenizer.from_pretrained(trained_model)
# The inference cell tokenizes with padding=True, which raises a ValueError
# when the tokenizer has no pad token. Fall back to the EOS token only when the
# checkpoint did not define one, so an explicitly configured pad token is kept.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(trained_model)
# Put the model in evaluation mode; it is only used for generation here.
model.eval()

In [None]:
# ====== Load dataset ======
def load_partition(path: str) -> Dataset:
    """Read the CSV file at ``path`` and return its contents as a ``Dataset``."""
    return Dataset.from_pandas(pd.read_csv(path))

dataset = load_partition("./merged_dataset.csv")

In [None]:
# Perform inference step here
def generate_output(text: str, max_new_tokens: int = 200) -> str:
    """Generate text for ``text`` using the module-level ``model`` and ``tokenizer``.

    Args:
        text: Prompt to tokenize and pass to ``model.generate``.
        max_new_tokens: Maximum number of tokens to generate beyond the prompt.

    Returns:
        The first sequence returned by ``model.generate``, decoded to a string
        with special tokens skipped. For a causal LM this sequence normally
        starts with the prompt tokens, so the prompt text is part of the result.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Keep the input tensors on the same device as the model; this changes
    # nothing when both already live on the CPU.
    inputs = inputs.to(model.device)
    with torch.no_grad():
        # max_new_tokens budgets only the continuation. The earlier
        # max_length=200 also counted the prompt tokens, so a prompt of 200 or
        # more tokens left no room for any generated text.
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Column that receives the generated text for each row.
prediction_column = "generated_reasoning"

# Re-running this cell after a successful run would otherwise fail in
# add_column on a duplicate column name, and only after the whole inference
# loop had finished. Drop a stale prediction column up front instead.
if prediction_column in dataset.column_names:
    dataset = dataset.remove_columns(prediction_column)

# Generate reasoning and classification for the "string" field of every row.
predictions = [generate_output(row["string"]) for row in dataset]

# Save results
dataset = dataset.add_column(prediction_column, predictions)
output_csv_path = "./llama-student-phase2.csv"
dataset.to_pandas().to_csv(output_csv_path, index=False)

print(f"Inference completed. Results saved to {output_csv_path}")