# In [None]:
import transformers
import torch
import pandas as pd
import json

# ------------------------
# Input data
# ------------------------
# The CSV is assumed to contain a column named "text".
df = pd.read_csv("your_dataset.csv")

# ------------------------
# Text-generation pipeline (built a single time, reused afterwards)
# ------------------------
model_path = "/gpfs1/llm/llama-3.2-hf/Meta-Llama-3.2-3B-Instruct"
classifier = transformers.pipeline(
    task="text-generation",
    model=model_path,
    device_map="auto",
    model_kwargs=dict(torch_dtype=torch.bfloat16),
)

# ------------------------
# Classification function
# ------------------------
def _first_json_object(response):
    """Return the first JSON object (dict) embedded in ``response``, or None."""
    decoder = json.JSONDecoder()
    start = response.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(response, start)
        except json.JSONDecodeError:
            # Not decodable from this brace; try the next one.
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        start = response.find("{", start + 1)
    return None


def classify_row(text, *, generator=None):
    """Classify the infant-feeding stance expressed in a single text.

    Builds an instruction prompt around ``text``, generates a completion with
    greedy decoding, and parses the first JSON object found in the completion.

    Args:
        text: The text to classify. Values that are not strings (for example
            ``None`` or NaN from a missing CSV cell) and blank strings are
            not sent to the model.
        generator: Optional callable invoked as
            ``generator(prompt, max_new_tokens=..., do_sample=...,
            return_full_text=...)`` and returning a list whose first item is
            a dict with a "generated_text" key. Defaults to the module-level
            ``classifier`` pipeline.

    Returns:
        The JSON object produced by the model, as a dict. When no JSON object
        can be extracted (or the text is missing), a dict with the keys
        "error" and "raw_response" is returned instead.
    """
    if not isinstance(text, str) or not text.strip():
        # Same shape as the parse-failure result so downstream columns align.
        return {"error": "Empty text", "raw_response": ""}

    prompt = (
        "You are an assistant trained to identify stances about infant feeding methods in a given text.\n\n"
        "Definitions:\n"
        "- \"Stance\": A belief or opinion (e.g., \"breastfeeding is best\").\n"
        "- \"Behavior\": An action (e.g., \"I breastfed for a year\") — not a stance by itself.\n"
        "- \"Emotion\": A feeling (e.g., \"I felt overwhelmed\") — not a stance, but may influence one.\n\n"
        "Known infant feeding targets:\n"
        "[\"breastfeeding\", \"formula feeding\", \"combo feeding\", \"bottle feeding\", "
        "\"nursing\", \"pumping\", \"latching\", \"exclusive breastfeeding\", \"exclusive formula feeding\"]\n\n"
        f"Text:\n\"{text}\"\n\n"
        "Instructions:\n"
        "1. Determine whether the text expresses a stance about an infant feeding method.\n"
        "2. If yes, return the matching target(s) from the list above.\n"
        "3. If no stance is present, indicate whether the text reflects behavior, emotion, or something else.\n\n"
        "Return a JSON object in this format:\n"
        "{\n"
        "  \"Infant_Feeding_Targets\": [\"<target(s)>\"],\n"
        "  \"Stance\": \"<In Favor / Against / Neutral / Unclear>\",\n"
        "  \"Confidence\": <0–100>,\n"
        "  \"Reasoning\": \"<brief explanation>\",\n"
        "  \"Notes\": \"<e.g., behavior or emotion or something else>\"\n"
        "}"
    )

    if generator is None:
        generator = classifier

    # return_full_text=False: by default the pipeline echoes the prompt in
    # front of the completion, which would make the output unparseable.
    # No temperature is passed: it only applies to sampling, and
    # do_sample=False already selects greedy decoding.
    result = generator(
        prompt,
        max_new_tokens=400,
        do_sample=False,
        return_full_text=False,
    )
    response = result[0]["generated_text"].strip()

    # The model may surround the JSON with prose or code fences, so look for
    # the first decodable object instead of parsing the whole response.
    parsed = _first_json_object(response)
    if parsed is None:
        return {"error": "Invalid JSON", "raw_response": response}
    return parsed

# ------------------------
# Classify every row
# ------------------------
row_results = df["text"].apply(classify_row)
df["stance_result"] = row_results

# Spread each result dict across its own columns, appended after the
# original (non-result) columns.
source_columns = df.drop(columns=["stance_result"])
result_columns = df["stance_result"].apply(pd.Series)
df_expanded = pd.concat([source_columns, result_columns], axis=1)

# ------------------------
# Write the expanded table to disk
# ------------------------
df_expanded.to_csv("classified_dataset.csv", index=False)
print("Done! Results saved to classified_dataset.csv")