In [None]:
from pathlib import Path

import pandas as pd
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

In [None]:
# Single checkpoint name shared by both `from_pretrained` calls below, so the
# tokenizer and the seq2seq model always come from the same checkpoint.
model_name = "google/flan-t5-large"
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
def prompt_v1(review):
    """Build the baseline (v1) classification prompt for one review.

    The prompt is a single instruction sentence naming the four categories
    (Complaint, Praise, Suggestion, Query), followed by a newline and the
    review text prefixed with ``Review: ``.

    Args:
        review: The review text to embed in the prompt.

    Returns:
        The prompt as a string.
    """
    instruction = (
        "Classify this review into one of these categories: "
        "Complaint, Praise, Suggestion, Query."
    )
    return "{}\nReview: {}".format(instruction, review)

# v2: role prompting + guidelines
def prompt_v2(review):
    """Build the v2 classification prompt: role statement, categories, guidelines.

    The prompt is assembled from explicit lines joined with newlines rather
    than from a triple-quoted literal inside the function body, so the
    source-code indentation does not leak into the text sent to the model
    (no leading spaces on prompt lines, and separator lines are truly empty).

    Args:
        review: The review text; it is embedded between double quotes on the
            ``Review:`` line.

    Returns:
        The prompt as a single newline-separated string.
    """
    prompt_lines = [
        "You are an assistant that classifies customer reviews.",
        "Your task is to classify the review into exactly one of the following categories:",
        "- Complaint",
        "- Praise",
        "- Suggestion",
        "- Query",
        "",
        "Guidelines:",
        "- Complaint: expresses dissatisfaction or a problem",
        "- Praise: expresses satisfaction or positive feedback",
        "- Suggestion: gives advice or improvement ideas",
        "- Query: asks a question or requests information",
        "",
        f'Review: "{review}"',
        "Output only one category name from the list above. Do not add any explanation.",
    ]
    return "\n".join(prompt_lines)

In [None]:
def get_completion(prompt):
    """Run one prompt through the module-level model and return the decoded text.

    The prompt is encoded with the module-level ``tokenizer`` (PyTorch
    tensors), passed to ``model.generate`` with ``max_length=50``, and the
    first generated sequence is decoded with special tokens stripped.

    Args:
        prompt: The full prompt string to send to the model.

    Returns:
        The decoded text of the first generated sequence.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(max_length=50, **encoded)
    # Only one prompt is encoded per call, so only the first sequence is decoded.
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [None]:
df = pd.read_csv("../data/sample_reviews.csv")

# Classify every review with both prompt variants (v1 first, then v2) and
# keep the raw model outputs side by side for later comparison.
results = []
for review in df['review']:
    results.append({
        "review": review,
        "output_v1": get_completion(prompt_v1(review)),
        "output_v2": get_completion(prompt_v2(review)),
    })


In [None]:
# Persist the collected outputs as CSV. The destination directory is created
# first: `to_csv` does not create missing parent directories, and failing here
# would discard the results of the whole inference loop.
output_path = "../results/output.csv"
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

results_df = pd.DataFrame(results)
results_df.to_csv(output_path, index=False)
print(f"Saved results to {output_path}")