In [3]:
!pip install evaluate
import pandas as pd
from datasets import Dataset, load_dataset
from transformers import AutoTokenizer

# --- [A] Load the full dataset (REPLACEMENT FOR YOUR OLD LOAD BLOCK] ---

# Load the Amazon Fine Food Reviews dataset. This dataset also contains a 'Score' column.
print("Downloading Amazon Fine Food Reviews dataset...")
raw_dataset = load_dataset("jhan21/amazon-food-reviews-dataset")
df = raw_dataset["train"].to_pandas()

# Filter and rename the columns of interest: Text and Score
df = df[['Text', 'Score']].dropna().rename(columns={'Text': 'text', 'Score': 'score'})


# --- [B] Map Scores to Sentiment Labels ---

# Define a function to map 5-star scores to 3 sentiment labels:
# Score 1 or 2 -> Negative
# Score 3 -> Neutral
# Score 4 or 5 -> Positive
def map_score_to_sentiment(score):
    if score in [1, 2]:
        return "Negative"
    elif score == 3:
        return "Neutral"
    else: # score 4 or 5
        return "Positive"

# Apply the mapping and rename the column to 'sentiment' (our new label)
df['sentiment'] = df['score'].apply(map_score_to_sentiment)
df = df.drop(columns=['score']) # Drop the numerical score column

# Convert back to Hugging Face Dataset and split
dataset = Dataset.from_pandas(df).train_test_split(test_size=0.1)

print(f"Dataset prepared with {len(df)} total examples.")
print("Sample prepared data:")
print(dataset['train'][0])

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6
Downloading Amazon Fine Food Reviews dataset...
Dataset prepared with 568454 total examples.
Sample prepared data:
{'text': 'This was a big shocker.  I got one a piece for my two dogs.  They went absolutely nuts over these things, as if they were getting a hot dog.  I used to buy them all the time until we left the toys behind on a vacation.  The dogs loved the jerky rings but not so much the cornstarch rings.  I would put the cornstarch rings on the outside and the jerky rings on the inside.  Somehow they would eat the jerky completely without touching the cornstarch.<br /><br />Its a good toy and treat to keep them busy and out of trouble.  The treats 

In [4]:
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)

def preprocess_function_sentiment(examples):
    # Prompt the model to classify the sentiment of the text
    inputs = [f"classify sentiment: {doc}" for doc in examples["text"]]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True)

    # The label is the actual sentiment string (e.g., "Positive")
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["sentiment"], max_length=10, truncation=True) # Max length is small for sentiment words

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply the new preprocessing function
tokenized_datasets = dataset.map(preprocess_function_sentiment, batched=True)

Map:   0%|          | 0/511608 [00:00<?, ? examples/s]



Map:   0%|          | 0/56846 [00:00<?, ? examples/s]

In [5]:

from transformers import AutoModelForSeq2SeqLM, TrainingArguments, Trainer
import evaluate
import numpy as np

# Load the model and a simpler metric for classification
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
metric = evaluate.load("accuracy")

def compute_metrics_sentiment(eval_pred):
    predictions, labels = eval_pred
    # T5 outputs token IDs, so we generate the text prediction first
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)

    # We also need to decode the reference labels
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Compute accuracy on the predicted sentiment word vs. the true sentiment word
    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    return {k: round(v * 100, 4) for k, v in result.items()}

# Training arguments (can be kept from your summarization task)
training_args = TrainingArguments(
    output_dir="./sentiment_results", # Change output directory
    num_train_epochs=6,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_steps=500,
    save_total_limit=1,
    logging_steps=100,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True, # Recommended for classification
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics_sentiment
)

# Start Fine-Tuning (uncomment to run training)
# trainer.train()

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Downloading builder script: 0.00B [00:00, ?B/s]

  trainer = Trainer(


In [6]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

test_review = """
This coffee is absolutely fantastic! Rich aroma, smooth taste, and it woke me up perfectly.
Definitely a five-star product that I will be ordering again very soon.
"""

inputs = tokenizer(f"classify sentiment: {test_review}", return_tensors="pt", max_length=512, truncation=True)
inputs = {k: v.to(device) for k, v in inputs.items()}

# Generate the sentiment prediction
with torch.no_grad():
   prediction_ids = model.generate(
    **inputs,
    max_length=10, # only need a short word like "Positive"
    num_beams=1, # faster inference for classification
    do_sample=False, # choose the most likely word
    early_stopping=True
)

predicted_sentiment = tokenizer.decode(prediction_ids[0], skip_special_tokens=True)

print(f"Review: {test_review[:50]}...")
print("Predicted Sentiment:", predicted_sentiment)

The following generation flags are not valid and may be ignored: ['early_stopping']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Review: 
This coffee is absolutely fantastic! Rich aroma, ...
Predicted Sentiment: positive


In [7]:
import os
import torch
from transformers import AutoTokenizer

# --- Assuming 'trainer' and 'tokenizer' variables from previous steps are available ---

# 1. Define the final save location
final_save_dir = "./final_sentiment_model"
print(f"Saving final model to: {final_save_dir}")

# 2. Save the model weights (best checkpoint loaded due to load_best_model_at_end=True)
# This uses the H.F. Trainer object to save the model, config, and training arguments.
trainer.save_model(final_save_dir)

# 3. Save the tokenizer (essential for loading and inference)
tokenizer.save_pretrained(final_save_dir)

print("Model and tokenizer successfully saved.")

# 4. List the contents to confirm the files are present
print("\nDirectory contents:")
!ls -l {final_save_dir}

Saving final model to: ./final_sentiment_model
Model and tokenizer successfully saved.

Directory contents:
total 970328
-rw-r--r-- 1 root root      1509 Nov 12 15:02 config.json
-rw-r--r-- 1 root root       142 Nov 12 15:02 generation_config.json
-rw-r--r-- 1 root root 990345064 Nov 12 15:02 model.safetensors
-rw-r--r-- 1 root root      2543 Nov 12 15:02 special_tokens_map.json
-rw-r--r-- 1 root root    791656 Nov 12 15:02 spiece.model
-rw-r--r-- 1 root root     20830 Nov 12 15:02 tokenizer_config.json
-rw-r--r-- 1 root root   2422332 Nov 12 15:02 tokenizer.json
-rw-r--r-- 1 root root      5841 Nov 12 15:02 training_args.bin


In [8]:
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import time

# --- 1. Configuration ---

# Define the path where your specialized Sentiment Model was saved
SENTIMENT_MODEL_PATH = "./final_sentiment_model"
SENTIMENT_PROMPT = "classify sentiment: "

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# --- 2. Load Model and Tokenizer ---

try:
    # Load the tokenizer from the sentiment model's path
    tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_PATH)

    # Load the Sentiment Model
    sent_model = AutoModelForSeq2SeqLM.from_pretrained(SENTIMENT_MODEL_PATH).to(device)

    print("\n✅ Specialized Sentiment Model loaded successfully.")

except Exception as e:
    print(f"\n❌ Error loading the Sentiment Model. Ensure the model was saved to: {SENTIMENT_MODEL_PATH}")
    print(f"Details: {e}")
    # Exit if model fails to load
    exit()

# --- 3. Define Test Input ---

test_review = """
The packaging was excellent and the delivery was fast, which was a nice surprise!
However, the coffee beans tasted stale and bitter, which totally ruined my morning.
I am highly disappointed with the core product quality, despite the good service.
"""

# --- 4. Run Inference ---

def run_sentiment_inference(model, prompt_text, review_text):
    """Generates the sentiment classification label."""

    # Apply the specific classification prompt/prefix
    full_input = prompt_text + review_text

    inputs = tokenizer(full_input, return_tensors="pt", max_length=512, truncation=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}

    start_time = time.time()

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_length=10,    # Max length is small for a classification label
            min_length=1,
            num_beams=1,      # Use 1 beam for quick, deterministic classification
            do_sample=False,  # Choose the most likely word (Positive, Negative, etc.)
            early_stopping=True
        )

    inference_time = time.time() - start_time

    sentiment_label = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return sentiment_label, inference_time

# --- 5. Output Results ---

sentiment_output, duration = run_sentiment_inference(
    model=sent_model,
    prompt_text=SENTIMENT_PROMPT,
    review_text=test_review
)

print("\n" + "="*50)
print("SENTIMENT ANALYSIS TEST RESULTS")
print("="*50)
print("ORIGINAL REVIEW:")
print(test_review.strip())
print("\n[CLASSIFICATION RESULT]")
print(f"Input Prompt: '{SENTIMENT_PROMPT}...'")
print(f"Predicted Sentiment: {sentiment_output}")
print(f"Inference Time: {duration:.3f} seconds")
print("="*50)

Using device: cuda

✅ Specialized Sentiment Model loaded successfully.

SENTIMENT ANALYSIS TEST RESULTS
ORIGINAL REVIEW:
The packaging was excellent and the delivery was fast, which was a nice surprise! 
However, the coffee beans tasted stale and bitter, which totally ruined my morning. 
I am highly disappointed with the core product quality, despite the good service.

[CLASSIFICATION RESULT]
Input Prompt: 'classify sentiment: ...'
Predicted Sentiment: negative
Inference Time: 0.081 seconds
