## 1. Setup & Imports


In [None]:
import os
from pathlib import Path
import warnings

import rootutils

# Hand the current working directory to rootutils, using ".project-root" as
# the indicator and requesting pythonpath handling.
rootutils.setup_root(
    Path.cwd(),
    indicator=".project-root",
    pythonpath=True,
)

# Use PROJECT_ROOT from the environment when it is set; otherwise fall back
# to the current working directory.
ROOT_DIR = Path(os.getenv("PROJECT_ROOT", Path.cwd()))
print(f"Project root: {ROOT_DIR}")

# Silence every warning for the remainder of the notebook.
warnings.filterwarnings(action="ignore")

## 2. Initialize Spark


In [None]:
from src.amazon_reviews_analysis.utils import build_spark

# Create the Spark session through the project helper.
spark = build_spark()

# Report success together with the version of the running session.
print(
    "✓ Spark Session created successfully!",
    f"Spark Version: {spark.version}",
    sep="\n",
)

## 3. Load Model


In [None]:
from pyspark.ml import PipelineModel

# Name of the input column that holds the raw review text.
TEXT_COL = "text"
# Directory of the saved pipeline, relative to the project root.
MODEL_DIR = ROOT_DIR.joinpath("models", "spark_lr_classifier")

# PipelineModel.load is given the path as a plain string.
model = PipelineModel.load(str(MODEL_DIR))

print(f"✓ Model loaded from {MODEL_DIR}")
print(f"Pipeline stages: {[stage.__class__.__name__ for stage in model.stages]}")

## 4. Define Label Mapping


In [None]:
from pyspark.sql.functions import udf, col
from pyspark.sql.types import StringType

# Numeric prediction value -> sentiment name.
LABEL_MAP = {
    0.0: "negative",
    1.0: "positive",
    2.0: "neutral",
}


@udf(returnType=StringType())
def label_to_sentiment(prediction):
    """Return the sentiment name for a prediction value, or "unknown"."""
    try:
        return LABEL_MAP[prediction]
    except KeyError:
        # Values missing from LABEL_MAP (including None) map to "unknown".
        return "unknown"


print("Label mapping:")
for k, v in LABEL_MAP.items():
    print(f"  {int(k)} -> {v}")

## 5. Inference on Custom Reviews


In [None]:
# Hand-written sample reviews used as inference input.
custom_reviews = [
    "This product is absolutely amazing! Best purchase I've ever made. Highly recommend!",
    "Terrible quality. Broke after just one day of use. Complete waste of money.",
    "It's okay, nothing special. Does the job but nothing more.",
    "I love this! Works perfectly and arrived quickly. Five stars!",
    "Not worth the price. Very disappointed with the quality.",
    "Average product. Some good features, some bad. Neutral overall.",
    "Exceeded my expectations! Will definitely buy again.",
    "Awful experience. Product was damaged and customer service was unhelpful.",
    "Decent for the price. Gets the job done.",
    "Perfect gift for my friend. She absolutely loved it!",
]

# zip() over a single list yields one-element tuples, i.e. one row per review
# with the text as its only field.
input_df = spark.createDataFrame(list(zip(custom_reviews)), [TEXT_COL])

print(f"Loaded {input_df.count()} custom reviews for inference")

In [None]:
# Run the loaded pipeline over the sample reviews.
predictions = model.transform(input_df)

# Add a readable sentiment name next to the numeric prediction.
results = predictions.withColumn(
    "sentiment",
    label_to_sentiment(col("prediction")),
)

# Header followed by an 80-character rule, then the result table.
print("Inference Results:", "=" * 80, sep="\n")
results.select([TEXT_COL, "prediction", "sentiment"]).show(truncate=60)

## 6. Detailed Results with Probabilities


In [None]:
from pyspark.ml.functions import vector_to_array
from pyspark.sql.functions import element_at

# `probability` is produced by a Spark ML model as an ML Vector column, and
# `element_at` only accepts array or map columns. Convert the vector to an
# array<double> first so the per-class values can be extracted.
probability_array = vector_to_array(col("probability"))

# element_at is 1-based, so positions 1/2/3 correspond to the LABEL_MAP
# entries 0.0 (negative), 1.0 (positive) and 2.0 (neutral).
detailed_results = results.select(
    TEXT_COL,
    "sentiment",
    element_at(probability_array, 1).alias("prob_negative"),
    element_at(probability_array, 2).alias("prob_positive"),
    element_at(probability_array, 3).alias("prob_neutral"),
)

print("Detailed Results with Class Probabilities:")
detailed_results.show(truncate=40)

## 7. Interactive Inference


In [None]:
def predict_sentiment(review_text: str) -> dict:
    """Predict the sentiment of a single review.

    Args:
        review_text: Review text to classify.

    Returns:
        A dict with the keys:
            ``text``: the input text, unchanged.
            ``sentiment``: the ``LABEL_MAP`` name of the predicted class, or
                ``"unknown"`` when the class is not in ``LABEL_MAP``.
            ``confidence``: the probability of the predicted class.
            ``probabilities``: probability per sentiment name, one entry for
                every class listed in ``LABEL_MAP``.
    """
    single_review_df = spark.createDataFrame([(review_text,)], [TEXT_COL])
    # Exactly one row goes in, so only the first output row is needed.
    row = model.transform(single_review_df).first()

    probs = row["probability"]
    pred_label = int(row["prediction"])

    return {
        "text": review_text,
        # Same "unknown" fallback as the label_to_sentiment UDF, instead of
        # raising KeyError on a class that LABEL_MAP does not list.
        "sentiment": LABEL_MAP.get(float(pred_label), "unknown"),
        "confidence": float(probs[pred_label]),
        # Built from LABEL_MAP so class names and vector positions cannot
        # drift apart from the mapping used everywhere else.
        "probabilities": {name: float(probs[int(index)]) for index, name in LABEL_MAP.items()},
    }


# Try the single-review helper on one sample sentence.
test_review = "This is the best product I have ever bought!"
result = predict_sentiment(test_review)

# Emit the three summary lines in a single print call.
print(
    f"Review: {result['text']}",
    f"Sentiment: {result['sentiment']} (confidence: {result['confidence']:.2%})",
    f"Probabilities: {result['probabilities']}",
    sep="\n",
)

In [None]:
# Stop the Spark session that was created earlier in the notebook.
spark.stop()
print("✓ Spark session stopped")