In [None]:
# Import all necessary libraries
import numpy as np
import pandas as pd
import geopandas as gpd

from matplotlib import pyplot as plt
import seaborn as sns
import holoviews as hv
import hvplot.pandas

from transformers import pipeline

import cenpy
import pygris

import re
import warnings

# Raise the column display limit so wide frames are shown without elision
pd.set_option("display.max_columns", 999)

## 2. Fast food trends in Philadelphia

### 2.1 Identify fast food restaurants

In [None]:
# Flag every row whose category string contains the literal text "Fast Food";
# rows with a missing category are flagged False
Yelp_tract['is_fast_food'] = (
    Yelp_tract['categories']
    .str.contains('Fast Food', na=False, regex=False)
)

### 2.2 Calculate the median income for fast food and non-fast food restaurants

In [None]:
# Median of Med_HH_Inc within each is_fast_food group (False / True)
med_inc_fast_food = (
    Yelp_tract
    .groupby('is_fast_food')['Med_HH_Inc']
    .median()
)

print(med_inc_fast_food)

### 2.3 Load fast food review data

In [None]:
# Read the gzip-compressed, line-delimited JSON file of fast food reviews
fast_food_review = pd.read_json(
    "data/reviews_philly_fast_food.json.gz",
    lines=True,
    orient='records',
    compression='gzip',
)

fast_food_review.head()

### 2.4 Trim to the most popular fast food restaurants

In [None]:
# Restaurant chain names to keep for the review analysis
popular_fast_food = [
    "McDonald's", "Wendy's", "Subway", "Popeyes Louisiana Kitchen",
    "Taco Bell", "KFC", "Burger King",
]

# Build a business_id -> name lookup covering every business in Yelp (not only
# the popular chains). One row per business_id is kept so the left merge below
# cannot multiply review rows.
Yelp_name = Yelp[["business_id", "name"]].drop_duplicates(subset="business_id")

# Attach the business name to each review. The guard makes the cell safe to
# re-run: merging a second time would produce name_x / name_y columns and the
# "name" column would no longer exist.
if "name" not in fast_food_review.columns:
    fast_food_review = fast_food_review.merge(Yelp_name, on="business_id", how="left")

fast_food_review.head()

In [None]:
# Keep only reviews whose business name is one of the popular chains,
# then renumber the rows from zero
is_popular_chain = fast_food_review['name'].isin(popular_fast_food)
fast_food_review_trim = fast_food_review.loc[is_popular_chain].reset_index(drop=True)

fast_food_review_trim.head()

### 2.5 Run the emotions classifier on fast food reviews

In [None]:
# A review is usable only if its text is a string containing at least one
# word character (this also rejects missing, empty, whitespace-only and
# punctuation-only text).
text_pattern = re.compile(r'\w+')
has_usable_text = (
    fast_food_review_trim["text"]
    .map(lambda item: isinstance(item, str) and text_pattern.search(item) is not None)
    .astype(bool)
)

# Apply the filter to the DataFrame itself and renumber the rows, so that
# row i of fast_food_review_trim and item i of fast_food_review_list always
# describe the same review. Filtering only the list would leave the two out
# of step when they are later joined by position/index.
fast_food_review_trim = fast_food_review_trim.loc[has_usable_text].reset_index(drop=True)

# Extract the (whitespace-stripped) review text to a list
fast_food_review_list = fast_food_review_trim["text"].str.strip().tolist()

fast_food_review_list[:10]

In [None]:
# Suppress every warning from this point on
warnings.filterwarnings("ignore")

# Model identifier, passed to the pipeline as both the model and the tokenizer
model = "bhadresh-savani/distilbert-base-uncased-emotion"

# Build the text-classification pipeline. top_k=None is used so that each
# review gets a list of label/score entries rather than a single result;
# truncation=True is passed so over-long reviews are truncated.
emotion_classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=model,
    truncation=True,
    top_k=None,
)

# Score every review in the list (one list of label/score dicts per review)
fast_food_review_emotion = emotion_classifier(fast_food_review_list)

fast_food_review_emotion[:4]

In [None]:
# Turn the classifier output into a table: one row per review, one column per
# emotion label holding that label's score, plus the review text itself
fast_food_review_emotion_df = pd.DataFrame(
    [
        {entry["label"]: entry["score"] for entry in review_scores}
        for review_scores in fast_food_review_emotion
    ]
).assign(text=fast_food_review_list)

fast_food_review_emotion_df.head()

### 2.6 Identify the predicted emotion for each text

In [None]:
# The predicted emotion is the label column with the highest score in each row
fast_food_review_emotion_df["prediction"] = (
    fast_food_review_emotion_df
    .loc[:, ["anger", "fear", "sadness", "joy", "love", "surprise"]]
    .idxmax(axis="columns")
)

fast_food_review_emotion_df.head()

### 2.7 Combine the ratings and sentiment data

In [None]:
# Combine the review rows with their emotion scores and prediction.
#
# pd.concat(axis=1) matches rows by index label, so both sides are first
# reduced to the same set of reviews and renumbered from zero:
#   * the review frame is restricted to rows whose text contains at least one
#     word character (the rule used when the text list for the classifier was
#     built), and
#   * the emotion frame's own "text" column is dropped so the result does not
#     end up with two columns both labelled "text".
reviews_with_text = fast_food_review_trim.loc[
    fast_food_review_trim["text"].str.contains(r"\w", na=False)
].reset_index(drop=True)
emotion_scores_only = fast_food_review_emotion_df.drop(columns="text").reset_index(drop=True)

# A length mismatch means the two frames no longer describe the same reviews
# (e.g. cells were run out of order); fail loudly instead of mis-pairing rows.
if len(reviews_with_text) != len(emotion_scores_only):
    raise ValueError(
        f"Cannot combine {len(reviews_with_text)} reviews with "
        f"{len(emotion_scores_only)} emotion predictions; re-run the notebook from the top."
    )

fast_food_review_classified = pd.concat([reviews_with_text, emotion_scores_only], axis=1)

fast_food_review_classified.head()

### 2.8 Plot sentiment vs. stars

In [None]:
# Make 'stars' numeric (for ordering) and 'prediction' categorical first, then
# drop rows where either is missing. Converting before dropna() means ratings
# that cannot be parsed as numbers (coerced to NaN) are removed as well, and
# assign() builds a new frame rather than writing into the result of dropna().
fast_food_review_classified = (
    fast_food_review_classified
    .assign(
        stars=lambda df: pd.to_numeric(df['stars'], errors='coerce'),
        prediction=lambda df: df['prediction'].astype('category'),
    )
    .dropna(subset=['stars', 'prediction'])
)

# Plot the stacked bar chart on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))

sns.histplot(
    data=fast_food_review_classified,
    x="stars",
    hue="prediction",
    multiple="stack",
    discrete=True,  # Treat stars as discrete categories
    shrink=0.8,     # Adjust bar width for better spacing
    ax=ax,
)

# Add titles and labels
ax.set_title("Emotion Breakdown by Star Ratings", fontsize=16)
ax.set_xlabel("Stars", fontsize=12)
ax.set_ylabel("Count", fontsize=12)

ax.grid(axis="y", linestyle="--", alpha=0.7)