In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load cleaned data

In [None]:
# Read the two cleaned tables in one pass; the paths are relative to the
# notebook's working directory and the unpacking order matches the tuple order.
restaurants_df, menu_df = (
    pd.read_csv(csv_path)
    for csv_path in ("restaurants_clean.csv", "menu_clean.csv")
)

# ============================
# 🔍 Search Ranking Insights
# ============================

# 1. Top restaurants by average position

In [None]:
# Average search position per restaurant name. The ascending sort puts the
# smallest averages first, so head(10) keeps the ten best-placed names.
top_ranked = (
    restaurants_df
    .groupby('restaurant_name')['position']
    .mean()
    .sort_values()
    .head(10)
)
# sep="\n" prints the heading and the Series on separate lines.
print("🔝 Top 10 Restaurants by Average Search Position:", top_ranked, sep="\n")

# 2. Correlation between position and score (reads the original CSV because 'score' was dropped from the cleaned data)

In [None]:
# Load the original restaurants table and keep only rows where both plotted
# variables are present.
orig_restaurants = pd.read_csv("restaurants.csv")
clean_df = orig_restaurants.dropna(subset=['position', 'score'])

# seaborn returns the Axes it drew on, so title and axis labels are set on it directly.
sns.scatterplot(data=clean_df, x='position', y='score').set(
    title="Search Position vs Review Score",
    xlabel="Search Position (Lower is Better)",
    ylabel="Review Score",
)
plt.show()

# Take the position/score cell of the 2x2 correlation matrix
# (DataFrame.corr defaults to Pearson).
correlation1 = clean_df[['position', 'score']].corr().loc['position', 'score']
print("📈 Correlation between position and score:", correlation1)



# ============================
# 🥘 Menu Diversity Insights
# ============================
# 1. Menu variety by restaurant

In [None]:
# Count distinct item names per restaurant (nunique skips missing names by
# default) and expose the result as a two-column frame.
menu_counts = (
    menu_df
    .groupby('restaurant_id')['item_name']
    .nunique()
    .reset_index(name='menu_item_count')
)

# Largest menus first; keep the ten biggest.
top_menus = menu_counts.sort_values(by='menu_item_count', ascending=False).head(10)
print("🍽️ Top 10 Restaurants with Most Diverse Menus:", top_menus, sep="\n")


# 2. Correlation between menu item count and total_ratings

In [None]:
# Attach each restaurant's total_ratings to its menu size, then plot and
# correlate the two.
# NOTE(review): drop_duplicates() compares whole rows, so a restaurant_id that
# appears with two different total_ratings values would survive as two rows and
# be counted twice after the merge — confirm ids are unique in restaurants.csv.
ratings = orig_restaurants[['restaurant_id', 'total_ratings']].drop_duplicates()

# pd.merge defaults to an inner join. dropna() is chained rather than applied
# with inplace=True, so the frame is built in one non-mutating expression.
merged_df = pd.merge(menu_counts, ratings, on='restaurant_id').dropna()

sns.scatterplot(data=merged_df, x='menu_item_count', y='total_ratings')
plt.title("Menu Item Count vs Total Ratings")
plt.xlabel("Menu Items")
plt.ylabel("Total Ratings")
plt.show()

# Series.corr defaults to Pearson. The label now names the metric actually
# used (total_ratings), matching the plot title and axis label.
correlation2 = merged_df['menu_item_count'].corr(merged_df['total_ratings'])
print("📊 Correlation between menu item count and total ratings:", correlation2)