In [None]:
import pandas as pd

In [None]:
# Load the project spreadsheet into a DataFrame. The path is relative, so the
# workbook must sit in the notebook's working directory.
df = pd.read_excel("Fast Food Analysis Project Spreadsheet.xlsx")

In [None]:
# First look at the freshly loaded rows.
df.head()

In [None]:
# Remove the 'Unnamed: 0' column (presumably an index column exported with the
# sheet). Reassigning instead of using inplace=True, together with
# errors='ignore', keeps this cell safe to re-run: a second execution no
# longer raises KeyError once the column is already gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
# Confirm the 'Unnamed: 0' column is no longer present.
df.head()

In [None]:
# (rows, columns) of the data set.
df.shape

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# List the raw header names exactly as read, before they are renamed.
print(df.columns)

In [None]:
# Map the spreadsheet headers to short snake_case names used by the rest of
# the notebook.
# NOTE(review): the 'Calories ' key carries a trailing space; presumably that
# is how the header is spelled in the sheet -- confirm against df.columns.
column_renames = {
    'Restaurant': 'restaurant_name',
    'Item Name': 'item_name',
    'Calories ': 'calories',
    'Protein (g)': 'protein',
    'Total Fats (g)': 'fats',
    'Sodium (mg)': 'sodium',
    'Total Sugars (g)': 'sugars',
    'Cholesterol (mg)': 'cholesterol',
    'Price ($)': 'price',
    'Carbs (g)': 'carbs',
    'Fiber (g)': 'fiber',
    'Item Type': 'item_type',
}
df = df.rename(columns=column_renames)
df.head()

In [None]:
# Number of missing values in every column.
df.isna().sum()


In [None]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

In [None]:
# Tier-one analysis starts here: summary statistics first, followed by
# rankings of restaurants and items by calories, protein, etc.
df.describe()

In [None]:
# Show the 10 highest and 10 lowest items for every metric.
# NOTE(review): 'sugars' is renamed earlier but is not part of this list --
# confirm that leaving it out is intentional.
metrics = ['calories', 'protein', 'fats', 'sodium', 'cholesterol', 'price', 'carbs', 'fiber']

for metric in metrics:
    # Sort once per metric and reuse the result for both views. Rows with a
    # missing value are dropped first: sort_values() puts NaN last, so they
    # would otherwise be reported as the "Bottom 10".
    ranked = df.dropna(subset=[metric]).sort_values(by=metric, ascending=False)

    print(f"Top 10 {metric.title()}")
    display(ranked.head(10))

    print(f"Bottom 10 {metric.title()}")
    display(ranked.tail(10))

In [None]:
# Rank restaurants by the median of each metric. The median is used instead
# of the mean because outliers were found in the data.
for metric in metrics:
    median_by_restaurant = df.groupby('restaurant_name')[metric].median()
    ranked = median_by_restaurant.sort_values(ascending=False)

    print(f"Restaurants Ranked by Median {metric.title()} (High to Low)")
    display(ranked)

In [None]:
# Rank item types by the median of each metric (high to low).
for metric in metrics:
    median_by_item_type = df.groupby('item_type')[metric].median()
    ranked = median_by_item_type.sort_values(ascending=False)

    print(f"Item Types Ranked by Median {metric.title()} (High to Low)")
    display(ranked)

In [None]:
# Tier-two analysis starts here (rule-based clustering).
# Re-display the summary statistics as a reference while choosing cut-offs.
df.describe()


In [None]:
# Rule-based clusters: each entry is a boolean mask over the rows of df built
# from fixed cut-offs. A later iteration of the project is meant to split
# these by item type.
cluster_rules = {
    'high_protein_low_fat': df['protein'].ge(20) & df['fats'].le(23),
    'low_carb_high_fat': df['carbs'].le(40) & df['fats'].ge(23),
    'high_fiber_low_sodium': df['fiber'].ge(3) & df['sodium'].le(1015),
    'high_protein_low_price': df['protein'].ge(20) & df['price'].le(6.60),
    'low_calorie_high_fiber': df['calories'].le(440) & df['fiber'].ge(3),
}

# Store every mask on the frame as a boolean "cluster_<rule>" column.
for name, rule in cluster_rules.items():
    flag_column = f"cluster_{name}"
    df[flag_column] = rule

In [None]:
# Report how many items satisfy each rule (summing booleans counts the Trues).
for name in cluster_rules:
    flag_column = f'cluster_{name}'
    print(f"{name}: {df[flag_column].sum()} items")

In [None]:
# Human-readable label for every rule key.
cluster_name_map = dict(
    high_protein_low_fat='High Protein + Low Fat',
    low_carb_high_fat='Low Carb + High Fat',
    high_fiber_low_sodium='High Fiber + Low Sodium',
    high_protein_low_price='High Protein + Low Price',
    low_calorie_high_fiber='Low Calorie + High Fiber',
)

# Every item starts unlabelled ('None' is a plain string, not the None object).
df['manual_cluster'] = 'None'

# Stamp the labels rule by rule.
# NOTE: each pass overwrites the previous one, so an item that satisfies
# several rules ends up with the label of the LAST rule it matches.
for name in cluster_rules:
    is_member = df[f'cluster_{name}']
    df.loc[is_member, 'manual_cluster'] = cluster_name_map[name]

In [None]:
# Check that the cluster_* flag columns and manual_cluster were added.
df.head()

In [None]:
# All items that received a cluster label, grouped together by label.
df.loc[df['manual_cluster'].ne('None')].sort_values(by='manual_cluster')

In [None]:
# Build a slimmer view for reading: same rows as df, minus the boolean
# cluster_* flag columns (manual_cluster is kept).
cluster_cols = list(df.filter(regex=r'^cluster_').columns)

df_display = df.drop(columns=cluster_cols)

In [None]:
# Preview the slimmed-down display frame.
df_display.head()

In [None]:
# Items whose final label is 'High Fiber + Low Sodium'.
df.loc[df['manual_cluster'].eq('High Fiber + Low Sodium')]

In [None]:
# For every metric, list the members of each labelled cluster from highest to
# lowest value of that metric.
for metric in metrics:
    # Labels in order of first appearance, skipping the unlabelled bucket.
    labelled_clusters = [c for c in df['manual_cluster'].unique() if c != 'None']

    for cluster in labelled_clusters:
        # The mask comes from df and is applied to df_display; both frames
        # share the same index, so the rows line up.
        members = df_display[df['manual_cluster'] == cluster]

        print(f"Ranked Items in Cluster: {cluster} by {metric.title()}")
        display(members.sort_values(by=metric, ascending=False))

In [None]:
# How many rules does each item satisfy? The index of the result is the
# number of matching rules, the value is how many items have that count --
# anything above 1 means items overlap between clusters.
df.filter(regex=r'^cluster_').sum(axis=1).value_counts()

In [None]:
# Another quick look at the display frame before building the scores.
df_display.head()

In [None]:
# Reminder of the metric names that will be turned into scores.
metrics

In [None]:
# Membership scores for the fuzzy clustering / radar charts: every metric is
# converted to a 0-100 percentile rank where a HIGHER score is always better.
high_good_metrics = ['protein', 'fiber']
low_good_metrics = ['calories', 'fats', 'sodium', 'cholesterol', 'price', 'carbs']

# ascending=True  -> larger raw values receive the larger percentile.
# ascending=False -> smaller raw values receive the larger percentile.
for metric_group, larger_is_better in ((high_good_metrics, True), (low_good_metrics, False)):
    for metric in metric_group:
        percentile = df[metric].rank(pct=True, ascending=larger_is_better)
        df[f"{metric}_score"] = percentile * 100

In [None]:
# Sanity-check the score columns (every column whose name contains '_score').
df.filter(like='_score').describe()

In [None]:
# Spot check: the items with the most protein should top the protein score.
protein_check = df.loc[:, ['item_name', 'protein', 'protein_score']]
protein_check.sort_values(by='protein_score', ascending=False).head(10)

In [None]:
# Spot check: sodium is a "lower is better" metric, so the items with the
# least sodium should top the sodium score.
sodium_check = df.loc[:, ['item_name', 'sodium', 'sodium_score']]
sodium_check.sort_values(by='sodium_score', ascending=False).head(10)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Check dtypes and non-null counts of the metric columns before correlating.
df[metrics].info()

In [None]:
# Pairwise correlation between the metric columns.
corr_matrix = df[metrics].corr()

In [None]:
# Annotated heatmap of the metric correlations.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr_matrix,
    annot=True,
    fmt='.2f',
    cmap='pink',
    square=True,
    linewidths=0.5,
    ax=ax,
)
ax.set_title('Correlation Between Metrics')
# Tilt the column labels so they do not collide; keep row labels horizontal.
plt.setp(ax.get_xticklabels(), rotation=45)
plt.setp(ax.get_yticklabels(), rotation=0)
fig.tight_layout()
plt.show()

In [None]:
# Nutrition-goal scores: each goal is a weighted blend of two 0-100
# percentile scores, so the result is also on a 0-100 scale.
df['high_pro_low_fat_score'] = (
    df['protein_score'] * 0.6 + df['fats_score'] * 0.4
)

df['high_fib_low_sod_score'] = (
    df['fiber_score'] * 0.5 + df['sodium_score'] * 0.5
)

df['high_pro_low_pri_score'] = (
    df['protein_score'] * 0.6 + df['price_score'] * 0.4
)

# fats_score is ranked with ascending=False, i.e. it rewards LOW fat. For the
# "low carb + HIGH fat" goal the fat component has to point the other way, so
# rank fat with ascending=True here instead of reusing fats_score (which made
# this goal reward low-carb, low-fat items).
high_fat_percentile = df['fats'].rank(pct=True, ascending=True) * 100
df['low_car_high_fat_score'] = (
    df['carbs_score'] * 0.6 + high_fat_percentile * 0.4
)

df['low_cal_high_fib_score'] = (
    df['calories_score'] * 0.5 + df['fiber_score'] * 0.5
)

In [None]:
# Check that the goal score columns were added.
df.head()

In [None]:
# Metric names, for reference when listing the per-metric score columns.
metrics

In [None]:
# Rank restaurants by their median score for every nutrition goal.

# Per-metric score columns, derived from `metrics` so the two stay in sync.
metric_score_cols = [f"{metric}_score" for metric in metrics]

# Goal columns are the remaining '*_score' columns, in the order they were
# added to the frame.
goal_score_cols = [col for col in df.columns if col.endswith('_score') and col not in metric_score_cols]

for goal in goal_score_cols:
    # 'high_pro_low_fat_score' -> 'High Pro Low Fat'. Underscores become
    # spaces; removing them outright glued the words into 'Highprolowfat'.
    readable_name = goal.replace('_score', '').replace('_', ' ').title()
    print(f"\nRestaurants Ranked by Median {readable_name} Score")

    ranked = df.groupby('restaurant_name')[goal].median().sort_values(ascending=False)
    display(ranked)


In [None]:
# Names of the goal score columns that were just ranked.
goal_score_cols

In [None]:
# Count the items that reach the "strong match" cut-off for each nutrition
# goal. One vectorised expression replaces five copy-pasted cells, so the
# cut-off lives in a single place.
STRONG_MATCH_THRESHOLD = 70

strong_match_goal_cols = [
    'high_fib_low_sod_score',
    'low_cal_high_fib_score',
    'high_pro_low_fat_score',
    'high_pro_low_pri_score',
    'low_car_high_fat_score',
]

# .ge() yields a boolean frame; summing each column counts the Trues.
df[strong_match_goal_cols].ge(STRONG_MATCH_THRESHOLD).sum().rename('items_at_or_above_threshold')

In [1]:
import json 
import uuid

In [5]:
# Load the exported items. The encoding is stated explicitly because open()
# otherwise falls back to the platform default, which can mis-decode
# non-ASCII characters in the JSON on some systems.
with open('fastfood.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

In [3]:
# Debug aid: show the working directory that the relative file paths in this
# notebook are resolved against.
import os
os.getcwd()

'/Users/mikayla/fast-food-macros-1'

In [6]:
# Give every record a fresh random UUID (stored as a string) under 'id'.
# Any existing 'id' value is replaced.
for record in data:
    record.update(id=str(uuid.uuid4()))

In [9]:
# Write the records, now carrying ids, to a new file so the input is left
# untouched.
with open('fastfood_with_ids.json', 'w') as out_file:
    json.dump(data, out_file, indent=2)

In [10]:
# Confirmation message (wording fixed: "IDS adds" -> "IDs added").
print('✅ Unique IDs added and saved to fastfood_with_ids.json')

✅ Unique IDS adds and saved to fastfood_with_ids.json
