In [None]:
import pandas as pd

# Read the four input workbooks: recipe ingredient rows, the unique-ingredient
# lookup, the emission table and the per-country price ratios.
df_recipes = pd.read_excel('Detailed_Ingredients_Data.xlsx')
df_unique = pd.read_excel('Unique_ingredients.xlsx')
df_emissions = pd.read_excel('20153EID_food.xlsx')
df_price_ratios = pd.read_excel('Price_Ratio_to_Japan.xlsx')

# Keep only the first emission row for each 'Item names (FIES)' value so the
# merge on that column below cannot fan out on repeated item names.
df_emissions = df_emissions.drop_duplicates(
    subset='Item names (FIES)',
    keep='first',
)

# Attach the unique-ingredient lookup columns to every recipe row.
df_merged = df_recipes.merge(df_unique, on='ingredient', how='left')

# Attach the emission table columns via the FIES item name.
df_final = df_merged.merge(df_emissions, on='Item names (FIES)', how='left')

# Attach the price ratio matching the row's country and series name.
df_final = df_final.merge(
    df_price_ratios,
    left_on=['country', 'Series Name'],
    right_on=['Country Name', 'Series Name'],
    how='left',
)

# Scale each price by the matched 'Price_Ratio_to_Japan' value.
price_ratio = df_final['Price_Ratio_to_Japan']
df_final['adjusted_price'] = df_final['price'] * price_ratio

# Define dish type categorization function

# Meat keywords, matched as substrings of an ingredient name. The order is the
# priority: the first keyword found in any ingredient decides the dish type.
_MEAT_KEYWORDS = ('chicken', 'beef', 'pork')

# Ingredient names that mark a dish as seafood (matched exactly, not as substrings).
_SEAFOOD_INGREDIENTS = frozenset({
    'fish', 'shrimp', 'crab', 'carp', 'lobster', 'belt fish',
    'bream', 'eel', 'fish (anchovy)', 'fish (tilapia or salmon)',
    'fish (white meat)', 'fish head', 'mackerel', 'mullet', 'mussel',
    'oyster', 'prawns', 'rohu', 'scallop', 'sea bass', 'sea cucumber',
    'shellfish', 'snapper (red)', 'squid', 'tilapia', 'trout', 'tuna',
    'catfish', 'abalone', 'fish ball', 'fish stock/broth',
    'soft-shelled turtle', 'squirrel mandarin fish',
})

# Ingredient names that mark a dish as 'other' (matched exactly).
_OTHER_INGREDIENTS = frozenset({
    'duck egg', 'pigeon egg', 'cheese (feta)', 'dumpling skin',
    'wonton wrappers', 'pho', 'bone in goat', 'camel meat', 'duck breast',
    'duck feet', 'duck intestines', 'duck leg', 'duck meat', 'lamb',
    'lamb chops', 'lamb fat', 'lamb shanks', 'lamb shoulder', 'lamb-ground',
    'mutton', 'mutton fat', 'mutton-lean', 'pigeon', 'pigeon breast',
    'lamb stock/broth',
})


def categorize_dish(ingredients):
    """Return the dish type implied by a dish's ingredient names.

    Categories are tested in priority order and the first match wins:
    'chicken', 'beef' and 'pork' match when the keyword occurs as a substring
    of any ingredient name; 'seafood' and 'other' match when any ingredient
    name is exactly equal to an entry of the corresponding lookup set. A dish
    that matches nothing is 'vegetarian'. Matching is case-sensitive.

    Args:
        ingredients: Iterable of ingredient names. Entries that are not
            strings (for example NaN from a blank spreadsheet cell, or None)
            are ignored instead of raising TypeError.

    Returns:
        One of 'chicken', 'beef', 'pork', 'seafood', 'other', 'vegetarian'.
    """
    # Materialise once: the input is scanned several times, and non-string
    # entries cannot be tested with the `in` operator.
    names = [item for item in ingredients if isinstance(item, str)]

    for keyword in _MEAT_KEYWORDS:
        if any(keyword in name for name in names):
            return keyword

    if any(name in _SEAFOOD_INGREDIENTS for name in names):
        return 'seafood'
    if any(name in _OTHER_INGREDIENTS for name in names):
        return 'other'
    return 'vegetarian'

# Collect the ingredient names of every recipe into one list per recipe id.
df_ingredients_per_dish = (
    df_final.groupby('idrecipe')['ingredient']
    .apply(list)
    .reset_index()
)

# Classify each recipe from its ingredient list.
dish_types = df_ingredients_per_dish['ingredient'].apply(categorize_dish)
df_ingredients_per_dish['dish_type'] = dish_types

# Copy the recipe-level dish type onto every ingredient row of that recipe.
df_final = pd.merge(
    df_final,
    df_ingredients_per_dish[['idrecipe', 'dish_type']],
    on='idrecipe',
    how='left',
)

# Carbon emission per ingredient row, already divided by the serving count.
# NOTE(review): the original comment states the result is "converted to
# million grams"; the units of the inputs are not visible here - confirm.
emission_numerator = (
    df_final['weight(g)'] * df_final['emission_intensity'] * df_final['adjusted_price']
)
emission_denominator = 100 * df_final['serving_new']
df_final['carbon_emission'] = emission_numerator / emission_denominator

# Price contribution of each ingredient row (weight times adjusted price / 100).
weighted_price = df_final['weight(g)'] * df_final['adjusted_price']
df_final['price_by_dish'] = weighted_price / 100

# Total carbon emission per recipe, keeping country, dish type, servings and
# recipe name as identifying columns.
dish_keys = ['idrecipe', 'country', 'dish_type', 'serving_new', 'recipe_name']
carbon_footprint_per_dish = (
    df_final.groupby(dish_keys)['carbon_emission']
    .sum()
    .reset_index()
)


In [None]:
#Below is the data for final analysis

# Columns summarised in every table below. Keeping the list in one place
# guarantees the three tables always report the same columns in the same order.
# NOTE(review): the upper-case names look like nutrient tag codes coming from
# one of the merged input sheets - confirm their meaning and units there.
_STAT_COLUMNS = [
    'carbon_emission', 'price_by_dish',
    'ENERC', 'WATER', 'PROCNT', 'FAT', 'CHOAVLDF', 'FIBTG', 'ASH',
    'CA', 'P', 'NA', 'K', 'FE', 'CU', 'ZN',
    'RETOL', 'CARTB', 'VITA_RAE', 'THIA', 'RIBF', 'NIA', 'VITC',
]

# Every column gets the same three summary statistics.
_STAT_SPEC = {column: ['mean', 'max', 'min'] for column in _STAT_COLUMNS}

# NOTE(review): these statistics are taken over the rows of df_final as-is.
# If df_final holds one row per recipe ingredient, they describe ingredient
# rows rather than per-dish totals - confirm this is intended.

# Group by country
stats_by_country = df_final.groupby('country').agg(_STAT_SPEC).reset_index()

# Group by dish type
stats_by_dish_type = df_final.groupby('dish_type').agg(_STAT_SPEC).reset_index()

# Group by country and dish type combination
stats_by_country_and_dish = (
    df_final.groupby(['country', 'dish_type']).agg(_STAT_SPEC).reset_index()
)

# Write the three summary tables into one workbook, one sheet per table.
output_sheets = {
    'Stats by Country': stats_by_country,
    'Stats by Dish Type': stats_by_dish_type,
    'Stats by Country and Dish': stats_by_country_and_dish,
}
with pd.ExcelWriter('carbon_footprint_per_dish_with_nutrition_output.xlsx') as writer:
    for sheet_name, table in output_sheets.items():
        table.to_excel(writer, sheet_name=sheet_name)
