# Labelling Recipes Stage

In [1]:
import pandas as pd
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# Thresholds used by create_ghg_label: values above overall_baseline are "Red",
# values at or below fifty_cutoff are "Green", anything in between is "Yellow".
# fifty_cutoff is approximately half of overall_baseline.
fifty_cutoff = 191.12
overall_baseline = 382.25

# Per-factor baselines; create_results_all_factors divides each footprint
# column by the matching baseline before applying the weights.
# NOTE(review): presumably expressed per 100 g of product, like the "/ 100g"
# columns they are compared with — confirm the units.
GHG_baseline = 382.25
nitrogen_baseline = 3.86
water_baseline = 1248.19
land_baseline = 0.93

In [3]:
# CHANGE THIS AS NEEDED:
# Name of the dataset being labelled. It is concatenated into the output file
# names written by the export cells, so keep exactly one assignment active.
# restaurant_name = "OK23-24"
# restaurant_name = "Gather23-24"
restaurant_name = "Totem23-24"

In [4]:
def create_ghg_label(label):
    """Map a GHG emission value to a traffic-light label.

    The value is compared against the module-level ``overall_baseline`` and
    ``fifty_cutoff`` thresholds.

    Args:
        label: Numeric GHG emission value to classify.

    Returns:
        ``"Red"`` when ``label > overall_baseline``, ``"Yellow"`` when
        ``fifty_cutoff < label <= overall_baseline``, ``"Green"`` when
        ``label <= fifty_cutoff``, and ``None`` when the value satisfies none
        of the comparisons (e.g. NaN).
    """
    if label > overall_baseline:
        return "Red"
    # Reaching this point already implies label <= overall_baseline for any
    # comparable value, so only the lower bound needs checking.
    if label > fifty_cutoff:
        return "Yellow"
    if label <= fifty_cutoff:
        return "Green"
    # NaN fails every comparison above; keep returning None for it, but do so
    # explicitly instead of falling off the end of the function.
    return None


In [5]:
def create_results_all_factors(df, GHG_weight, nitrogen_weight, water_weight, land_weight):
    """Add a "Combined Label" column based on a weighted four-factor score.

    Each per-100g footprint column is divided by its module-level baseline
    (``GHG_baseline``, ``nitrogen_baseline``, ``water_baseline``,
    ``land_baseline``), multiplied by the matching weight, and the four terms
    are summed and divided by the number of factors (4).

    Labelling rule applied to the score:
        * ``score <= 0.5``      -> "Green"
        * ``0.5 < score < 1``   -> "Yellow"
        * anything else         -> "Red" (this includes NaN scores, which
          fail both comparisons)

    The score is computed on whole columns and assigned by position. Writing
    row by row with ``df.loc[index_label, ...]`` would overwrite every row
    that shares an index label, mislabelling rows when the index is not
    unique.

    Args:
        df: DataFrame containing the columns "GHG Emission (g) / 100g",
            "N lost (g) / 100g", "Stress-Weighted Water Use (L) / 100g" and
            "Land Use (m^2) / 100g".
        GHG_weight: Weight applied to the normalised GHG term.
        nitrogen_weight: Weight applied to the normalised nitrogen term.
        water_weight: Weight applied to the normalised water term.
        land_weight: Weight applied to the normalised land-use term.

    Returns:
        The same ``df`` object, modified in place with the new column.
    """
    n_factors = 4  # number of factors combined below

    weighted_ghg = (df["GHG Emission (g) / 100g"] / GHG_baseline) * GHG_weight
    weighted_nitrogen = (df["N lost (g) / 100g"] / nitrogen_baseline) * nitrogen_weight
    weighted_water = (df["Stress-Weighted Water Use (L) / 100g"] / water_baseline) * water_weight
    weighted_land = (df["Land Use (m^2) / 100g"] / land_baseline) * land_weight

    # NOTE(review): the weighted sum is additionally divided by the number of
    # factors. With weights that sum to 1, a product sitting exactly on every
    # baseline scores 0.25 rather than 1 — confirm this scaling is intended.
    combined_score = (weighted_ghg + weighted_nitrogen + weighted_water + weighted_land) / n_factors

    # np.select takes the first matching condition, so the second condition
    # only applies to scores above 0.5.
    score_values = combined_score.to_numpy()
    df["Combined Label"] = np.select(
        [score_values <= 0.5, score_values < 1],
        ["Green", "Yellow"],
        default="Red",
    )
    return df


In [6]:
def add_menu_names(df, dict):
    """Return a copy of ``df`` with "Category" and "Displayed Name" columns.

    ``dict`` maps a menu name of the form ``"<category>|<name>"`` to a product
    ID. Each row's "ProdId" is looked up among the dictionary values; rows
    whose ID is not present are dropped. The matched menu name is split on
    ``"|"``: the first field becomes "Category" and the second field becomes
    "Displayed Name". Both columns are placed at the front of the frame, in
    that order.

    Args:
        df: DataFrame with a "ProdId" column. It is not modified.
        dict: Mapping of menu name -> product ID. When several names share an
            ID, the first name in iteration order is used.

    Returns:
        A new DataFrame containing only the rows with a matching menu name.
        "Displayed Name" is missing (NaN/None) for names without a "|".
    """
    # Reverse lookup built once (ID -> first name that maps to it) instead of
    # a linear list search per row.
    id_to_name = {}
    for menu_name, prod_id in dict.items():
        id_to_name.setdefault(prod_id, menu_name)

    # drop() returns a new frame, so the caller's DataFrame is left untouched;
    # stale output columns from an earlier run are discarded here.
    result = df.drop(columns=["Displayed Name", "Category"], errors="ignore")

    # Unmatched IDs map to NaN, which lets them actually be filtered out.
    names = result["ProdId"].map(id_to_name).astype(object)
    matched = names.notna().to_numpy()
    result = result.loc[matched].copy()
    names = names[matched]

    # reindex guarantees both fields exist even if no name contains "|".
    # NOTE(review): only the second field is kept as the displayed name, so
    # "A|B|C" is displayed as "B" — confirm whether the rest should be kept.
    fields = names.str.split("|", expand=True).reindex(columns=[0, 1])

    # Insert by position (plain arrays) so a non-unique index cannot misalign.
    result.insert(0, "Displayed Name", fields[1].to_numpy())
    result.insert(0, "Category", fields[0].to_numpy())
    return result


In [7]:
def create_final_counts(df):
    """Count how many rows carry each colour label.

    Args:
        df: DataFrame with "GHG Only Label" and "Combined Label" columns.

    Returns:
        A DataFrame indexed by "Red", "Yellow", "Green" (in that order) with
        the columns "GHG Label Counts" and "Combined Label Counts". A colour
        that never occurs is reported as 0.

    Side effects:
        Prints the combined Red, Yellow and Green counts on one line.
    """
    colours = ["Red", "Yellow", "Green"]

    # reindex supplies 0 for colours missing from the data; indexing
    # value_counts() directly would raise KeyError for an absent colour.
    ghg_counts = df["GHG Only Label"].value_counts().reindex(colours, fill_value=0)
    combined_counts = df["Combined Label"].value_counts().reindex(colours, fill_value=0)

    print(*combined_counts.tolist())

    return pd.DataFrame(
        {
            "GHG Label Counts": ghg_counts.tolist(),
            "Combined Label Counts": combined_counts.tolist(),
        },
        index=colours,
    )


In [8]:
def create_visualizations(
    df,
    save_path="C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/final/2023_2024_CFFS_Outcomes/Summary_fig.png",
):
    """Draw side-by-side bar charts of the GHG and combined label counts.

    Args:
        df: Counts table whose index holds the colour names (and optionally
            "Sum") and which has the columns "GHG Label Counts" and
            "Combined Label Counts". It is not modified.
        save_path: Where the figure is written as an image. Defaults to the
            location this notebook has always used.

    Returns:
        The matplotlib Figure, so the caller can display or reuse it.

    Side effects:
        Saves the figure to ``save_path`` and shows it with ``plt.show()``.
    """
    # Work on a reset copy instead of resetting the caller's index in place.
    plot_df = df.reset_index().rename(columns={"index": "Color"})

    sns.set_theme(style="darkgrid")
    palette = {"Green": "tab:green", "Red": "tab:red", "Yellow": "tab:orange", "Sum": "tab:blue"}
    fig, axes = plt.subplots(1, 2, figsize=(7, 4), sharex=False, sharey=True)
    fig.suptitle("Emission Label Counts")

    ax1 = sns.barplot(data=plot_df, x="Color", y="GHG Label Counts", ax=axes[0], palette=palette)
    ax1.set_title("GHG Emission Label")
    ax1.set_ylabel("Counts")
    ax1.set_xlabel("")

    ax2 = sns.barplot(data=plot_df, x="Color", y="Combined Label Counts", ax=axes[1], palette=palette)
    ax2.set_title("Combined Emissions Label")
    ax2.set_ylabel("")  # y axis is shared with the left-hand plot
    ax2.set_xlabel("")

    # The titles and labels are set through the setter calls above. Assigning
    # strings to ``ax.set_title`` / ``ax.set_ylabel`` / ``fig.title`` changes
    # nothing on the plot and replaces the setter methods, so it is not done.

    # Print the count above every bar.
    for ax in (ax1, ax2):
        for container in ax.containers:
            ax.bar_label(container)

    plt.tight_layout()
    plt.savefig(save_path)
    plt.show()
    return fig

In [9]:
def create_category_true(df):
    """Add "RED", "YELLOW" and "GREEN" flag columns from "Combined Label".

    For every row, the column matching the row's "Combined Label" value
    ("Red", "Yellow" or "Green") is set to the string "TRUE"; the other flag
    columns hold an empty string. Rows with any other label get empty strings
    in all three columns.

    The flags are computed on the whole column and assigned by position, so
    rows are flagged correctly even when the index contains duplicates.

    Args:
        df: DataFrame with a "Combined Label" column.

    Returns:
        The same ``df`` object, modified in place with the three new columns.
    """
    labels = df["Combined Label"]
    flag_text = {True: "TRUE", False: ""}
    for column, colour in (("RED", "Red"), ("YELLOW", "Yellow"), ("GREEN", "Green")):
        # to_numpy() makes the assignment positional rather than index-aligned.
        df[column] = labels.eq(colour).map(flag_text).to_numpy()
    return df


In [10]:
# Load the preprocessed product list that the labelling steps below operate on.
products = pd.read_csv('C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/preprocessed/Products_List.csv')

In [11]:
# Label a copy so the frame loaded into `products` keeps its original columns.
final = products.copy()
final

Unnamed: 0,ProdId,Description,SalesGroup,Weight (g),GHG Emission (g),N lost (g),Land Use (m^2),Freshwater Withdrawals (L),Stress-Weighted Water Use (L),GHG Emission (g) / 100g,N lost (g) / 100g,Freshwater Withdrawals (L) / 100g,Stress-Weighted Water Use (L) / 100g,Land Use (m^2) / 100g
0,R-71428,HC|Beef sausage roll,FEAST,240,5101.116,39.54,15.174,201.26,7357.08,2125.46,16.48,83.86,3065.45,6.32
1,R-62265,HS|Carved|Roast Beef,FT HOMESKILLET,400,8685.057372,26.991114,19.209631,135.43,5200.83,2171.26,6.75,33.86,1300.21,4.8


In [12]:
# Classify every product by its per-100g GHG emission alone.
ghg_per_100g = final["GHG Emission (g) / 100g"]
final["GHG Only Label"] = ghg_per_100g.apply(create_ghg_label)
final

Unnamed: 0,ProdId,Description,SalesGroup,Weight (g),GHG Emission (g),N lost (g),Land Use (m^2),Freshwater Withdrawals (L),Stress-Weighted Water Use (L),GHG Emission (g) / 100g,N lost (g) / 100g,Freshwater Withdrawals (L) / 100g,Stress-Weighted Water Use (L) / 100g,Land Use (m^2) / 100g,GHG Only Label
0,R-71428,HC|Beef sausage roll,FEAST,240,5101.116,39.54,15.174,201.26,7357.08,2125.46,16.48,83.86,3065.45,6.32,Red
1,R-62265,HS|Carved|Roast Beef,FT HOMESKILLET,400,8685.057372,26.991114,19.209631,135.43,5200.83,2171.26,6.75,33.86,1300.21,4.8,Red


In [13]:
# Add the "Combined Label" column. The four weights used here sum to 1.0
# (0.8 + 0.03 + 0.15 + 0.02).
final = create_results_all_factors(final, GHG_weight=0.8, nitrogen_weight=0.03, water_weight=0.15, land_weight=0.02)
# final = create_results_all_factors(final)
final

Unnamed: 0,ProdId,Description,SalesGroup,Weight (g),GHG Emission (g),N lost (g),Land Use (m^2),Freshwater Withdrawals (L),Stress-Weighted Water Use (L),GHG Emission (g) / 100g,N lost (g) / 100g,Freshwater Withdrawals (L) / 100g,Stress-Weighted Water Use (L) / 100g,Land Use (m^2) / 100g,GHG Only Label,Combined Label
0,R-71428,HC|Beef sausage roll,FEAST,240,5101.116,39.54,15.174,201.26,7357.08,2125.46,16.48,83.86,3065.45,6.32,Red,Red
1,R-62265,HS|Carved|Roast Beef,FT HOMESKILLET,400,8685.057372,26.991114,19.209631,135.43,5200.83,2171.26,6.75,33.86,1300.21,4.8,Red,Red


In [14]:
# Export the labelled table twice: once as CSV and once as an Excel workbook
# (sheet "Labels"). The file name includes restaurant_name; the row index is
# not written in either file.
final.to_csv("C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/final/2023_2024_CFFS_Outcomes/Data_Labelled_"+restaurant_name+".csv", 
             index=False)
final.to_excel("C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/final/2023_2024_CFFS_Outcomes/Data_Labelled_"+restaurant_name+".xlsx", 
               sheet_name="Labels", index=False)

In [15]:
# CHANGE THE NAME AS NEEDED AS PER THE RESTAURANT:

# Build the Description -> ProdId mapping that is passed to add_menu_names.
# NOTE(review): rows sharing the same Description collapse into a single
# dictionary key — confirm descriptions are unique per product.
OK_list = final.set_index('Description')['ProdId'].to_dict()
OK_list

{'HC|Beef sausage roll': 'R-71428', 'HS|Carved|Roast Beef ': 'R-62265'}

In [16]:
# Add the "Category" / "Displayed Name" columns and export the result.
# Pass a copy so `final` is never modified by the naming step.
final2 = add_menu_names(final.copy(), OK_list)
final2.to_csv("C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/final/2023_2024_CFFS_Outcomes/Data_Labelled_"+restaurant_name+"_with_name.csv",
                  index=False)
# The .xlsx file has to be written with to_excel; to_csv would store CSV text
# under an Excel extension, which spreadsheet tools cannot open as a workbook.
# The sheet name matches the workbook exported for the unnamed table.
final2.to_excel("C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/final/2023_2024_CFFS_Outcomes/Data_Labelled_"+restaurant_name+"_with_name.xlsx",
                sheet_name="Labels", index=False)

In [17]:
# # # ADDED FOR GATHER:
# final2 = pd.read_csv("C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/final/2023_2024_CFFS_Outcomes/Data_Labelled_"+restaurant_name+"_with_name.csv")
# final2.head()

In [18]:
# counts = create_final_counts(final2)
# all_ghg_num = counts["GHG Label Counts"].sum()
# all_num = counts["Combined Label Counts"].sum()
# sum_row = pd.Series(data={"GHG Label Counts": all_ghg_num, "Combined Label Counts": all_num}, name="Sum")

In [19]:
# counts = counts.append(sum_row, ignore_index=False)
# counts.to_csv("C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/final/2023_2024_CFFS_Outcomes/"+restaurant_name+"_Summary.csv", index=False)
# counts_print = pd.concat([counts, sum_row], ignore_index=True)
# counts


# THERE ARE 5 SUM ROWS BECAUSE THERE ARE 5 EMISSION FACTORS THAT WE ARE LOOKING AT: GHG, nitrogen, stressed water, fresh water
# and land use

In [20]:
# THE BLUE BAR REPRESENTS THE SUM ROW WHICH IS THE TOTAL NUMBER OF RECIPES THAT HAVE BEEN ASSIGNED A LABEL
# EACH BAR REPRESENTS THE # OF RECIPES THAT WERE LABELLED WITH THAT SPECIFIC COLOUR

# fig = create_visualizations(counts)
# fig

In [21]:
# final2.head()

In [22]:
# final3 = final2.drop(columns=['Category', 'Displayed Name', 'Weight (g)', 'GHG Only Label'])

In [23]:
# final3.head()

In [24]:
# final3.rename(columns={'ProdId': 'Optimum Control ID', 'Description': 'OC Description'}, inplace=True)

In [25]:
# final3.head()

In [26]:
# final3['Red'] = final3['Combined Label'] == 'Red'
# final3['Yellow'] = final3['Combined Label'] == 'Yellow'
# final3['Green'] = final3['Combined Label'] == 'Green'

In [27]:
# final3.head()

In [28]:
# import pandas as pd


# desired_column_order = [
#     'Optimum Control ID', 'OC Description', 'SalesGroup', 'Combined Label', 'Red', 'Yellow', 'Green',
#     'GHG Emission (g)', 'N lost (g)', 'Land Use (m^2)', 'Freshwater Withdrawals (L)', 
#     'Stress-Weighted Water Use (L)', 'GHG Emission (g) / 100g', 'N lost (g) / 100g', 
#     'Freshwater Withdrawals (L) / 100g', 'Stress-Weighted Water Use (L) / 100g', 
#     'Land Use (m^2) / 100g'
# ]

# # Create a new DataFrame with the desired column order
# data_reordered = final3[desired_column_order]

# # Print the reordered DataFrame
# data_reordered.head()


In [29]:
# data_reordered.to_csv("C:/Users/ENTER_HERE/CFFS-S23/CFFS-22-23/data/final/2023_2024_CFFS_Outcomes/Labelled_Data_"+restaurant_name+"_for_nutrislice.csv",
#                   index=False)