In [None]:

# Supplementary tables used to fill the missing data in general_df.
# Every CSV is loaded and its "Code" and "Year" columns are dropped immediately,
# because the merges below join on "Entity" only.

# Land use data
land_use_kcal = pd.read_csv(resources_path + "land-use-kcal-poore.csv").drop(columns=["Code", "Year"])
land_use_kg = pd.read_csv(resources_path + "land-use-per-kg-poore.csv").drop(columns=["Code", "Year"])
land_use_protein = pd.read_csv(resources_path + "land-use-protein-poore.csv").drop(columns=["Code", "Year"])

# Water use data
water_use_kcal = pd.read_csv(resources_path + "freshwater-withdrawals-per-kcal.csv").drop(columns=["Code", "Year"])
water_use_kg = pd.read_csv(resources_path + "freshwater-withdrawals-per-kg.csv").drop(columns=["Code", "Year"])
water_use_protein = pd.read_csv(resources_path + "freshwater-withdrawals-per-protein.csv").drop(columns=["Code", "Year"])

# All land tables side by side, one row per "Entity" (outer join keeps entities
# that are missing from some of the tables).
land_use = (
    land_use_kcal
    .merge(land_use_kg, how="outer", on="Entity")
    .merge(land_use_protein, how="outer", on="Entity")
)
# Columns are renamed by position: Entity, then the kcal, kg and protein tables in merge order.
land_use.columns = ["Entity", "Land use per 1000kcal", "Land use per kg", "Land use per 100g protein"]
land_use.head()

# Same layout for the water tables.
water_use = (
    water_use_kcal
    .merge(water_use_kg, how="outer", on="Entity")
    .merge(water_use_protein, how="outer", on="Entity")
)
# NOTE(review): "Freswater withdrawls" is misspelled; it is left untouched here
# because other code may select these columns by their exact label.
water_use.columns = ["Entity", "Freswater withdrawls per 1000kcal", "Freswater withdrawls per kg", "Freswater withdrawls per 100g protein"]

In [None]:
####
class comparator:
    """Compare the nutrients of one or more foods with a daily-intake reference.

    foods        -- sequence of named objects (``food.name`` is used as the
                    column label) that ``pd.merge`` can join on their index.
    daily_intake -- object providing the "Daily Intake" column, joined on its index.

    The merged table, including one "Relative - <food name>" column per food,
    is built once at construction time and stored in ``self.comparison``.
    """

    def __init__(self, foods, daily_intake):
        self.foods = foods
        self.daily_intake = daily_intake
        self.comparison = self.__comparator()

    ####
    def __comparator(self):
        """Return daily intake + every food + the relative (%) columns."""
        # Left joins: only the nutrients present in the daily-intake index are kept.
        table = pd.merge(self.daily_intake, self.foods[0], how="left", left_index=True, right_index=True)

        # Slicing past the first food yields nothing when there is a single food.
        for extra_food in self.foods[1:]:
            table = table.merge(extra_food, how="left", left_index=True, right_index=True)

        # Percentage of the daily intake covered by each food.
        for food in self.foods:
            table[f"Relative - {food.name}"] = table[food.name] / table["Daily Intake"] * 100

        return table

    ####
    def to_plot(self):
        """Return the relative columns stacked in long form.

        The result has the columns "Nutrient", "Comparison" and "Food",
        with one block of rows per food.
        """
        # The relative columns are the last len(foods) columns of the table.
        relative_block = self.comparison.iloc[:, -len(self.foods):]
        # Column labels look like "Relative - <food>"; drop that prefix to get the food.
        prefix_length = len("Relative - ")

        def tidy(label):
            frame = relative_block[label].reset_index()
            frame["Food"] = label[prefix_length:]
            frame.columns = ["Nutrient", "Comparison", "Food"]
            return frame

        return pd.concat([tidy(label) for label in relative_block.columns])


#################### Nutrition ####################

In [None]:
class comparator:
    """Prepare long-form comparison tables for a list of foods.

    foods        -- sequence of named pandas Series indexed by nutrient
                    (``food.name`` becomes the "Food" label).
    daily_intake -- object providing the "Daily Intake" column, joined on its index.

    Four tables are built at construction time, all with the columns
    "Nutrient", "Value" and "Food":

    * ``comparison_di``   -- % of the daily intake each food provides
    * ``comparison_fats`` -- sugars, carbohydrate and fat values
    * ``comparison_chol`` -- cholesterol values
    * ``comparison_kcal`` -- energy values
    """

    def __init__(self, foods, daily_intake):
        self.foods = foods
        self.daily_intake = daily_intake

        self.comparison_di = self.__daily_intake_comparator()
        self.comparison_fats = self.comparator(['Sugars, total (g)',
       'Carbohydrate (g)', 'Total Fat (g)', 'Fatty acids, total saturated (g)',
       'Fatty acids, total monounsaturated (g)',
       'Fatty acids, total polyunsaturated (g)'])
        self.comparison_chol = self.comparator(['Cholesterol (mg)'])
        self.comparison_kcal = self.comparator(["Energy (kcal)"])

    ####
    def daily_intake_table(self):
        """Return daily intake + every food + one "Relative - <food>" column per food."""
        # Left joins: only the nutrients present in the daily-intake index are kept.
        comparison_di = pd.merge(self.daily_intake, self.foods[0], how = "left", left_index = True, right_index = True)

        # Slicing past the first food yields nothing when there is a single food.
        for food in self.foods[1:]:
            comparison_di = pd.merge(comparison_di, food, how = "left", left_index = True, right_index = True)

        # Percentage of the daily nutrient intake each food provides.
        for food in self.foods:
            comparison_di[f"Relative - {food.name}"] = (comparison_di.loc[:, food.name] / comparison_di.loc[:, "Daily Intake"]) * 100

        return comparison_di

    ####
    def __daily_intake_comparator(self):
        """Return the relative (%) columns of the daily-intake table in long form."""
        # The relative columns are the last len(foods) columns of the table.
        rel_comparison = self.daily_intake_table().iloc[:, -len(self.foods):]
        # Column labels look like "Relative - <food>"; drop that prefix to get the food.
        prefix_length = len("Relative - ")

        relatives = []
        for column in rel_comparison.columns:
            # Move the nutrients out of the index into a regular column.
            rel = rel_comparison.loc[:, column].reset_index()
            rel["Food"] = column[prefix_length:]
            rel.columns = ["Nutrient", "Value", "Food"]
            relatives.append(rel)

        # Stack the per-food frames vertically.
        return pd.concat(relatives)

    ####
    def comparator(self, filter_):
        """Return the nutrients listed in ``filter_`` for every food, in long form.

        filter_ -- list of nutrient labels to select from each food Series.
        """
        processed_foods = []

        # Iterate the instance's own foods; the previous code read a global
        # named `foods`, which is undefined (or stale) outside the notebook.
        for food in self.foods:
            # Keep only the requested nutrients and move them out of the index.
            data = food[filter_].reset_index()
            data.columns = ["Nutrient", "Value"]
            # Tag every row with the food it belongs to.
            data["Food"] = food.name
            processed_foods.append(data)

        return pd.concat(processed_foods, axis = 0)

    ####
    def get_comparisons(self):
        """Return (comparison_di, comparison_fats, comparison_chol, comparison_kcal)."""
        return self.comparison_di, self.comparison_fats, self.comparison_chol, self.comparison_kcal

In [None]:
def full_comparison_plot(comparisons, fontsize = 18, legendsize = 20, figsize = (20, 20)):
    """Draw the four comparison tables as a 2x2 grid of horizontal bar plots.

    comparisons -- 4-tuple (comparison_di, comparison_fats, comparison_chol,
                   comparison_kcal); each table is plotted with "Value" on x,
                   "Nutrient" on y and "Food" as hue.
    fontsize    -- label size of the y tick labels.
    legendsize  -- font size of the legend (shown on the first panel only).
    figsize     -- size of the whole figure, forwarded to ``plt.subplots``.

    Returns the matplotlib figure.
    """
    comparison_di, comparison_fats, comparison_chol, comparison_kcal = comparisons

    sns.set_theme()
    # One colour per food so the hue colours agree across the four panels.
    n_colors = len(comparison_kcal["Food"].unique())
    palette = sns.color_palette("Paired", n_colors = n_colors)

    # Honour the caller's figsize (it used to be hard-coded to (20, 20)).
    fig, ax = plt.subplots(2, 2, figsize = figsize)

    panels = [
        (ax[0][0], comparison_di, "% Of the Recommended Daily Intake"),
        (ax[0][1], comparison_fats, "Fats & Carbs (g)"),
        (ax[1][0], comparison_chol, "Cholesterol (mg)"),
        (ax[1][1], comparison_kcal, "Energy (kcal)"),
    ]

    # Everything the four panels have in common.
    for axis, data, title in panels:
        sns.barplot(x = "Value", y = "Nutrient", hue = "Food", data = data, palette = palette, ax = axis)
        axis.set_title(title, fontdict = {'fontsize': 20, 'fontweight' : "bold"}, pad = 15)
        axis.tick_params(axis = 'y', which = 'major', labelsize = fontsize)
        axis.set_xlabel("")
        axis.set_ylabel("")

    # First panel: mark 100 % of the daily intake and carry the only visible legend.
    daily_intake_ax = panels[0][0]
    daily_intake_ax.axvline(x = 100, color = "r", linestyle = "dashed")
    daily_intake_ax.legend(prop={'size': legendsize})

    # The remaining panels share the same hue colours, so their legends are hidden.
    for axis, _, _ in panels[1:]:
        axis.legend().set_visible(False)

    fig.tight_layout(pad = 3)
    return fig

# Build the 2x2 comparison figure with the default sizes.
# NOTE(review): `comparisons` is not defined in this cell — presumably the tuple
# returned by comparator.get_comparisons(); confirm.
fig = full_comparison_plot(comparisons)

In [None]:
def color_mapper(df):
    """Map every index label of ``df`` to a colour based on its "Origin" value.

    Rows whose "Origin" equals "Plant-based" get "blue"; every other row gets "red".
    Returns a dict {index label: colour}.
    """
    return {
        label: ("blue" if record["Origin"] == "Plant-based" else "red")
        for label, record in df.iterrows()
    }

In [None]:
### I create some positive and negative filters for later use

### NEGATIVE FILTERS
def food_filter(key):
    """Return the list of food category names registered under ``key``.

    key -- one of the group names in ``filters_map`` below, e.g. "drinks",
           "sauces", "animal_products" or "veggie_products".

    A fresh list is built on every call. Raises KeyError for an unknown key.
    """
    others = ['Formula, ready-to-feed', 'Formula, prepared from powder', 'Formula, prepared from concentrate', 'Sugar substitutes', 'Not included in a food category']
    baby_food = ['Baby food: yogurt', 'Baby food: snacks and sweets', 'Baby food: meat and dinners']
    # 'Pancakes, waffles, French toast' used to be listed twice; the duplicate is dropped.
    desserts_and_snacks = ['Ice cream and frozen dairy desserts', 'Milk shakes and other dairy drinks', 'Cakes and pies', 'Candy not containing chocolate', 'Doughnuts, sweet rolls, pastries', 'Crackers, excludes saltines', 'Cookies and brownies', 'Biscuits, muffins, quick breads', 'Pancakes, waffles, French toast', 'Cereal bars', 'Nutrition bars', 'Saltine crackers', 'Pretzels/snack mix', 'Potato chips', 'Candy containing chocolate']
    drinks = ['Soft drinks', 'Diet soft drinks', 'Flavored or carbonated water', 'Other diet drinks', 'Beer', 'Liquor and cocktails', 'Wine', 'Nutritional beverages', 'Protein and nutritional powders', 'Sport and energy drinks', 'Diet sport and energy drinks']
    # 'Frankfurter sandwiches (single code)' used to be listed twice; the duplicate is dropped.
    sandwiches = ['Burritos and tacos', 'Other sandwiches (single code)', 'Burgers (single code)', 'Egg/breakfast sandwiches (single code)', 'Frankfurter sandwiches (single code)', 'Vegetables on a sandwich']
    prepared_dishes = ['Rolls and buns', 'Egg rolls, dumplings, sushi', 'Pasta mixed dishes, excludes macaroni and cheese', 'Macaroni and cheese', 'Pizza', 'Meat mixed dishes', 'Stir-fry and soy-based sauce mixtures', 'Bean, pea, legume dishes', 'Seafood mixed dishes', 'Rice mixed dishes', 'Fried rice and lo/chow mein', 'Poultry mixed dishes']
    # A missing comma used to fuse the first two entries into the single,
    # non-existent category 'Dips, gravies, other saucesPasta sauces, tomato-based'.
    sauces = ['Dips, gravies, other sauces', 'Pasta sauces, tomato-based', 'Mustard and other condiments', 'Mayonnaise', 'Jams, syrups, toppings']
    # NOTE(review): 'Lamb, goat, game' looks like it belongs in `meats` rather than
    # `milks`; left in place because moving it changes both groups — confirm.
    milks = ['Lamb, goat, game', 'Human milk', 'Milk, reduced fat', 'Milk, whole', 'Milk, lowfat', 'Milk, nonfat', 'Flavored milk, whole', 'Yogurt, regular', 'Yogurt, Greek']
    cheese = ['Cheese', 'Cottage/ricotta cheese']
    other_animal_products = ['Eggs and omelets', 'Butter and animal fats']
    meats = ['Ground beef', 'Cold cuts and cured meats', 'Bacon', 'Pork', 'Liver and organ meats', 'Frankfurters', 'Sausages']
    chicken = ['Turkey, duck, other poultry', 'Chicken, whole pieces', 'Chicken patties, nuggets and tenders']
    fish = ['Fish', 'Shellfish']

    milk_substitutes = ['Milk substitutes']
    beans = ['Beans, peas, legumes']
    soy_products = ['Processed soy products']
    nuts = ['Nuts and seeds']
    other_veggie_products = ['Peanut butter and jelly sandwiches (single code)', 'Oatmeal']

    # Aggregated groups, built from the individual lists above.
    animal_products = milks + cheese + other_animal_products + meats + chicken + fish
    veggie_products = milk_substitutes + beans + soy_products + nuts + other_veggie_products

    filters_map = {
        "others": others,
        "baby_food": baby_food,
        "desserts_and_snacks": desserts_and_snacks,
        "drinks": drinks,
        "sandwiches": sandwiches,
        "prepared_dishes": prepared_dishes,
        "sauces": sauces,
        "milks": milks,
        "cheese": cheese,
        "other_animal_products": other_animal_products,
        "meats": meats,
        "chicken": chicken,
        "fish": fish,
        "milk_substitutes": milk_substitutes,
        "beans": beans,
        "soy_products": soy_products,
        "nuts": nuts,
        "other_veggie_products": other_veggie_products,
        "animal_products": animal_products,
        "veggie_products": veggie_products,
    }

    return filters_map[key]


def multiple_filter(keys):
    """Return the category names of every group in ``keys`` as one flat list.

    Each key is looked up with ``food_filter``; the groups are concatenated
    in the order the keys are given.
    """
    return [category for key in keys for category in food_filter(key)]

In [None]:
# Empty list of resources
        # Sidebar section: lets the user pick resource columns and a measure of centre,
        # then shows a summary table and one bar chart per selected resource.
        # It ends up holding one checkbox state per candidate column, in column order.
        resources = []

        # Render one sidebar checkbox per column of resources_df except the last one
        # and record its state (the states are counted against True below).
        for resource in resources_df.columns[:-1]:
            # checkbox to select the resources
            resources.append(st.sidebar.checkbox(label = resource))

        # Mean or median; lower-cased so it can be compared with the "Mean_median" column below
        measure = st.sidebar.radio("Measure of center", options = ["Median", "Mean"]).lower()

        # Only compute and draw anything when at least one checkbox is ticked
        if resources.count(True) > 0:
            #### Data transformation
            # Use the checkbox states as a boolean mask over the same columns,
            # keeping only the selected resources
            column_selection = resources_df.columns[:-1][resources]
            # Stats object
            # NOTE(review): md.stats is referenced without being called — presumably it
            # exposes calculate/to_plot without needing an instance; confirm.
            stats_object = md.stats
            # Table data
            stats = stats_object.calculate(resources_df, column_selection)
            # Plot data
            to_plot = stats_object.to_plot(stats)

            #### Creating table/plots
            # assumes stats.T.reset_index() yields exactly four columns — TODO confirm
            stats_ = stats.T.reset_index()
            stats_.columns = ["Resource", "Measure", "Animal-based", "Plant-based"]

            header = list(stats_.columns)
            # Transposed again so the table cells receive one row of values per column
            data = stats_.T

            table = go.Figure(data = go.Table(
                                columnwidth = [25, 25, 25, 25],
                                header = dict(values = header,
                                            fill_color = "#3D5475",
                                            align = "left",
                                            font = dict(size = 20, color = "white")),
                            cells = dict(values = data,
                                        fill_color = "#7FAEF5",
                                        align = "left",
                                        font = dict(size = 16),
                                        height = 30))
                            )

            # NOTE(review): this writes the raw plot table to the page; it looks like
            # debugging output — confirm whether it should stay.
            st.write(to_plot)
            # One grouped bar chart per selected resource, restricted to the chosen measure
            for column in column_selection:
                to_plot_ = to_plot[to_plot["Resource"] == column]
                # NOTE(review): title = "Test" looks like a placeholder — confirm.
                fig = px.bar(to_plot_[to_plot_["Mean_median"] == measure], x = "Resource", y = "Values",
                                color = "Origin", barmode = "group", title = "Test")
                st.write(fig)
            
            #### Data visualization
            
            st.write(table)

In [None]:
class comparator:
    """Compare the nutrients of one or more foods with a daily-intake reference.

    foods        -- sequence of named objects (``food.name`` is used as the
                    column label) that ``pd.merge`` can join on their index.
    daily_intake -- object providing the "Daily Intake" column, joined on its index.

    The merged table, including one "Relative - <food name>" column per food,
    is built once at construction time and stored in ``self.comparison``.
    """

    def __init__(self, foods, daily_intake):
        self.foods = foods
        self.daily_intake = daily_intake
        self.comparison = self.__comparator()

    ####
    def __comparator(self):
        """Return daily intake + every food + the relative (%) columns."""
        # Outer joins: nutrients present in any of the inputs are kept.
        comparison = pd.merge(self.daily_intake, self.foods[0], how = "outer", left_index = True, right_index = True)

        # Slicing past the first food yields nothing when there is a single food.
        for food in self.foods[1:]:
            comparison = pd.merge(comparison, food, how = "outer", left_index = True, right_index = True)

        # Percentage of the daily nutrient intake each food provides.
        for food in self.foods:
            comparison[f"Relative - {food.name}"] = (comparison.loc[:, food.name] / comparison.loc[:, "Daily Intake"]) * 100

        return comparison

    ####
    def to_plot(self):
        """Return the relative columns stacked in long form.

        The result has the columns "Nutrient", "Comparison" and "Food",
        with one block of rows per food.
        """
        # Read the instance's own table and foods; the previous code used the
        # globals `comparison` and `foods`, which are undefined (or stale)
        # outside the notebook session.
        rel_comparison = self.comparison.iloc[:, -len(self.foods):]
        # Column labels look like "Relative - <food>"; drop that prefix to get the food.
        prefix_length = len("Relative - ")

        relatives = []
        for column in rel_comparison.columns:
            # Move the nutrients out of the index into a regular column.
            rel = rel_comparison.loc[:, column].reset_index()
            rel["Food"] = column[prefix_length:]
            rel.columns = ["Nutrient", "Comparison", "Food"]
            relatives.append(rel)

        # Stack the per-food frames vertically.
        return pd.concat(relatives)

In [None]:
def rows_filter(df, filter_, positive = True):
    """Select rows of ``df`` by their "Category name".

    filter_  -- collection of category names.
    positive -- when truthy keep the rows whose category is in ``filter_``;
                otherwise keep the rows whose category is not in it.
    """
    in_filter = df["Category name"].isin(filter_)
    return df[in_filter] if positive else df[~in_filter]

def multiple_filter(df, filters_, positive = True):
    """Apply several category filters to ``df`` at once.

    filters_ -- list of filters, each one a list of "Category name" values.
                For the negative case a flat list of category names is
                accepted as well.
    positive -- when truthy, return the rows matching each filter, stacked in
                filter order (a row matching several filters appears once per
                filter); otherwise return the rows matching none of the filters.
    """
    if positive:
        dfs = [rows_filter(df, filter_) for filter_ in filters_]
        # pd.concat refuses an empty list; no filters simply means no rows selected.
        if not dfs:
            return df.iloc[0:0]
        return pd.concat(dfs)

    # Flatten nested filters so the negative case accepts the same list-of-lists
    # shape as the positive one; isin() cannot look up unhashable list elements.
    excluded = []
    for filter_ in filters_:
        if isinstance(filter_, (list, tuple, set, frozenset)):
            excluded.extend(filter_)
        else:
            excluded.append(filter_)

    return df[~df["Category name"].isin(excluded)]

In [None]:
class filter_tool:
    """Namespace of helpers that filter a DataFrame by its "Category name" column.

    Both helpers are static: call them as ``filter_tool.rows_filter(df, ...)``
    (calling them on an instance works too).
    """

    @staticmethod
    def rows_filter(df, filter_, positive = True):
        """Select rows of ``df`` by their "Category name".

        filter_  -- collection of category names.
        positive -- when truthy keep the rows whose category is in ``filter_``;
                    otherwise keep the rows whose category is not in it.
        """
        if positive:
            filtered_df = df[df["Category name"].isin(filter_)]
        else:
            filtered_df = df[~df["Category name"].isin(filter_)]

        return filtered_df

    @staticmethod
    def multiple_filter(df, filters_, positive = True):
        """Apply several category filters to ``df`` at once.

        filters_ -- list of filters, each one a list of "Category name" values.
                    For the negative case a flat list of category names is
                    accepted as well.
        positive -- when truthy, return the rows matching each filter, stacked in
                    filter order (a row matching several filters appears once per
                    filter); otherwise return the rows matching none of the filters.
        """
        if positive:
            # Qualified call: a bare `rows_filter` is not visible from inside a
            # method, because class scope is not searched during name lookup.
            dfs = [filter_tool.rows_filter(df, filter_) for filter_ in filters_]
            # pd.concat refuses an empty list; no filters simply means no rows selected.
            if not dfs:
                return df.iloc[0:0]
            return pd.concat(dfs)

        # Flatten nested filters so the negative case accepts the same list-of-lists
        # shape as the positive one; isin() cannot look up unhashable list elements.
        excluded = []
        for filter_ in filters_:
            if isinstance(filter_, (list, tuple, set, frozenset)):
                excluded.extend(filter_)
            else:
                excluded.append(filter_)

        return df[~df["Category name"].isin(excluded)]