# MA
## Creation of a process for scientific visualization development based on the example of the new ZHAW protein source database
## Christina Köck
## July 2023
### Link to the Gitlab-Repo: https://github.com/TinyTen/MA

Code for Visualizations for section recipe comparison for iteration 2. Data are read from Dapro Excel.

### Libraries and data

In [31]:
from cmcrameri import cm
import math as math
import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt
import plotly.express as px


from math import pi

from matplotlib import colors

import sparql_dataframe

### Read in dapro Data

In [32]:
df_dapro = pd.read_excel('../Data/df_dapro_en.xlsx')
df_dapro.rename(columns= {'Unnamed: 0': ' '}, inplace=True)
df_dapro.set_index(' ', drop = True, inplace=True)
df_dapro

Unnamed: 0,Alanin_mg/100g,Alkohol (Ethanol)_mg/100g,Arginin_mg/100g,Asparagic acid_mg/100g,Butteric acid/butyric acid_mg/100g,Calcium_mg/100g,Cholesterin_mg/100g,Cystein_mg/100g,Decanic acid/capric acid_mg/100g,Decosanic acid/beetic acid_mg/100g,...,Water -insoluble fiber_mg/100g,Xylit_mg/100g,Sugar (total) _mg/100g,http://purl.obolibrary.org/obo/CHEBI_16646_mg/100g,http://purl.obolibrary.org/obo/FOODON_03316427_mg/100g,http://purl.obolibrary.org/obo/FOODON_03420190_mg/100g,Eutrophying emissions per 100g (gPO₄eq per 100g},Freshwater withdrawals per 100g (liters per 100g),Land use per 100g (m² per 100g),Fat_g/100g
,,,,,,,,,,,,,,,,,,,,,
Apple,0.0008,1.82757,0.052,0.0,3.25,0.303,0.0,0.005,3.25,0.859,...,0.0,2.01,0.49,0.0,0.0,0.0,0.145,18.01,0.063,0.4
Feta,0.078,17.15552,0.0,0.0,0.0,0.137,0.251,0.017,0.0,0.0,...,0.0004,0.0,0.795,0.0,0.0,0.0,,,,23.0
Pine nut,0.002,10.16515,1.03,0.0,0.146,0.204,0.0,0.235,0.146,0.144,...,0.0,7.2,12.47,0.0,0.0,0.0,,,,60.0
Sesame,0.01,10.16515,1.46,0.0,0.204,0.202,0.0,0.347,0.204,0.559,...,0.0,11.18,0.25,0.0,0.0,0.0,,,,50.0
Pistachio,0.005,14.99285,1.18,0.0,2.31,0.206,0.0,0.158,2.31,0.212,...,0.0,10.61,5.2,0.0,0.0,0.0,,,,52.0
Cashew nut,0.005,14.99285,2.06,0.0,22.18,0.213,0.005,0.258,22.18,0.0,...,0.0,3.1,0.323,0.0,0.0,0.0,,,,42.0
Hard cheese,0.052,15.3088,0.05,0.0,0.0,0.179,0.206,0.03,0.0,0.0,...,0.0014,0.0,0.55,0.0,0.0,0.0,9.837,560.52,8.779,32.0
Hazel nut,0.001,14.99285,1.6,0.0,5.97,0.219,0.006,0.163,5.97,0.0,...,0.0,7.7,22.2,0.0,0.0,0.0,,,,61.6
Chicken,0.0094,9.46711,0.065,0.0,0.0,0.264,0.0,0.011,0.0,0.0,...,0.0019,0.0,1.96,0.0,0.0,0.0,2.176,57.77,0.627,2.0


## Visualisierungen

## scientific colormaps (see http://www.fabiocrameri.ch/visualisation.php)

In [33]:
import sys

sys.path.insert(1, '../Data')

from cmcrameri import cm
from colors_cameri import davos, oslo, bilbao

In [34]:
davos_rgb = [el[1] for el in davos]
oslo_rgb = [el[1] for el in oslo]

In [35]:
zhaw_color = (0.00000 , 0.39216 , 0.65098)

## Overview over chosen samples (food products)

In [36]:
# rename dataset
df_food = df_dapro

## function for calculating recipes

In [37]:
# arguments are orginal dataframe, ingredient names as string and proportions (= g of ingredients to use)

def calculate_recipe(df, ingredients, proportions):
    # create column recipe in the df dataframe
    df.loc['recipe'] = 0
    sum_recipe = df.loc['recipe']
    # create dictionary with ingredient and proportion
    dict_recipe = dict(zip(ingredients, proportions))
    # multiply amount of food item of all parameters with proportion
    for key in dict_recipe:
        sum_recipe += (dict_recipe[key] * (df.loc[key]/100))
    # devide per sum proportions to create values per 100g
    sum_recipe_per100 = (sum_recipe/sum(dict_recipe.values()))*100
    return pd.DataFrame(sum_recipe_per100)

In [38]:
ingredients = ['Durum wheat', 'Chicken','Hard cheese']
proportions = [300,200,50]
dict_recipe = dict(zip(ingredients, proportions))

#### Werte werden auf 100 g des Rezeptes berechnet.

In [39]:
df_recipe = calculate_recipe(df_food.select_dtypes(include=np.number),
                             ingredients = ingredients, 
                             proportions = proportions)

In [40]:
df_recipe.loc[['Protein_g/g', 
              'Freshwater withdrawals per 100g (liters per 100g)' , 
               'Alanin_mg/100g', 'Arginin_mg/100g', 'Cystein_mg/100g', 
               'Histidin_mg/100g',        'Isoleucin_mg/100g',  'Lysin_mg/100g', 
               'Vitamin B12-Cobalamin_μg/100g', 'Sodium_mg/100g']]

Unnamed: 0,recipe
Protein_g/g,0.3222727
Freshwater withdrawals per 100g (liters per 100g),
Alanin_mg/100g,0.01092727
Arginin_mg/100g,0.11
Cystein_mg/100g,0.02309091
Histidin_mg/100g,0.3567273
Isoleucin_mg/100g,0.0
Lysin_mg/100g,0.1236364
Vitamin B12-Cobalamin_μg/100g,0.1562909
Sodium_mg/100g,1.297801e-07


## Show plotly plot for recipe composition

In [41]:
import plotly.express as px

# give ingredients as names, value = proportion, sum of recipe is in title

fig = px.pie(names = ingredients, values = proportions, 
    color_discrete_sequence=oslo_rgb[1::3], 
            title = ('Currently Chosen Composition of the Recipe: Proportions in Percent of Weight. Sum of the Recipe is {}g.').format(sum(proportions)))
fig.show()
# fig.write_html("PieChartRecipe2.html")

## fast way to calculate recipe for ingredients separately:
just multiply the columns with proportions and divide per sum of recipe mass

In [42]:
df_food.loc[ingredients][['Protein_g/g', 
              'Freshwater withdrawals per 100g (liters per 100g)' , 
               'Alanin_mg/100g', 'Arginin_mg/100g', 'Cystein_mg/100g', 
               'Histidin_mg/100g',        'Isoleucin_mg/100g',  'Lysin_mg/100g', 
               'Vitamin B12-Cobalamin_μg/100g', 'Sodium_mg/100g']].mul(proportions, axis='rows')/sum(proportions)

Unnamed: 0,Protein_g/g,Freshwater withdrawals per 100g (liters per 100g),Alanin_mg/100g,Arginin_mg/100g,Cystein_mg/100g,Histidin_mg/100g,Isoleucin_mg/100g,Lysin_mg/100g,Vitamin B12-Cobalamin_μg/100g,Sodium_mg/100g
,,,,,,,,,,
Durum wheat,0.21,,0.002782,0.081818,0.016364,0.327273,0.0,0.061091,0.133489,3.471095e-08
Chicken,0.096,21.007273,0.003418,0.023636,0.004,0.025818,0.0,0.053455,0.015956,5.17928e-08
Hard cheese,0.016273,50.956364,0.004727,0.004545,0.002727,0.003636,0.0,0.009091,0.006845,4.327632e-08


## create plotly bar plot for recipe with contribution of ingredients stacked

In [43]:
# https://community.plotly.com/t/how-to-set-different-x-and-y-axis-for-each-subplot/57417

import plotly.express as px
import pandas as pd

# choose parameters to display, ingredients and proportion is taken from lists preciously specified
# recipe is calculated directely as shown above, dataframe is transposed to have parameters as index
choice = ['Protein_g/g',
              'Fat_g/100g'] 
df_plot = (df_food.loc[ingredients][choice].mul(proportions, axis='rows')/sum(proportions)).T.reset_index()


# initiate plot, specify stacked and use index (= parameter) for subplots
# choose size and oslo color (every third color so colors are distinguishable)
fig = px.bar(df_plot,
             barmode="stack",
             facet_col='index', 
             color_discrete_sequence=oslo_rgb[0::3],
                                       facet_col_spacing=0.1,
    color_continuous_scale=None,
   width = 1050, height = 600
  )



title = 'Display of the chosen parameter for the current recipe (per 100g of total recipe): <br>The absolute contribution of all ingredients is shown. <br>Items can be turned on or off by clicking on the item in the legend.'
    
# set title, set bottom ticks to be white, remove x title
fig.update_layout(title_text=title, title_y = 0.95, 
         margin={'t': 120})
fig.update_xaxes(matches=None,showticklabels=False, side = "bottom", color = 'white', )
fig.update_layout(  xaxis_title=' ')


fig.update_yaxes(matches=None, showticklabels=True)

# original name of subplots was 'index = parameter', therefore all before the '=' is removed
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))

fig.show()
# fig.write_html("BarsRecipeGram2.html")

## Calculation of contributions to the recipe measures per unit of energy

### Per energy

In [44]:
# calculate how much energy the ingredients bring into the recipe
energy_per_ingredient = df_food.loc[ingredients]['Energy (kilojoule) _kJ/100g'].mul(proportions, axis='rows')/100

# calculate proportions but not divide per mass but per energy per ingredient
df_per_energy = ((df_food.loc[ingredients].mul(proportions, axis='rows')/sum(energy_per_ingredient))).T
df_per_energy

Unnamed: 0,Durum wheat,Chicken,Hard cheese
Alanin_mg/100g,0.863188,1.060649,1.466855
Alkohol (Ethanol)_mg/100g,684.624542,1068.221157,431.842031
Arginin_mg/100g,25.387870,7.334274,1.410437
Asparagic acid_mg/100g,0.000000,0.000000,0.000000
Butteric acid/butyric acid_mg/100g,116.784203,0.000000,0.000000
...,...,...,...
http://purl.obolibrary.org/obo/FOODON_03420190_mg/100g,0.000000,0.000000,0.000000
Eutrophying emissions per 100g (gPO₄eq per 100g},,245.528914,277.489422
Freshwater withdrawals per 100g (liters per 100g),,6518.476728,15811.565585
Land use per 100g (m² per 100g),,70.747532,247.644570


### Units need to be changed, /100g is remove, /1kJ is added

In [45]:
sep = '/'
dict_unit_per_energy = {}
for index in df_per_energy.index:
    dict_unit_per_energy[index] = index.split(sep, 1)[0] + '/1kJ'


In [46]:
df_per_energy.rename(index = dict_unit_per_energy, inplace=True)

## create plotly bar plot for recipe (per energy) with contribution of ingredients stacked

In [47]:
# https://community.plotly.com/t/how-to-set-different-x-and-y-axis-for-each-subplot/57417
# same plot as previous, just for energy dataframe

import plotly.express as px
import pandas as pd


choice = ['Protein_g/1kJ', 
              'Freshwater withdrawals per 100g (liters per 100g)/1kJ']
df_plot_per_energy = df_per_energy.loc[choice]
fig = px.bar(df_plot_per_energy,
#              x='ingredient',
#              y='value', 
             barmode="stack",
             facet_col=df_plot_per_energy.index, 
             color_discrete_sequence=oslo_rgb[1::3],
                                                    facet_col_spacing=0.06,
    color_continuous_scale=None,
    title = 'Display of the chosen parameter for the current recipe (per Energy 1kJ): <br> The absolute content of all ingredients is shown.'
            )
fig.update_xaxes(matches=None, showticklabels=True)
fig.update_yaxes(matches=None, showticklabels=True)
fig.show()
# fig.write_html("BarsRecipeItemEnergy2.html")

## AAs Diagram (Rose Chart)

### Calculate AA Score first

In [48]:
# only use amino acid parameters

df_recipe_prot = (df_food.loc[ingredients][[
'Histidin_mg/100g',
'Isoleucin_mg/100g',
'Leucin_mg/100g',
'Lysin_mg/100g',
 'Threonin_mg/100g',
'Tryptophan_mg/100g',
'Valin_mg/100g',
    'Methionin_mg/100g',
    'Cystein_mg/100g',
    'Alanin_mg/100g',
    'Tyrosin_mg/100g',
    'Protein_g/g']]).T

In [49]:
df_recipe_prot

Unnamed: 0,Durum wheat,Chicken,Hard cheese
Histidin_mg/100g,0.6,0.071,0.04
Isoleucin_mg/100g,0.0,0.0,0.0
Leucin_mg/100g,0.001,0.144,0.8
Lysin_mg/100g,0.112,0.147,0.1
Threonin_mg/100g,0.087,0.18,1.1
Tryptophan_mg/100g,1.0,1.83,0.4
Valin_mg/100g,3.3,1.49,4.5
Methionin_mg/100g,0.017,0.051,0.9
Cystein_mg/100g,0.03,0.011,0.03
Alanin_mg/100g,0.0051,0.0094,0.052


In [50]:
# combine values according to FAO standard
df_recipe_prot.loc['Methionine+Cysteine(SAA)_mg/100g'] = df_recipe_prot.loc['Methionin_mg/100g'] + df_recipe_prot.loc['Cystein_mg/100g']
df_recipe_prot.loc['Phenylalanine+Tyrosine_mg/100g'] = df_recipe_prot.loc['Alanin_mg/100g'] + df_recipe_prot.loc['Tyrosin_mg/100g']

In [51]:
# reference values essential amino acids: https://www.fao.org/ag/humannutrition/35978-02317b979a686a57aa4593304ffc17f06.pdf, TABLE 5:
ess_aa= pd.read_excel("../Data/EssentialAminoAcids.xlsx", sheet_name = 'Sheet2')
ess_aa.set_index('Amino acid', inplace=True)

In [52]:
# comnine reference values with recipe df

df_aa_sep = df_recipe_prot.merge(ess_aa, right_on= ess_aa.index, left_index=True )


    

In [53]:
# score is calculated by dividing per reference value ('mg/g crude protein')
for ingredient in ingredients:
    list_new = df_aa_sep[ingredient]/df_aa_sep['mg/g crude protein']
    df_aa_sep[ingredient] = list_new

In [54]:
# remove reference values
df_aa_sep.drop(['key_0', 'mg/g crude protein'], axis =1, inplace = True)

In [55]:
df_aa_sep

Unnamed: 0_level_0,Durum wheat,Chicken,Hard cheese
Amino acid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Histidin_mg/100g,0.0375,0.004437,0.0025
Isoleucin_mg/100g,0.0,0.0,0.0
Leucin_mg/100g,1.6e-05,0.002361,0.013115
Lysin_mg/100g,0.002333,0.003062,0.002083
Threonin_mg/100g,0.00348,0.0072,0.044
Tryptophan_mg/100g,0.151515,0.277273,0.060606
Valin_mg/100g,0.0825,0.03725,0.1125
Methionine+Cysteine(SAA)_mg/100g,0.002043,0.002696,0.040435
Phenylalanine+Tyrosine_mg/100g,0.003295,0.00462,0.006146


In [56]:
# caclucate sum by multiplying with proportions, divide by sum recipe , divide by sum protein and muliply again with proportions
df_aa_sep = df_aa_sep.mul(proportions, axis='columns')/sum(proportions)/sum(df_food.loc[ingredients]['Protein_g/g'].mul(proportions, axis='rows')/sum(proportions))


In [57]:
# calulate sum for checking
sum_prot = []

for index in df_aa_sep.index:
    sum_prot.append(sum(df_aa_sep.loc[index][ingredients]))
    
df_aa_sep['sum'] = sum_prot

In [58]:
# for testing the plot the numbers are multiplied by 15, otherwise the values are too low
# please remove when using for real applications


df_aa_sep = df_aa_sep*15

In [59]:
# remove 'per 100g' from index

new_index = []
for index in df_aa_sep.index:
    new_index.append(index.split('_')[0])
    
df_aa_sep.index = new_index

In [60]:
#https://medium.com/@abhishekdas.nitc/nightingale-plots-in-python-using-plotly-da42bc18d15d

# initiate plotly go

import plotly.graph_objects as go
import plotly

fig = go.Figure()

i = 0

# show stacked plots for every ingredient:
for ingredient in ingredients:

    fig.add_trace(go.Barpolar(
        # this is the value
        r = list(df_aa_sep[ingredient]),
        # this is the name of amino acid
        theta=list(df_aa_sep.index),
        
        name = ingredient,
#         color is taken from oslo with step 2
        marker_color=oslo_rgb[i*2],

        marker_line_color="black",
        hoverinfo = ['all'],
        opacity=0.7   
    ))
    i += 2
    



# choose title, font size, size
fig.update_layout(
    title='Amino Acids Scores of all Essential Amino Acids separately: To be complete, the protein needs to reach 1 (outer border) <br>for all Amino Acids. The contribution of all ingredients is shown. <br>Items can be turned on or off by clicking on the item in the legend. Zoom in by dragging the pointer over the<br>desired area.',
    font_size=12,
    legend_font_size=15,
    polar_angularaxis_rotation=90,
    width=1100,
    height=900,
    
    # set ticks and lines
    polar = dict(
              bgcolor = "rgb(223, 223, 223)",
              angularaxis = 
                dict(
                    linewidth = 3,
                    showline=True,
                    linecolor='black'
                    ),
          radialaxis = 
                dict( tickmode = 'array', 
                     
                     # only show plot up to value 1
                     range=[0, 1],
                     # set ticks
                     tickvals = [0, 0.2, 0.4, 0.6, 0.8, 1, 1.2],       
                    showline = True,
                    linewidth = 2,
                    gridcolor = "white",
                    gridwidth = 2,
                    )
                ),

            )
# update gap betweem title and plot
fig.update_layout( margin={'t': 200})

fig.show()
# fig.write_html("AASRoseChart2.html")