# Food Environmental Impact
## ILV Datenvisualisierung und Visual Analytics
## Christina Köck
## Februar 2023
### Link to the Gitlab-Repo: https://gitlab.web.fh-kufstein.ac.at/christina.koeck/datenvisualisierung_und_visualanalytics

Die Visualisierungen werden in diesem Notebook erstellt. Designentscheidungen werden hier dokumentiert. Nach der Entwicklung wird der Code in eine streamlit-Anwendung für das Dashboarding übertragen.

Die Streamlit-Anwendung ist für interessierte VerbraucherInnen mit leicht wissenschaftlichem Hintergrund gedacht. Die Informationen sollten für VerbraucherInnen verständlich sein, allerdings sind gewisse Kenntnisse zu den Nachhaltigkeitsparametern vorausgesetzt. Besonders die Darstellung der Korrelation setzt Kenntnisse der Pearson-Korrelation voraus. Die Streamlit-Anwendung könnte auch im Unterricht verwendet werden, um Lernenden verschiedene Ernährungsformen näherzubringen. Die Anwendung ist so gestaltet, dass sie mit verschiedenen Fragen durch die Daten führt. Somit soll auf verschiedene Aspekte und Zusammenhänge hingewiesen werden. Die NutzerInnen können dabei selbst wählen, welche Lebensmittel dargestellt werden sollen.

### Libraries and data

In [1]:
# ! pip install cmcrameri

In [2]:
from cmcrameri import cm
import math as math
import pandas as pd
import numpy as np
import seaborn as sns

import matplotlib.pyplot as plt
import plotly.express as px

import sparql_dataframe

In [3]:
# # Data are from:
# # Hannah Ritchie and Max Roser (2022)
# # "Environmental Impacts of Food Production".
# # Published online at OurWorldInData.org. 'https://ourworldindata.org/environmental-impacts-of-food' [Online Resource]
# # Data was retrieved in a shorter form from https://www.kaggle.com/datasets/selfvivek/environment-impact-of-food-production
# df_food = pd.read_excel("Mockdata012.xlsx")
# df_food.drop('Unnamed: 0', axis = 1, inplace=True)


In [4]:
# df_food.head(12)

## Read in Dapro Data

In [5]:
# SPARQL endpoint that all DaPro queries in this notebook are sent to.
endpoint = 'http://localhost:8000'

# Select the rdfs:label of every resource typed as dapro:Product.
q = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX schema: <http://schema.org/>
PREFIX dapro: <http://dapro.opendata.zhaw.ch/DaPro.owl#>

SELECT ?name
WHERE {
	?dapro a dapro:Product .
    ?dapro rdfs:label ?name.       
}
"""


# Run the query; the 'name' column of the result is read in the next cell.
df_products = sparql_dataframe.get(endpoint, q)
# df_products.head(20)

In [6]:
# Product labels as plain Python strings, with 'Apfel' put in front of them
# (so 'Apfel' can occur twice in the list).
food_items = ['Apfel'] + [str(label) for label in df_products['name'].astype('str')]
# food_items

In [7]:
# this works
# Per-product result frames, keyed "df_<label>".
d = {}
# food_items = ["Apfel", "Birne", "Ahornsirup", "Brie", "Knoblauch", 'Ackerbohne']

endpoint = 'http://localhost:8000'

# One query per product label: fetch its ingredient entries (name, value, unit code).
# NOTE(review): the label is inserted into the query text unescaped; a label
# containing a double quote would produce an invalid query.
for item in food_items:

    q = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX schema: <http://schema.org/>
    PREFIX dapro: <http://dapro.opendata.zhaw.ch/DaPro.owl#>
    SELECT ?foodoningredient ?foodoningredientval ?unit
    WHERE {{
        ?dapro rdfs:label "{}" .
        ?dapro schema:ingredients ?recingredient .
        ?recingredient schema:ingredients ?foodoningredient .
        ?recingredient schema:value ?foodoningredientval .
        ?recingredient schema:unitCode ?unit .   
    }}
        """.format(item)

    df_interim = sparql_dataframe.get(endpoint, q)
    # Index by ingredient name, name the value column after the product,
    # and sort the rows by index label.
    df_interim.set_index('foodoningredient', inplace = True)
    df_interim.rename(columns={"foodoningredientval": item}, inplace = True)
    df_interim = df_interim.reindex(sorted(df_interim.index))
    # A label that occurs twice in food_items overwrites its earlier entry.
    d["df_{}".format(item)] = df_interim

# Start from the first product's frame (value column plus 'unit').
df_dapro = d[next(iter(d))]
# # df_dapro.drop('unit', axis = 1, inplace = True)
# Outer-merge the value column of every product whose 'unit' column equals the
# first product's; products with a different 'unit' column are left out.
# The loop also visits the first frame itself, so its value column is merged a
# second time (presumably the origin of the 'Apfel_y' label renamed later).
for key in d.keys():
    if d[key]['unit'].equals(d[next(iter(d))]['unit']):
        df_dapro = df_dapro.merge(d[key][d[key].columns[0]], how='outer', left_index= True,
                               right_on = 'foodoningredient' 
                             )


# df_dapro.drop('unit', axis = 1, inplace = True)
# Fold the unit into the row label ("<ingredient>_<unit>") ...
df_dapro.set_index('unit', append = True, inplace = True)
df_dapro.index = ['_'.join(ind) for ind in df_dapro.index.values]

# ... and transpose so that products become rows and parameters become columns.
df_dapro=df_dapro.T

In [8]:
# read in ecological data:

# Per-product result frames, keyed "df_<label>".
d = {}


endpoint = 'http://localhost:8000'
# One query per product label: fetch the entry labelled
# "ecological scarcity 2013, total, UBP/kg in <label>" together with its value.
for item in food_items:
    q = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX schema: <http://schema.org/>
    PREFIX dapro: <http://dapro.opendata.zhaw.ch/DaPro.owl#>

    SELECT   ?name1 ?UBP
    WHERE {{

        ?ingredient rdfs:label "ecological scarcity 2013, total, UBP/kg in {}"   .
        ?ingredient rdfs:label ?name1.
        ?ingredient schema:value ?UBP .
        OPTIONAL{{ ?ingredient schema:unitCode ?unit .}}   
    }}
    """.format(item)


    df_interim = sparql_dataframe.get(endpoint, q)
    df_interim.set_index('name1', inplace = True)
    # 'name1' has just been moved into the index, so this rename matches no column.
    df_interim.rename(columns={"name1": item}, inplace = True)
    df_interim = df_interim.reindex(sorted(df_interim.index))
    d["df_{}".format(item)] = df_interim

# Start from the first product's frame and append the first column of every
# frame (the first frame included) below it.
# NOTE(review): a DataFrame is concatenated with Series objects here; the next
# cells drop a 'UBP' column and rename a column 0, so they rely on the Series
# values ending up in a separate column labelled 0 — confirm for the pandas
# version in use.
df_UBP = d[next(iter(d))]
# # df_dapro.drop('unit', axis = 1, inplace = True)
for key in d.keys():
#     if d[key]['unit'].equals(d[next(iter(d))]['unit']):
        df_UBP = pd.concat([df_UBP, d[key][d[key].columns[0]]])

# df_dapro.drop('unit', axis = 1, inplace = True)
# df_dapro.set_index('unit', append = True, inplace = True)
# df_dapro.index = ['_'.join(ind) for ind in df_dapro.index.values]

# df_dapro=df_dapro.T

In [9]:
# Discard the 'UBP' column and every row with a missing value, then convert
# the per-kilogram figures to per-100 g (divide by 10).
df_UBP = df_UBP.drop('UBP', axis = 1).dropna(axis = 0) / 10

In [10]:
def lastWord(string):
    """Return the part of ``string`` that follows its last space.

    A string without any space is returned unchanged; a string that ends in
    a space yields the empty string.
    """
    return string.rpartition(" ")[2]

In [11]:
# Reduce every index label to its last word and use the result as the new index.
actual_food = [lastWord(label) for label in df_UBP.index]
df_UBP.index = actual_food

In [12]:
# Give the UBP value column (labelled 0 so far) its descriptive per-100 g name.
df_UBP = df_UBP.rename(columns={0: 'ecological scarcity 2013, total, UBP/100g'})
# Restore the plain product label for the suffixed 'Apfel_y' row ...
df_dapro = df_dapro.rename(index={'Apfel_y': 'Apfel'})
# ... and remove whatever carries the label of the current first row.
df_dapro = df_dapro.drop(df_dapro.index[0], axis=0)

In [13]:
# Left-join the UBP values onto the product table by index label; products
# without a matching UBP row keep missing values in the new column.
df_dapro = df_dapro.merge(df_UBP,  how = 'left', left_index=True, right_index=True)


In [14]:
# Read the 'others' sheet of UBP.xlsx and leave out its 'Category' column.
df_eco = pd.read_excel('UBP.xlsx', sheet_name='others').drop('Category', axis=1)


In [15]:
# Use the product name as row label.
df_eco = df_eco.set_index('Food product')

In [16]:
# Convert to a per-100 g basis instead of per kilogram: every column is divided
# by 10, and the three per-kilogram column labels are renamed to match.
df_eco = df_eco/10
df_eco.rename(columns = {
    # Fixed: the new label used to end in '}' instead of ')'.
    'Eutrophying emissions per kilogram (gPO₄eq per kilogram)': 'Eutrophying emissions per 100g (gPO₄eq per 100g)', 
    'Freshwater withdrawals per kilogram (liters per kilogram)' : 'Freshwater withdrawals per 100g (liters per 100g)',
    'Land use per kilogram (m² per kilogram)': 'Land use per 100g (m² per 100g)'
}
    , inplace = True)

In [17]:
# Replace the English product names of this sheet by the German labels, so that
# the rows can be matched by index in the merge that follows.
english_to_german = {
    'Groundnuts': 'Erdnuss',
    'Apples': 'Apfel',
    'Milk': 'Kuhmilch',
    'Cheese': 'Hartkäse',
    'Eggs': 'Hühnerei',
}
df_eco = df_eco.rename(index=english_to_german)

In [18]:
# Left-join the additional environmental indicators by index label; products
# without a matching row in df_eco keep missing values in these columns.
df_dapro = df_dapro.merge(df_eco, how = 'left', left_index=True, right_index=True )

In [19]:
# Relabel the protein column. Only the column name changes; values are untouched.
# NOTE(review): later plotting cells still address this column as
# 'Eiweiß (Protein)_mg/100g' — confirm which label (and unit) is intended.
df_dapro.rename(columns = {'Eiweiß (Protein)_mg/100g': 'Eiweiß (Protein)_g/g'
                          }, inplace=True)

In [20]:
# from googletrans import Translator

# translator = Translator()
# columns_en = list(df_dapro.columns[:20].map(lambda x: translator.translate(x, dest='en').text))
# columns_en.append(list(df_dapro.columns[20:40].map(lambda x: translator.translate(x, dest='en').text)))
# print(len(columns_en))
# columns_en.append(list(df_dapro.columns[40:60].map(lambda x: translator.translate(x, dest='en').text)))
# print(len(columns_en))
# columns_en.append(list(df_dapro.columns[60:80].map(lambda x: translator.translate(x, dest='en').text)))
# print(len(columns_en))
# columns_en.append(list(df_dapro.columns[80:100].map(lambda x: translator.translate(x, dest='en').text)))
# print(len(columns_en))
# columns_en.append(list(df_dapro.columns[100:119].map(lambda x: translator.translate(x, dest='en').text)))



In [21]:
from googletrans import Translator

translator = Translator()
# English column names, filled one by one so that the names translated so far
# survive if a request fails part-way (a later cell pads the list).
columns_en = []

# One translation request per column label.
for col in df_dapro.columns:
    word = translator.translate(str(col), dest='en').text
    columns_en.append(word)

ReadTimeout: The read operation timed out

In [None]:
# Number of column names translated so far.
len(columns_en)

In [None]:
# Pad the translated names with the original names of all columns that have no
# translation yet. Using the current list length instead of a fixed offset
# keeps the result aligned with df_dapro.columns wherever the translation
# stopped, and makes re-running this cell a no-op.
columns_en = columns_en + list(df_dapro.columns[len(columns_en):])

In [None]:
# Inspect the combined list of column names.
columns_en

In [None]:
# Length check: must equal the number of columns of df_dapro before assignment.
len(columns_en)

In [None]:
# Work on a copy: a plain assignment would only create a second name for the
# same DataFrame, so setting the English labels would relabel df_dapro as well
# and break the later cells that address df_dapro by its German column names.
df_dapro_en = df_dapro.copy()
df_dapro_en.columns = columns_en

In [None]:
# Inspect the table after the English column names have been assigned.
df_dapro_en

In [None]:
from googletrans import Translator

translator = Translator()
# English row labels, in the order of df_dapro.index.
index_en = []

# One translation request per row label.
for index in df_dapro.index:
    word = translator.translate(str(index), dest='en').text
    index_en.append(word)

In [None]:
# Replace the row labels by their translations (lengths must match).
df_dapro_en.index = index_en

In [None]:
# Inspect the table after the English row labels have been assigned.
df_dapro_en

In [None]:
# Manual corrections of row labels after the automatic translation:
# capitalisation, labels left in German and wrong translations.
# Labels listed here that do not occur in the index are ignored by rename.
df_dapro_en.rename(index = {'linseed': 'Linseed', 
                            'peanut': 'Peanut', 
                            'hazelnut': 'Hazel nut',
                            'sesame': 'Sesame',
                            'Cashewnuss': 'Cashew nut', 
                            'Pine core': 'Pine nut', 
                           'Trackie': 'Pistachio', 
                           'Sunflower core': 'Sunflower seed', 
                           'Hartweizengriess': 'Durum wheat'}, inplace = True)

In [None]:
# Manual corrections of column labels after the automatic translation
# (unit spelling, 'Natrium' -> 'Sodium', 'Strength' -> 'Starch').
df_dapro_en.rename(columns = {'Protein (protein) _G/g': 'Protein_g/g', 
                            'Energy (kilojoule) _kj/100g': 'Energy (kilojoule) _kJ/100g', 
                            'Natrium_mg/100g': 'Sodium_mg/100g',
                            'Strength_mg/100g': 'Starch_mg/100g',
}, inplace = True)

In [None]:
# Write the English-labelled table to an Excel file in the working directory.
df_dapro_en.to_excel('df_dapro_en.xlsx')

In [None]:
# df_dapro.to_excel('df_dapro.xlsx')

## Visualisierungen

In [None]:
def annontations_hor(plots):
    """Label every horizontal bar with its width, formatted without decimals.

    ``plots`` has to provide ``patches`` (the bar rectangles). The labels are
    drawn with ``plt.annotate``, i.e. on the current pyplot axes, in white
    with font size 25.
    """
    for rect in plots.patches:
        # The bar end is the anchor; the label is centred on the bar's height.
        width = rect.get_width()
        mid_height = rect.get_y() + rect.get_height() / 2

        # Offset from the bar end in points plus the matching horizontal
        # alignment: 30 points to the left for non-negative bars, 30 points
        # to the right for negative ones.
        if width < 0:
            offset, anchor = 30, 'right'
        else:
            offset, anchor = -30, 'left'

        plt.annotate(
            '{:,.0f}'.format(width),
            (width, mid_height),
            xytext=(offset, 0),
            textcoords='offset points',
            va='center',
            ha=anchor,
            color='white',
            fontsize=25)


## scientific colormaps (see http://www.fabiocrameri.ch/visualisation.php)

In [None]:
from colors_cameri import bilbao, tofino, davos, lisbon, oslo

In [None]:
# Keep the second element of every tofino entry
# (presumably the colour value of a (position, colour) pair — TODO confirm).
tofino_rgb = [el[1] for el in tofino]

In [None]:
# Same extraction for the davos and oslo maps; oslo_rgb is the colour list
# passed to the plotly figures below.
davos_rgb = [el[1] for el in davos]
oslo_rgb = [el[1] for el in oslo]

In [None]:
# colors =  dict(zip(df_food["Category"].unique(), tofino_rgb))

In [None]:
# Single colour as a triple of floats between 0 and 1
# (presumably RGB of the ZHAW corporate blue — TODO confirm).
zhaw_color = (0.00000 , 0.39216 , 0.65098)

### Verteilungen bestimmter Spalten
 - alle beliebigen Spalten können gewählt werden
 - Zusammenhänge der Spalten werden dargestellt
 - ob Parameter positiv oder negativ zu bewerten sind, muss der User feststellen

## TODO:
 [ x ] Auswahl aller Spalten

In [None]:
# Short name for the plotting cells. This is an alias, not a copy: df and
# df_dapro refer to the same DataFrame.
df = df_dapro

In [None]:
# List the column labels from position 30 onwards.
df.columns[30:]

In [None]:
# Show the protein and the two energy columns side by side.
# NOTE(review): an earlier cell renames 'Eiweiß (Protein)_mg/100g' to
# 'Eiweiß (Protein)_g/g'; if that cell has run, this lookup raises a KeyError.
df[['Eiweiß (Protein)_mg/100g', 'Energie (Kilokalorien)_kcal/100g', 'Energie (Kilojoule)_kJ/100g']]

In [None]:
# Parameters to display: two on the axes, one as bubble size.

# x = 'Vitamin B12-Cobalamin_μg/100g'
#  'Wasserunlösliche Ballaststoffe_mg/100g'
# y = 'Eiweiß (Protein)_mg/100g'
x = 'Einfach ungesättigte Fettsäuren_mg/100g'
y = 'Eiweiß (Protein)_mg/100g'
size = 'Vitamin B12-Cobalamin_μg/100g'

# The title names the size parameter; <br> creates the line breaks in plotly.
title = 'Distribution of the food products in the database in regard to the chosen parameters.<br>Two parameters are shown on the x- and y- axis respectively, the size of the bubbles show the parameter<br>"{}".<br>By hovering over the bubbles the numbers are shown.'.format(size)

# One bubble per product, coloured and hover-labelled by the index label.
fig = px.scatter(
    df,
    x=x,
    y=y,
    size=size,
    size_max=60,
    color=df.index,
    hover_name=df.index,
    color_discrete_sequence=oslo_rgb[:9],
    height=750,
    title=title,
)
# Extra top margin so that the four-line title does not overlap the plot.
fig.update_layout(margin={'t': 200})
fig.show()

In [None]:
# Parameters to display: two on the axes, one as bubble size.
x = 'Vitamin C-Ascorbinsäure_μg/100g'
y = 'Eiweiß (Protein)_mg/100g'
size = 'Zucker (gesamt)_mg/100g'

# One bubble per product, coloured and hover-labelled by the index label.
fig = px.scatter(
    df,
    x=x,
    y=y,
    size=size,
    size_max=60,
    color=df.index,
    hover_name=df.index,
    color_discrete_sequence=oslo_rgb,
    title='Distribution of the food products in the database in regard to the chosen parameters.',
)
fig.show()

In [None]:
# plt.figure(figsize=(8, 4))
# sns.scatterplot(df_food, x = x, y = y, size = size, hue = 'Category', palette=cm.oslo.colors, legend='brief')
# plt.legend(loc=(1.04, 0))

# sns.despine(left=True, bottom=True)

### Anzahl pro Kategorie
 - zur Übersicht ob etwas überrepräsentiert ist
 - Aggregation, keine negative oder positive Aussage, keine Referenzwerte nötig
  - Annotation/Farbe ja/nein

## TODO:
### kann ich das besser zur Übersicht verwenden? 
### Clickable machen?
### Treemap

In [None]:
# Sunburst of categories and their food products, coloured by protein content.
# `path` runs from the root ring to the leaves, so the category has to come
# first to get the layout the title describes (categories inside, products
# outside); the previous order put each product in the inner ring.
# NOTE(review): needs a 'Category' column in df and the protein column under
# the label used below — confirm both exist for the current data.
fig = px.sunburst(df, path=[df.Category, df.index], color= 'Eiweiß (Protein)_mg/100g'
                  , color_discrete_sequence = (oslo_rgb + oslo_rgb*2), 
                  # Trailing space added: the two literals are concatenated.
                  title = 'Categories in the database (inner circle) and corresponding food products '
                  '(outer circle). <br>Click on one category to zoom in. To go back, click on the category again.'
                 )

fig.show()

In [None]:
# # chose a parameter to display

# list = ['Category', 'Allergens', 'FurProc', 'NutritionalForm']

# choice = 'Allergens'

# plt.figure(figsize=(8, 4))
# sns.countplot(df_food, x= choice , color= zhaw_color, order=df[choice].value_counts().index)
# sns.despine(left=True, bottom=True)
# plt.title('Count of food products in the database in regard to the chosen parameter.')


### Anzahl missing values
 - Nur Anzahl pro Parameter oder Datensatz oder beides
 - keine Referenzwerte
 - fehlende Werte negativ notiert (dunkles rot/schwarz)
     - Farben für Konsistenz beibehalten oder rot/schwarz?

## TODO:
[ X ] paar Produkte auswählen 

In [None]:
# df_food.set_index('Food product', inplace=True)

In [None]:
# choice = ['Rice', 'Potatoes', 'Milk']
# All columns are selected.
choice = df.columns[:]

# Boolean mask: True where a value is missing.
df_plot = df[choice].isna()

plt.figure(figsize = (33, 10))

# Heatmap of the mask without colour bar, using six colours blended from the oslo map.
sns.heatmap(df_plot, cbar = False, cmap = sns.blend_palette(cm.oslo.colors, n_colors=6))
# Column labels go above the plot instead of below it, rotated by 90 degrees.
plt.tick_params(axis='both', which='major', 
                labelsize=10, labelbottom = False, bottom=False, top = False, labeltop=True)
plt.xticks(rotation = 90)
plt.title('Count of unknown values in the database. Dark color signifies known values, bright color signifies unknown value.')
plt.show()

In [None]:
# Inspect the boolean mask behind the heatmap.
df_plot

In [None]:
import plotly

# All columns are selected.
choice = df.columns[:]

# Boolean mask: True where a value is missing.
df_plot = df[choice].isna()

title_text = 'Count of unknown values in the database. Dark color signifies <br> known values, bright color signifies unknown value.'

# The heatmap is drawn by plotly alone; the matplotlib figure that used to be
# opened here was never drawn on and only produced an empty figure.
fig = px.imshow(df_plot, text_auto=False, aspect="auto", width=2000,height=800, 
    color_continuous_scale=oslo_rgb
               )
# Column labels above the plot, title slightly below the top edge.
fig.update_xaxes(side = "top")
fig.update_layout(title_text=title_text,title_y = 0.95)
fig.show()

In [None]:
import plotly

# Columns 50 to 64 (by position) are shown; the figure width scales with their number.
choice = df.columns[50:65]

size = len(choice)

# Boolean mask: True where a value is greater than zero. Missing values
# compare as False and are therefore shown like zero values.
df_plot = df[choice]>0

title_text = 'Count of zero values in the database. Dark color signifies zero values,<br>bright color signifies values bigger than zero.'

# The heatmap is drawn by plotly alone; the matplotlib figure that used to be
# opened here was never drawn on and only produced an empty figure.
fig = px.imshow(df_plot, text_auto=False, aspect="auto", width=size*50,height=500, 
    color_continuous_scale=oslo_rgb
               )
# Column labels above the plot; extra top margin for the two-line title.
fig.update_xaxes(side = "top")
fig.update_layout(title_text=title_text,title_y = 0.95,
         margin={'t': 200})

# A colour scale carries no information for a True/False mask.
fig.update_coloraxes(showscale=False)
fig.show()

In [None]:
# Inspect the mask behind the heatmap: True where a value is greater than zero.
df[choice]>0

### distribution of a certain parameter
 - Verteilung der Daten, was sind typische hohe/niedrige Werte?
 - Skala kann eine Rolle spielen, wenn kleine und große Wertebereiche bei den Parametern vorhanden sind (Kalorien vs Asche)
 - trotzdem keine Referenzwerte verwendet, tatsächlicher Wertebereich soll abgebildet werden
 - keine negative /Positive Bewertung der Verteilung

## TODO:
### nicht verständlich, so darstellen, dass Werte über 0, vielleicht Balken
### Sinnvolle Beschreibung

In [None]:
# Inspect the four columns at positions 10 to 13.
df_dapro[df_dapro.columns[10:14]]

In [None]:
# # column = ['Eiweiß (Protein)']
# column = df_dapro.columns[10:14]
# # column = df_food.columns[23:27]

# plt.figure(figsize=(17, 10))
# sns.histplot(df_dapro[column], palette = sns.blend_palette(cm.oslo.colors, n_colors=6), multiple='dodge' )
# # plt.yticks(fontsize=20)
# # plt.xticks(fontsize=20)
# sns.despine(left=True, bottom=True)

In [None]:
# column = ['calories [kcal]', 'EuEmkg']

# plt.figure(figsize=(17, 10))
# sns.kdeplot(df_food[column], palette = cm.davos.colors)
# # plt.yticks(fontsize=20)
# # plt.xticks(fontsize=20)
# sns.despine(left=True, bottom=True)

### Bubbles with data
Nur Anzahl der Datenquellen, Datensätze, Parameter


## TODO:
### mit Übersicht der Datenquellen kombinieren

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Labels, bubble areas and colours for the three bubbles of the overview chart
# (number of rows, number of columns, number of data sources).
# NOTE(review): the label announces 5 data sources while the area uses 2 —
# confirm the intended number. 'color' holds four entries for three bubbles;
# only the first three are used when plotting by position.
db_characterise = {
    'dimensions': ['food products: {}'.format(len(df_dapro.index)), 
                   'parameter : {}'.format(len(df_dapro.columns)), 'data sources: {}'.format(5)],
    'count': [len(df_dapro.index), len(df_dapro.columns), 2],
    'color': (sns.blend_palette(cm.oslo.colors, n_colors=5)[-4:])
}


class BubbleChart:
    """Packed bubble chart: circles pulled towards their common centre of mass.

    Every row of ``self.bubbles`` describes one bubble as ``[x, y, radius, area]``.
    """

    def __init__(self, area, bubble_spacing=0):
        """
        Setup for bubble collapse.

        Parameters
        ----------
        area : array-like
            Area of the bubbles.
        bubble_spacing : float, default: 0
            Minimal spacing between bubbles after collapsing.

        Notes
        -----
        If "area" is sorted, the results might look weird.
        """
        area = np.asarray(area)
        r = np.sqrt(area / np.pi)

        self.bubble_spacing = bubble_spacing
        self.bubbles = np.ones((len(area), 4))
        self.bubbles[:, 2] = r
        self.bubbles[:, 3] = area
        # Largest distance two bubble centres need to be apart to never overlap.
        self.maxstep = 2 * self.bubbles[:, 2].max() + self.bubble_spacing
        self.step_dist = self.maxstep / 2

        # calculate initial grid layout for bubbles
        length = np.ceil(np.sqrt(len(self.bubbles)))
        grid = np.arange(length) * self.maxstep
        gx, gy = np.meshgrid(grid, grid)
        self.bubbles[:, 0] = gx.flatten()[:len(self.bubbles)]
        self.bubbles[:, 1] = gy.flatten()[:len(self.bubbles)]

        self.com = self.center_of_mass()

    @staticmethod
    def _unit_vector(vec):
        """Return ``vec`` scaled to length 1, or ``None`` for a zero vector."""
        norm = np.sqrt(vec.dot(vec))
        if norm == 0:
            return None
        return vec / norm

    def center_of_mass(self):
        """Return the area-weighted mean of all bubble centres."""
        return np.average(
            self.bubbles[:, :2], axis=0, weights=self.bubbles[:, 3]
        )

    def center_distance(self, bubble, bubbles):
        """Return the distances from ``bubble``'s centre to each centre in ``bubbles``."""
        return np.hypot(bubble[0] - bubbles[:, 0],
                        bubble[1] - bubbles[:, 1])

    def outline_distance(self, bubble, bubbles):
        """Return the gaps between outlines, reduced by the required spacing.

        A negative entry means the two bubbles are closer than allowed.
        """
        center_distance = self.center_distance(bubble, bubbles)
        return center_distance - bubble[2] - \
            bubbles[:, 2] - self.bubble_spacing

    def check_collisions(self, bubble, bubbles):
        """Return the number of ``bubbles`` that ``bubble`` collides with."""
        distance = self.outline_distance(bubble, bubbles)
        return len(distance[distance < 0])

    def collides_with(self, bubble, bubbles):
        """Return the index of the nearest bubble, wrapped in a list."""
        distance = self.outline_distance(bubble, bubbles)
        idx_min = np.argmin(distance)
        return idx_min if isinstance(idx_min, np.ndarray) else [idx_min]

    def collapse(self, n_iterations=50):
        """
        Move bubbles to the center of mass.

        A bubble that already sits exactly on the center of mass stays where it
        is; there is no direction to move in, and normalising the zero vector
        would turn its position into NaN.

        Parameters
        ----------
        n_iterations : int, default: 50
            Number of moves to perform.
        """
        for _i in range(n_iterations):
            moves = 0
            for i in range(len(self.bubbles)):
                rest_bub = np.delete(self.bubbles, i, 0)
                # try to move directly towards the center of mass
                # unit direction vector from bubble to the center of mass
                dir_vec = self._unit_vector(self.com - self.bubbles[i, :2])
                if dir_vec is None:
                    # already at the center of mass
                    continue

                # calculate new bubble position
                new_point = self.bubbles[i, :2] + dir_vec * self.step_dist
                new_bubble = np.append(new_point, self.bubbles[i, 2:4])

                # check whether new bubble collides with other bubbles
                if not self.check_collisions(new_bubble, rest_bub):
                    self.bubbles[i, :] = new_bubble
                    self.com = self.center_of_mass()
                    moves += 1
                else:
                    # try to move around a bubble that you collide with
                    # find colliding bubble
                    for colliding in self.collides_with(new_bubble, rest_bub):
                        # unit direction vector towards the colliding bubble
                        dir_vec = self._unit_vector(
                            rest_bub[colliding, :2] - self.bubbles[i, :2])
                        if dir_vec is None:
                            # identical centres: no defined way around
                            continue
                        # calculate orthogonal vector
                        orth = np.array([dir_vec[1], -dir_vec[0]])
                        # test which direction to go
                        new_point1 = (self.bubbles[i, :2] + orth *
                                      self.step_dist)
                        new_point2 = (self.bubbles[i, :2] - orth *
                                      self.step_dist)
                        dist1 = self.center_distance(
                            self.com, np.array([new_point1]))
                        dist2 = self.center_distance(
                            self.com, np.array([new_point2]))
                        new_point = new_point1 if dist1 < dist2 else new_point2
                        new_bubble = np.append(new_point, self.bubbles[i, 2:4])
                        if not self.check_collisions(new_bubble, rest_bub):
                            self.bubbles[i, :] = new_bubble
                            self.com = self.center_of_mass()

            # Fewer than 10 % of the bubbles moved directly: take smaller steps.
            if moves / len(self.bubbles) < 0.1:
                self.step_dist = self.step_dist / 2

    def plot(self, ax, labels, colors):
        """
        Draw the bubble plot.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
        labels : list
            Labels of the bubbles.
        colors : list
            Colors of the bubbles.
        """
        for i in range(len(self.bubbles)):
            circ = plt.Circle(
                self.bubbles[i, :2], self.bubbles[i, 2], color=colors[i])
            ax.add_patch(circ)
            ax.text(*self.bubbles[i, :2], labels[i],
                    horizontalalignment='center', verticalalignment='center', 
                   color = 'black', fontsize = 18)


# Lay out one bubble per characteristic and pull them together.
bubble_chart = BubbleChart(area=db_characterise['count'],
                           bubble_spacing=0.1)

bubble_chart.collapse()

# Equal aspect ratio keeps the circles round.
fig, ax = plt.subplots(subplot_kw=dict(aspect="equal"))
bubble_chart.plot(
    ax, db_characterise['dimensions'], db_characterise['color'])
# The axes are hidden, so no tick labels are set: the former
# set_xticklabels call had no visible effect on the hidden axis.
ax.axis("off")
# Recompute the data limits so that all added circles are inside the view.
ax.relim()
ax.autoscale_view()
ax.set_title('Characteristics of the DaPro database: Count of food products, parameters and data sources')

plt.show()

In [None]:
# !pip install pyvis

from pyvis.network import Network

net = Network(notebook=True, directed = True,
              heading = 'Characteristics of the DaPro database: Count of food products, parameters and data sources')

# Number of data sources; also read by the source-structure cell further down.
count_sources = 8

# Labels and values are built first, so the dict no longer reads an earlier
# definition of db_characterise while it is being redefined, and the
# data-source count is the same in label, 'count' and node size.
dimensions = ['food products: {}'.format(len(df_dapro.index)), 
              'parameter : {}'.format(len(df_dapro.columns)), 'data sources: {}'.format(count_sources)]
counts = [len(df_dapro.index), len(df_dapro.columns), count_sources]

db_characterise = {
    'dimensions': dimensions,
    'count': counts,
    # One colour per node, starting at the second oslo colour.
    'color': oslo_rgb[1: 1+len(dimensions)]
}


# One node per characteristic; the node size is the counted value.
net.add_nodes(range(len(db_characterise['dimensions'])), 
              label= db_characterise['dimensions'],

              size=db_characterise['count'],

              color=db_characterise['color'])



net.toggle_physics(True)
net.show('bubbles.html')

#### Add database structure and sources

In [None]:
# !pip install pyvis

from pyvis.network import Network

net = Network(notebook=True, directed = True,
                heading = 'Structure of the database sources: international food databases are collected in the Swiss Food Data Mediator and directed to DaPro.')

# One entry per node as (label, hover title, colour); the list position is the node id.
node_specs = [
    ('DaPro', 'https://dapro.ulozezoz.myhostpoint.ch/webvowl/#', oslo_rgb[5]),
    ('Swiss Food Data Mediator', 'Link Swiss Food Data Mediator', oslo_rgb[3]),
    ('USDA FoodData Central', 'https://fdc.nal.usda.gov/', oslo_rgb[1]),
    ('BLSDB', 'https://www.blsdb.de/', oslo_rgb[1]),
    ('Schweizer Nährwertdatenbank', 'https://naehrwertdaten.ch/de/', oslo_rgb[1]),
    ('FOODON', 'https://foodon.org/', oslo_rgb[1]),
    ('ecoinvent', 'https://ecoinvent.org/', oslo_rgb[1]),
    ('IUNR-DB', 'https://www.zhaw.ch/en/lsfm/institutes-centres/iunr/', oslo_rgb[1]),
    ('Recipes', '?', oslo_rgb[9]),
    ('Scientific Studies', '?', oslo_rgb[9]),
]
node_labels = [spec[0] for spec in node_specs]
node_titles = [spec[1] for spec in node_specs]
node_colors = [spec[2] for spec in node_specs]

net.add_nodes(range(count_sources+2),
              label=node_labels,
              title=node_titles,
              color=node_colors,
             )

# Node 1 points to node 0 (third tuple element 4); nodes 2 to 9 each point to
# node 1 (third tuple element 1).
net.add_edges([(1, 0, 4)] + [(source, 1, 1) for source in range(2, 10)])

net.toggle_physics(True)
net.show('mygraph.html')