In [None]:
# jupyter-flex dashboard parameters: title, subtitle, layout orientation and
# the external link pointing at the notebook source.
flex_title = "DASHBOARD"
flex_subtitle = "built using jupyter-flex"
flex_orientation = "rows"
flex_external_link = (
    "https://github.com/STEVENGOBAMI/openfoods/blob/main/P03_01_dashboard.ipynb"
)

# ANALYSE UNIVARIÉE

## Row 1

### Top 20 des contributeurs

In [None]:
# Importation des différentes librairies

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import statsmodels.api as sm
from sklearn import decomposition, preprocessing

In [None]:
# Load the dataset from a path relative to the working directory.
# low_memory=False turns off chunked parsing, so each column's dtype is
# inferred from the whole file rather than chunk by chunk.
data_fr = pd.read_csv(
    "data_fr.csv",
    sep=",",
    low_memory=False,
)

In [None]:
# Display the (rows, columns) dimensions of the loaded DataFrame.
data_fr.shape

In [None]:
# Number of products per contributor, limited to the 20 most frequent and
# sorted ascending so the largest bar is drawn at the top of the horizontal chart.
top_creators = data_fr["creator"].value_counts().head(20).sort_values()

# "value" is the column name plotly express gives to the counts of a
# wide-form input; it is reused here as the bar label.
fig = px.bar(top_creators, orientation="h", text="value")

# Write each count outside its bar, abbreviated to two significant digits.
fig.update_traces(textposition="outside", texttemplate="%{text:.2s}")

layout_options = dict(
    title_text="Top 20 des contributeurs",
    xaxis_title=" ",
    yaxis_title="Contributeurs",
    showlegend=False,
    width=950,
    height=600,
)
fig.update_layout(**layout_options)
fig.show()

### Part de chaque Nutriscore

In [None]:
# Columns grouped under the name "positive" — presumably those expected to hold
# non-negative values; TODO confirm against the cleaning notebook.
positive_columns = [
    "energy_100g",
    "proteins_100g",
    "salt_100g",
    "sodium_100g",
    "sugars_100g",
    "saturated-fat_100g",
    "additives_n",
    "fat_100g",
    "carbohydrates_100g",
    "fiber_100g",
    "fruits-vegetables-nuts_100g",
]

In [None]:
# Columns carrying the `_100g` suffix (nutrient amounts, judging by their names).
nutri_fact_100g = [
    "proteins_100g",
    "salt_100g",
    "sodium_100g",
    "sugars_100g",
    "saturated-fat_100g",
    "fat_100g",
    "carbohydrates_100g",
    "fiber_100g",
    "fruits-vegetables-nuts_100g",
]

In [None]:
# Share of each Nutri-Score grade among the products, drawn as a donut chart.
# The counts are computed once and reused for both labels and values.
grade_counts = data_fr["nutrition_grade_fr"].value_counts()

# Bind every colour to its grade explicitly. A positional colour list would
# depend on the frequency order returned by value_counts(), which changes with
# the data and would silently paint grades with the wrong colour.
grade_colors = {
    "a": "green",
    "b": "#82c46c",
    "c": "yellow",
    "d": "#ff7f00",
    "e": "#ff4500",
}
# Grades missing from the mapping fall back to grey instead of shifting the palette.
slice_colors = [grade_colors.get(grade, "lightgrey") for grade in grade_counts.index]

fig = go.Figure(
    go.Pie(
        labels=grade_counts.index,
        values=grade_counts.values,
        textinfo="label+percent",
        hole=0.3,
        marker=dict(colors=slice_colors),
    )
)

fig.update_layout(
    title_text=" ",
    showlegend=False,
    width=500,
    height=500,
)

fig.show()

# Les produits notés d sont les plus représentés et ceux notés b sont les moins représentés

# ANALYSE UNIVARIÉE 2

## Row 1

### Top 20 et les Autres

In [None]:
# Collapse every category outside the 20 most frequent ones into a single
# 'Autres' bucket; rows whose category is missing stay missing.
main_category = data_fr['main_category_fr']
top20_names = main_category.value_counts().index[:20]
keep_as_is = main_category.isna() | main_category.isin(top20_names)
data_fr.loc[:, 'top_category'] = main_category.where(keep_as_is, other='Autres')

# Count once, then derive labels, values and slice offsets from the same Series.
category_counts = data_fr['top_category'].value_counts()
slice_offsets = [
    0.05 if name == 'Autres' else 0
    for name in category_counts.index
]

fig = go.Figure(
    go.Pie(
        labels=category_counts.index,
        values=category_counts.values,
        textinfo="label+percent",
        textposition="inside",
        pull=slice_offsets,  # detach the 'Autres' slice slightly from the pie
    )
)

fig.update_layout(showlegend=False, width=600, height=600)

fig.show()

### Répartition du Top 20

In [None]:
# Distribution of the 20 most frequent categories, without the aggregated
# 'Autres' bucket. Relies on the `top_category` column of `data_fr`.
top_category_counts = data_fr['top_category'].value_counts()

# Remove 'Autres' by label: slicing off the first row is only correct when that
# bucket happens to be the largest one, otherwise a real category is dropped
# and 'Autres' stays in the chart. errors='ignore' keeps the cell working when
# no 'Autres' bucket exists.
top20_counts = top_category_counts.drop('Autres', errors='ignore')

fig = go.Figure(
    go.Pie(
        labels=top20_counts.index,
        values=top20_counts.values,
        textinfo="label+percent",
        textposition="inside",
        hole=0.3,
    )
)

fig.update_layout(
    showlegend=False,
    width=600,
    height=600,
)

fig.show()

# ANALYSE MULTIVARIÉE

## Row 1

### Matrice des corrélations

In [None]:
# Numeric columns fed to the correlation matrix.
numeric_columns = [
    "additives_n",
    "energy_100g",
    "proteins_100g",
    "fat_100g",
    "salt_100g",
    "sodium_100g",
    "sugars_100g",
    "saturated-fat_100g",
    "carbohydrates_100g",
    "fiber_100g",
    "nutrition-score-fr_100g",
    "nutrition-score-uk_100g",
    "fruits-vegetables-nuts_100g",
]

In [None]:
# Reduced set of numeric columns: one box-plot subplot is drawn for each of them.
new_numeric_columns = [
    "additives_n",
    "energy_100g",
    "saturated-fat_100g",
    "sugars_100g",
    "fiber_100g",
    "proteins_100g",
    "salt_100g",
    "fruits-vegetables-nuts_100g",
    "nutrition-score-fr_100g",
]

In [None]:
# Pairwise correlations (pandas default method) between the numeric columns,
# rounded to two decimals.
corr = data_fr[numeric_columns].corr().round(2)

# Boolean mask that is True strictly below the diagonal; every other cell
# (the diagonal and the mirrored upper half) is replaced by NaN.
below_diagonal = np.tril(np.ones(corr.shape), k=-1).astype(bool)
lower_corr = corr.where(below_diagonal)

fig = px.imshow(lower_corr, width=1000, height=600)

fig.show()

### Boxplot Nutriscore par nutriment

In [None]:
# Keep only the rows where both the Nutri-Score grade and the numeric score are present.
nutriscore_columns = ["nutrition_grade_fr", "nutrition-score-fr_100g"]
has_grade_and_score = data_fr[nutriscore_columns].notnull().all(axis=1)
filtered_data = data_fr[has_grade_and_score]

In [None]:
def boxtrace(df, n_cols=3, x="nutrition_grade_fr", columns=None):
    """Build a grid of box plots, one subplot per numeric column.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot. It must contain the column named by ``x``, the
        ``nutrition_grade_fr`` column used for colouring, and every plotted
        column.
    n_cols : int, default 3
        Number of subplots per row.
    x : str, default "nutrition_grade_fr"
        Column placed on the x axis of every box plot.
    columns : list of str, optional
        Columns to plot, one per subplot. Defaults to the notebook-level
        ``new_numeric_columns``.

    Returns
    -------
    plotly.graph_objects.Figure
        Figure holding ``ceil(len(columns) / n_cols)`` rows of subplots
        (at least one row).
    """
    if columns is None:
        columns = new_numeric_columns
    columns = list(columns)

    # Ceiling division; keep at least one row so an empty column list still
    # yields a valid (empty) figure instead of an error from make_subplots.
    n_rows = max(1, -(-len(columns) // n_cols))
    fig = make_subplots(rows=n_rows, cols=n_cols, subplot_titles=columns)

    # Loop-invariant styling: fixed grade order and one colour per grade.
    grade_order = {"nutrition_grade_fr": ["a", "b", "c", "d", "e"]}
    grade_colors = {"a": "green", "b": "#82c46c", "c": "yellow", "d": "#ff7f00", "e": "#ff4500"}

    for position, col in enumerate(columns):
        # Fill the grid left to right, top to bottom (plotly positions are 1-based).
        row_idx, col_idx = divmod(position, n_cols)

        # Plot the frame passed by the caller rather than a notebook global.
        traces = px.box(
            df,
            x=x,
            y=col,
            color="nutrition_grade_fr",
            category_orders=grade_order,
            color_discrete_map=grade_colors,
        ).data

        # One trace per grade actually present in `df`; iterating avoids an
        # IndexError when fewer than five grades occur.
        for trace in traces:
            fig.add_trace(trace, row=row_idx + 1, col=col_idx + 1)

    return fig

In [None]:
# Draw the per-column box plots from `filtered_data`, then size the figure
# and hide the legend.
fig = boxtrace(filtered_data)

boxplot_layout = dict(
    showlegend=False,
    title_x=0.5,
    height=600,
    width=1000,
)
fig.update_layout(**boxplot_layout)

fig.show()