# Analyse de données avec Pandas et Plotly

## Librairies

In [None]:
import pandas as pd
import plotly.express as px
%matplotlib inline

# Route pandas' built-in `.plot` accessor through Plotly.
pd.options.plotting.backend = "plotly"

import json

# Load the colour palette; the figure cells look colours up by name,
# e.g. colors["BLUE1"]. The context manager closes the file handle as soon
# as the JSON has been parsed instead of leaving it open until garbage
# collection.
with open("color_code.json", encoding="utf-8") as palette_file:
    colors = json.load(palette_file)

## Données

In [124]:
# Load the "tips" sample dataset bundled with Plotly Express.
df = px.data.tips()

# Keep the preview as the cell's last expression so Jupyter renders it as a
# rich table rather than as plain printed text.
df.head()


   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4


## Analyse Univariée

### Variable discrète

In [125]:
# Relative frequency of each value of "sex", most frequent first.
df["sex"].value_counts(sort=True, normalize=True)

sex
Male      0.643443
Female    0.356557
Name: proportion, dtype: float64

In [175]:

# Count of rows for each value of "sex".
fig = px.histogram(df, x="sex")

# Two-colour fill list plus a thin outline around the bars.
fig.update_traces(
    marker={
        "color": [colors["BLUE7"], colors["BLUE1"]],
        "line": {"color": colors["GRAY3"], "width": 1.5},
    }
)

# Figure chrome: white theme, bold title, large top margin, fixed size.
fig.update_layout(
    template="plotly_white",
    title={
        "text": "<b>Distribution des pourboires par genre</b>",
        "font": {"size": 30},
    },
    margin={"l": 50, "r": 50, "b": 100, "t": 200},
    width=900,
    height=700,
    showlegend=False,
)
fig.show()

In [177]:
# Histogram of tip amounts (50 bins requested) with a box plot in the margin.
fig = px.histogram(df, x="tip", nbins=50, marginal="box")

# No selector is given, so the styling is applied to every trace of the figure.
fig.update_traces(
    marker={
        "color": colors["BLUE1"],
        "line": {"color": colors["GRAY3"], "width": 1.5},
    }
)

# Figure chrome: white theme, bold title, large top margin, fixed size.
fig.update_layout(
    template="plotly_white",
    title={
        "text": "<b>Distribution des pourboires</b>",
        "font": {"size": 30},
    },
    margin={"l": 50, "r": 50, "b": 100, "t": 200},
    width=900,
    height=700,
    showlegend=False,
)
fig.show()

## Analyse Multivariée

### Discret / Discret

In [None]:
# Contingency table of raw counts: party size (rows) against day (columns).
pd.crosstab(index=df["size"], columns=df["day"], normalize=False)

day,Thur,Fri,Sat,Sun
size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,1,2,0
2,48,16,53,39
3,4,1,18,15
4,5,1,13,18
5,1,0,1,3
6,3,0,0,1


In [140]:
# Column order for the heatmap (Thursday through Sunday).
order = ["Thur", "Fri", "Sat", "Sun"]

# Heatmap of the size x day contingency table, with the count written in each cell.
fig = px.imshow(
    pd.crosstab(index=df["size"], columns=df["day"]).loc[:, order],
    text_auto=True,
    aspect="auto",
    color_continuous_scale="Blues",
)

# Figure chrome: white theme, bold title, large top margin, fixed size.
fig.update_layout(
    template="plotly_white",
    title={
        "text": "<b>Répartition des pourboires par jour</b>",
        "font": {"size": 30},
    },
    margin={"l": 50, "r": 50, "b": 100, "t": 200},
    width=900,
    height=700,
    showlegend=False,
)
fig.show()

### Discret / Continu

In [None]:
# Summary statistics of the tip amount within each "sex" group.
df.groupby(by="sex")["tip"].describe()

sex
Female    2.833448
Male      3.089618
Name: tip, dtype: float64

In [178]:
# Bar chart of the mean tip for each "sex" group.
fig = px.bar(df.groupby(by="sex")["tip"].mean())

# Two-colour fill list plus a thin outline around the bars.
fig.update_traces(
    marker={
        "color": [colors["BLUE7"], colors["BLUE1"]],
        "line": {"color": colors["GRAY3"], "width": 1.5},
    }
)

# Figure chrome: white theme, bold title, large top margin, fixed size.
fig.update_layout(
    template="plotly_white",
    title={
        "text": "<b>Moyenne des pourboires par genre</b>",
        "font": {"size": 30},
    },
    margin={"l": 50, "r": 50, "b": 100, "t": 200},
    width=900,
    height=700,
    showlegend=False,
)
fig.show()

In [171]:
# Histogram of tip amounts split by "sex" (50 bins requested), with a box plot
# in the margin. Each group gets a fixed colour from the palette.
fig = px.histogram(
    df,
    x="tip",
    color="sex",
    nbins=50,
    marginal="box",
    color_discrete_map={
        "Female": colors["BLUE7"],
        "Male": colors["BLUE1"],
    },
)

# The outline colour is read from the palette, like in the other figure cells.
# The bare name GRAY3 is not defined in this notebook and raised a NameError
# when the notebook was run from a fresh kernel.
fig.update_traces(marker_line_color=colors["GRAY3"], marker_line_width=1.5)

# Figure chrome: white theme, bold title, large top margin, fixed size.
# The legend is kept because colour now encodes the "sex" group.
fig.update_layout(
    template="plotly_white",
    title=dict(
        text="<b>Distribution des pourboires</b>",
        font_size=30,
    ),
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=200,
    ),
    width=900,
    height=700,
    showlegend=True,
)
fig.show()

In [182]:
# Scatter plot with the tip on the x axis and the total bill on the y axis,
# one colour per "sex" group.
fig = px.scatter(
    df,
    x="tip",
    y="total_bill",
    color="sex",
    color_discrete_map={
        "Female": colors["ORANGE1"],
        "Male": colors["BLUE1"],
    },
)

# Thin outline around every marker.
fig.update_traces(marker={"line": {"color": colors["GRAY3"], "width": 1.5}})

# Figure chrome: white theme, bold title with a subtitle, large top margin,
# fixed size. The legend is shown because colour encodes the group.
fig.update_layout(
    template="plotly_white",
    title={
        "text": "<b>Distribution des pourboires par genre</b><br><sup>en fonction du montant total de l'addition</sup>",
        "font": {"size": 30},
    },
    margin={"l": 50, "r": 50, "b": 100, "t": 200},
    width=900,
    height=700,
    showlegend=True,
)
fig.show()