# Import des outils / jeu de données

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Apply seaborn's default theme to the figures drawn in this notebook.
sns.set_theme()
# Seed NumPy's legacy global random state with a fixed value (0).
np.random.seed(0)

In [None]:
# Load the dataset from CSV and use its "ID" column as the row index.
# NOTE(review): `parse_dates=True` only asks pandas to try parsing the
# *index* ("ID") as dates; if a date column was meant to be parsed, it
# should be listed explicitly -- confirm.
df = pd.read_csv(
    "data/data-cleaned-feature-engineering.csv",
    index_col="ID",
    parse_dates=True,
    sep=",",
)

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

## Variables globales

In [None]:
# Names of the columns handled as numeric variables in the plots below,
# grouped by column-name family (order is preserved).
var_numeriques = (
    ["Year_Birth", "Income", "Recency"]
    # "Mnt*" columns
    + [
        "MntWines",
        "MntFruits",
        "MntMeatProducts",
        "MntFishProducts",
        "MntSweetProducts",
        "MntGoldProds",
    ]
    # "Num*Purchases" columns
    + [
        "NumDealsPurchases",
        "NumWebPurchases",
        "NumCatalogPurchases",
        "NumStorePurchases",
    ]
    + ["NumWebVisitsMonth"]
)

In [None]:
# Names of the columns handled as categorical variables (order is preserved).
var_categoriques = (
    ["Education", "Marital_Status"]
    + ["Kidhome", "Teenhome"]
    # "AcceptedCmp1" .. "AcceptedCmp5"
    + [f"AcceptedCmp{numero}" for numero in range(1, 6)]
    + ["Response"]
)

In [None]:
# Convert every column listed in `var_categoriques` to the "category" dtype,
# going through `str` first so that the category labels are strings.
# NOTE(review): on pandas versions where `astype(str)` stringifies missing
# values, NaN would become its own category label -- confirm these columns
# contain no missing values.
df[var_categoriques] = (
    df[var_categoriques]
    .astype(str)
    .astype("category")
)

# Visualisation

## En fonction de Response (variable cible catégorique)

In [None]:
# For each numeric column, draw two side-by-side views split by "Response":
# a horizontal box plot (left) and a histogram with KDE overlay (right).
for var in var_numeriques:
    _, (ax_box, ax_hist) = plt.subplots(nrows=1, ncols=2, figsize=(8, 2))
    sns.boxplot(data=df, x=var, y="Response", width=0.25, ax=ax_box)
    sns.histplot(
        data=df,
        x=var,
        hue="Response",
        stat="probability",
        # Normalise each "Response" group on its own so groups of different
        # sizes remain comparable.
        common_norm=False,
        kde=True,
        ax=ax_hist,
    )
    plt.show()

## En fonction de NumStorePurchases (variable cible continue)

In [None]:
# Scatter every numeric column against the continuous target, one figure per
# column.
var_cible = "NumStorePurchases"
for var in var_numeriques:
    if var == var_cible:
        # `var_numeriques` includes the target itself; plotting it against
        # itself only yields the y = x diagonal, so it is skipped.
        continue
    # Column keys are enough since `data=df` is given; axis labels are the
    # column names either way.
    sns.relplot(data=df, x=var, y=var_cible)
    plt.show()

In [None]:
# todo: renommer cette partie ? / mettre un texte introductif pour expliquer la démarche ("Une fois les données nettoyées, on les explore de nouveau, plus en profondeur")

In [None]:
# todo: organiser en plusieurs sous-parties

In [None]:
# todo: choisir les représentations les plus pertinentes à montrer ci-dessous

In [None]:
# "Income" histogram with KDE overlay, one colour per "NbAcceptedCampaigns"
# value. stat="density" scales each histogram so its total area is 1, and
# common_norm=False applies that normalisation per hue group.
sns.histplot(
    data=df,
    x="Income",
    hue="NbAcceptedCampaigns",
    stat="density",
    common_norm=False,
    kde=True,
)

In [None]:
# Same "Income" histogram per "NbAcceptedCampaigns" value, but with
# stat="probability": bar heights sum to 1 within each hue group
# (common_norm=False), instead of the area being 1.
sns.histplot(
    data=df,
    x="Income",
    hue="NbAcceptedCampaigns",
    stat="probability",
    common_norm=False,
    kde=True,
)

In [None]:
# KDE-only view of "Income" per "NbAcceptedCampaigns" value (figure-level
# plot); each curve is normalised independently (common_norm=False).
sns.displot(
    data=df,
    x="Income",
    hue="NbAcceptedCampaigns",
    kind="kde",
    common_norm=False,
)

In [None]:
# todo: faire la même chose en boxplots !

In [None]:
# "Income" histogram (raw counts) with KDE overlay, coloured by
# "HasAcceptedCampaigns".
sns.histplot(
    data=df,
    x="Income",
    hue="HasAcceptedCampaigns",
    kde=True,
)

In [None]:
# "Income" histogram (raw counts) with KDE overlay, coloured by "Response".
sns.histplot(
    data=df,
    x="Income",
    hue="Response",
    kde=True,
)

### Année naissance (test)

In [None]:
# "Year_Birth" histogram (raw counts) with KDE overlay, coloured by "Response".
sns.histplot(
    data=df,
    x="Year_Birth",
    hue="Response",
    kde=True,
)

### todo

In [None]:
# Count of rows per "Education" level, with one bar per
# "HasAcceptedCampaigns" value drawn side by side (multiple="dodge").
sns.histplot(
    data=df,
    x="Education",
    hue="HasAcceptedCampaigns",
    multiple="dodge",
    shrink=0.8,  # narrow the bars to leave a gap between levels
)

In [None]:
# Count of rows per "NbChildren" value, with "Response" groups side by side.
sns.histplot(
    data=df,
    x="NbChildren",
    hue="Response",
    discrete=True,  # one bin per distinct value, centred on it
    multiple="dodge",
    shrink=0.4,
)

In [None]:
# Count of rows per "NbChildren" value, with "HasAcceptedCampaigns" groups
# side by side.
sns.histplot(
    data=df,
    x="NbChildren",
    hue="HasAcceptedCampaigns",
    discrete=True,  # one bin per distinct value, centred on it
    multiple="dodge",
    shrink=0.4,
)

In [None]:
# Heatmap of the strong pairwise correlations (|r| > 0.5) between columns.
#
# The correlation matrix is computed once and restricted to numeric columns:
# some columns of `df` are cast to "category" with string labels earlier in
# the notebook, and since pandas 2.0 `DataFrame.corr()` defaults to
# numeric_only=False and fails on such columns instead of dropping them.
corr_matrix = df.corr(numeric_only=True)

plt.figure(figsize=(12, 12))
sns.heatmap(
    # Entries at or below the threshold become NaN and are left blank.
    corr_matrix.where(corr_matrix.abs() > 0.5),
    annot=True,
    cmap="BrBG",
    linewidths=0.5,
    # Fix the colour scale to the full correlation range.
    vmax=1,
    vmin=-1,
)

## todo

In [None]:
# "MntMeatProducts" against "Income", one colour per "Marital_Status".
sns.scatterplot(
    data=df,
    x="Income",
    y="MntMeatProducts",
    hue="Marital_Status",
)

In [None]:
# "MntWines" against "Income".
sns.scatterplot(
    data=df,
    x="Income",
    y="MntWines",
)

In [None]:
# Count of rows per "Education" level, with "Response" groups side by side.
sns.histplot(
    data=df,
    x="Education",
    hue="Response",
    multiple="dodge",
    shrink=0.8,  # narrow the bars to leave a gap between levels
)

In [None]:
# Count of rows per "Marital_Status", with "Response" groups side by side,
# drawn as one facet (column) per "Kidhome" value.
sns.displot(
    data=df,
    x="Marital_Status",
    col="Kidhome",
    hue="Response",
    multiple="dodge",
    shrink=0.8,
)