# Import des outils / jeu de données

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Activate seaborn's default theme for the figures created in this notebook.
sns.set_theme()
# Seed NumPy's global random generator with a fixed value.
np.random.seed(0)

In [None]:
# Load the dataset into `df`, using the "ID" column as the row index.
# NOTE(review): with parse_dates=True pandas tries to parse the *index* as
# dates; the index here is "ID", so this is presumably unnecessary — confirm.
df = pd.read_csv(
    "data/data-cleaned-feature-engineering.csv",
    index_col="ID",
    parse_dates=True,
    sep=",",
)

In [None]:
# Preview the first five rows of the loaded data.
df.head(n=5)

## Variables globales

In [None]:
# Names of the columns treated as numeric variables, grouped by prefix.
var_numeriques = (
    # General customer attributes.
    ["Year_Birth", "Income", "Recency"]
    # "Mnt*" columns (presumably amounts spent per product family — confirm).
    + [
        "MntWines",
        "MntFruits",
        "MntMeatProducts",
        "MntFishProducts",
        "MntSweetProducts",
        "MntGoldProds",
    ]
    # "Num*" columns (presumably purchase / visit counts — confirm).
    + [
        "NumDealsPurchases",
        "NumWebPurchases",
        "NumCatalogPurchases",
        "NumStorePurchases",
        "NumWebVisitsMonth",
    ]
)

In [None]:
# Names of the columns treated as categorical variables.
var_categoriques = (
    ["Education", "Marital_Status", "Kidhome", "Teenhome"]
    + [
        "AcceptedCmp1",
        "AcceptedCmp2",
        "AcceptedCmp3",
        "AcceptedCmp4",
        "AcceptedCmp5",
    ]
    # Target variable used in the plots below.
    + ["Response"]
)

# Additional categorical columns (presumably produced by the feature
# engineering step, given the "_fe" suffix used below — confirm).
var_categoriques_extra = [
    "NbAcceptedCampaigns",
    "HasAcceptedCampaigns",
    "NbChildren",
]

# Full list of categorical columns: the base ones followed by the extra ones.
# TODO: store all these lists in a pd.Series and load them from there.
var_categoriques_fe = [*var_categoriques, *var_categoriques_extra]

In [None]:
# Convert every categorical column in two steps: first to strings, then to
# pandas' "category" dtype, so that all category labels are text.
df[var_categoriques_fe] = (
    df[var_categoriques_fe]
    .astype(str)
    .astype("category")
)

# Visualisation

# Relation avec la variable cible (Response)

In [None]:
# For each numeric variable, draw one figure with two panels split by
# "Response": a box plot on the left and a histogram with a KDE curve on the
# right.
for var in var_numeriques:
    _, (ax_boite, ax_histo) = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))

    sns.boxplot(data=df, x=var, y="Response", width=0.25, ax=ax_boite)

    sns.histplot(
        data=df,
        x=var,
        hue="Response",
        stat="probability",
        # Normalise each "Response" group on its own rather than jointly.
        common_norm=False,
        kde=True,
        ax=ax_histo,
    )

    plt.show()

In [None]:
# For each categorical variable, draw two dodged count histograms side by
# side: the variable coloured by "Response" (left), and "Response" coloured by
# the variable (right).
for var in var_categoriques_fe:
    _, (ax_gauche, ax_droite) = plt.subplots(nrows=1, ncols=2, figsize=(10, 3))

    # Left panel: variable on the x axis, one bar colour per Response value.
    sns.histplot(
        data=df,
        x=var,
        hue="Response",
        multiple="dodge",
        shrink=0.5,
        ax=ax_gauche,
    )

    # Right panel: same data with the roles of x and hue swapped.
    sns.histplot(
        data=df,
        x="Response",
        hue=var,
        multiple="dodge",
        shrink=0.5,
        ax=ax_droite,
    )

    plt.show()

# Graphiques supplémentaires

In [None]:
# "Income" broken down by "NbAcceptedCampaigns": box plot on the left panel,
# per-group normalised histogram with KDE on the right panel.
_, ax = plt.subplots(nrows=1, ncols=2, figsize=(12, 4))

sns.boxplot(data=df, x="NbAcceptedCampaigns", y="Income", ax=ax[0])
# Only the left panel gets a title.
ax[0].set_title("Revenu en fonction du nombre de campagnes acceptées")

sns.histplot(
    data=df,
    x="Income",
    hue="NbAcceptedCampaigns",
    stat="probability",
    common_norm=False,
    kde=True,
    ax=ax[1],
)

In [None]:
# "Income" broken down by "HasAcceptedCampaigns": box plot on the left panel,
# per-group normalised histogram with KDE on the right panel.
_, ax = plt.subplots(1, 2, figsize=(12, 4))

# The title previously repeated the one used for the "NbAcceptedCampaigns"
# figure; it now describes the variable actually plotted here.
ax[0].set_title("Revenu en fonction de l'acceptation de campagnes")

sns.boxplot(y=df["Income"], x=df["HasAcceptedCampaigns"], ax=ax[0])
sns.histplot(
    df,
    x="Income",
    hue="HasAcceptedCampaigns",
    kde=True,
    stat="probability",
    # Normalise each "HasAcceptedCampaigns" group on its own.
    common_norm=False,
    ax=ax[1],
)

### todo

In [None]:
# Count of rows per "Education" level, with side-by-side bars for each
# "HasAcceptedCampaigns" value.
sns.histplot(
    data=df,
    x="Education",
    hue="HasAcceptedCampaigns",
    shrink=0.8,
    multiple="dodge",
)

In [None]:
# Count of rows per "NbChildren" value, with side-by-side bars for each
# "Response" value.
sns.histplot(
    data=df,
    x="NbChildren",
    hue="Response",
    discrete=True,
    shrink=0.4,
    multiple="dodge",
)

In [None]:
# Count of rows per "NbChildren" value, with side-by-side bars for each
# "HasAcceptedCampaigns" value.
sns.histplot(
    data=df,
    x="NbChildren",
    hue="HasAcceptedCampaigns",
    discrete=True,
    shrink=0.4,
    multiple="dodge",
)

## Matrice de corrélation

In [None]:
# Heatmap of the pairwise correlations, showing only the strong ones
# (absolute value above 0.5); every other cell is left blank (NaN).
#
# The matrix is computed once, and only on numeric/boolean columns: the
# columns cast to string-labelled categories earlier in the notebook cannot be
# converted to floats, and recent pandas versions no longer drop such columns
# silently in DataFrame.corr().
matrice_corr = df.select_dtypes(include=["number", "bool"]).corr()

plt.figure(figsize=(12, 12))
sns.heatmap(
    matrice_corr[matrice_corr.abs() > 0.5],
    annot=True,
    cmap="BrBG",
    linewidths=0.5,
    # Fix the colour scale to the full correlation range [-1, 1].
    vmax=1,
    vmin=-1,
)

## todo

In [None]:
# Exemple de graphique sur plusieurs colonnes
# sns.displot(
#     data=df,
#     x="Marital_Status",
#     hue="Response",
#     col="Kidhome",
#     multiple="dodge",
#     shrink=0.8,
# )