In [2]:
import pandas as pd
import numpy as np
import json

from sklearn.decomposition import PCA, FactorAnalysis
from sklearn.preprocessing import StandardScaler
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import pingouin as pg

import ipywidgets as widgets

import warnings
# Silence every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings(action='ignore')

# Display precision: 3 decimals for NumPy arrays, 4 for pandas; never truncate the column list.
np.set_printoptions(precision=3)
pd.options.display.precision = 4
pd.options.display.max_columns = None

# Load the 'data_raw' sheet into the global frame `df` that the analysis functions read.
df = pd.read_excel(
    "QuantiCovid_isolationForestFR.xlsx",
    sheet_name='data_raw',
)

In [3]:
def FactorAnalysis_dashboard(key):
    """Print reliability and one-component factor summaries for an item group and plot them.

    The component name ``key`` is mapped to its list of item columns in the global
    frame ``df``. For these items the function:

    1. prints Cronbach's alpha (with a 95 % confidence interval) computed by pingouin;
    2. standardises the items and fits a 1-component PCA, an unrotated factor analysis
       and a varimax-rotated factor analysis;
    3. prints, for each method, the variance explained by the component;
    4. draws one heat-map column per method showing ``components_`` for every item;
    5. displays ``describe()`` of the raw items.

    Args:
        key: Component name; must be one of the entries of the local ``factor`` list.

    Raises:
        ValueError: If ``key`` is not a known component name.
    """
    factor = [
        "componant_Vol_ecologie", "componant_Vol_economie", "componant_Trend_Local",
        "Componant_Trend_Prix", "Componant_Trend_secu", "Componant_confiance_marque",
        "Componant_confiance_comp", "Componant_confiance_benev", "Componant_confiance_integ",
        "Componant_image_marque", "Componant_image_val", "Componant_reconditionnement",
        "Componant_image_RSE", "Componant_changement_internet", "Componant_confiance",
        "Componant_image",
    ]
    # value_selection[i] lists the item columns that make up factor[i].
    value_selection = [
        ["accord_achat_ecolo_postconf", "intention_ecologie_postconf"],
        ["accord_achat_econome_postconf", "intention_economie_postconf", "intention_economie2_postconf"],
        ["TRENDS_LOC_1", "TRENDS_LOC_2", "TRENDS_LOC_3"],
        ["TRENDS_PRIX_1", "TRENDS_PRIX_2", "TRENDS_PRIX_3"],
        ["TRENDS_SECU_1", "TRENDS_SECU_2", "TRENDS_SECU_3", "TRENDS_SECU_4"],
        ["confiance_distrib_nAlim", "confiance_distrib_alim", "confiance_distrib", "confiance_brico"],
        ["CONF_COMP_1", "CONF_COMP_2", "CONF_COMP_3"],
        ["CONF_BENEV_1", "CONF_BENEV_2", "CONF_BENEV_3"],
        ["CONF_INTEG_1", "CONF_INTEG_2", "CONF_INTEG_3", "CONF_INTEG_4"],
        ["image_distrib_nAlim", "image_distrib_alim", "image_distrib", "image_brico"],
        ["IMG_VAL_1", "IMG_VAL_2", "IMG_VAL_3", "IMG_VAL_4"],
        ["raison_achat_reconditionner_postconf", "achat_magasin_reconditionne_postconf",
         "achat_enligne_reconditionne_postconf"],
        ["IMG_RSE_1", "IMG_RSE_2", "IMG_RSE_3", "IMG_RSE_4", "IMG_RSE_5"],
        ["habitude_InternetToPointvente_postconf", "habitude_InternetToDrive_postconf",
         "habitude_InternetTolocker_postconf"],
        ["CONF_COMP_1", "CONF_COMP_2", "CONF_COMP_3", "CONF_BENEV_1", "CONF_BENEV_2", "CONF_BENEV_3",
         "CONF_INTEG_1", "CONF_INTEG_2", "CONF_INTEG_3", "CONF_INTEG_4", "confiance_distrib_nAlim",
         "confiance_distrib_alim", "confiance_distrib", "confiance_brico"],
        ["IMG_VAL_1", "IMG_VAL_2", "IMG_VAL_3", "IMG_VAL_4", "IMG_RSE_1", "IMG_RSE_2", "IMG_RSE_3",
         "IMG_RSE_4", "IMG_RSE_5", "image_distrib_nAlim", "image_distrib_alim", "image_distrib",
         "image_brico"],
    ]
    if key not in factor:
        raise ValueError("unknown component {!r}; expected one of {}".format(key, factor))
    feature_names = value_selection[factor.index(key)]

    print("alpha de crombach : {}".format(pg.cronbach_alpha(data=df[feature_names], ci=.95)))

    # NOTE(review): `descriptive` in this notebook drops values equal to -1 before plotting;
    # they are not filtered here -- confirm whether -1 is a missing-answer code.
    X = StandardScaler().fit_transform(df[feature_names])
    n_samples = X.shape[0]

    n_comps = 1

    methods = [
        ("PCA", PCA()),
        ("Unrotated FA", FactorAnalysis()),
        ("Varimax FA", FactorAnalysis(rotation="varimax")),
    ]

    # Total variance of the standardised items (equals the number of items when every
    # column has non-zero variance). It does not depend on the method, so compute it once.
    total_var = X.var(axis=0).sum()

    fig, axes = plt.subplots(ncols=len(methods), figsize=(10, 8))

    for col, (ax, (method, fa)) in enumerate(zip(axes, methods)):
        fa.set_params(n_components=n_comps)
        fa.fit(X)

        components = fa.components_.T
        print("\n\n %s :\n" % method)

        if isinstance(fa, PCA):
            # PCA stores unit-length eigenvectors, so their squared entries always sum to 1.
            # Scale each eigenvector by the square root of its eigenvalue to obtain loadings.
            # explained_variance_ uses n-1 degrees of freedom whereas total_var uses n,
            # hence the (n-1)/n rescaling to keep numerator and denominator consistent.
            eigenvalues = fa.explained_variance_ * (n_samples - 1) / n_samples
            fa_loadings = components * np.sqrt(eigenvalues)
        else:
            # FactorAnalysis.components_ already holds the loadings.
            fa_loadings = components

        # variance explained by the component = sum of squared loadings
        var_exp = np.sum(fa_loadings**2, axis=0)
        prop_var_exp = var_exp / total_var
        cum_prop_var_exp = np.cumsum(prop_var_exp)

        print(f"variance explained: {var_exp.round(2)}")
        print(f"proportion of variance explained: {prop_var_exp.round(3)}")
        print(f"cumulative proportion of variance explained: {cum_prop_var_exp.round(3)}")

        # The heat map shows the estimator's components_ with a colour scale symmetric around 0.
        vmax = np.abs(components).max()
        ax.imshow(components, cmap="RdBu_r", vmax=vmax, vmin=-vmax)
        ax.set_yticks(np.arange(len(feature_names)))
        # Label the items on the left-most panel only. Axes.is_first_col() no longer exists
        # in recent Matplotlib releases, so rely on the loop index instead.
        if col == 0:
            ax.set_yticklabels(feature_names)
        else:
            ax.set_yticklabels([])
        ax.set_title(str(method))
        ax.set_xticks([0])
        ax.set_xticklabels(["Comp. 1"])

    fig.suptitle("Factors")
    plt.tight_layout()
    plt.show()
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(df[feature_names].describe())
    

In [4]:
import plotly.graph_objects as go

def descriptive(key):
    """Show summary statistics and normalised box plots for one component's items.

    ``key`` is looked up in the list of component names to find the matching item
    columns of the global frame ``df``. ``describe()`` of those columns is displayed,
    then one box per item is drawn: rows equal to -1 are left out and the remaining
    values are divided by the column maximum.

    Args:
        key: Component name; ``list.index`` raises ``ValueError`` if it is unknown.
    """
    component_names = [
        "componant_Vol_ecologie", "componant_Vol_economie", "componant_Trend_Local",
        "Componant_Trend_Prix", "Componant_Trend_secu", "Componant_confiance_marque",
        "Componant_confiance_comp", "Componant_confiance_benev", "Componant_confiance_integ",
        "Componant_image_marque", "Componant_image_val", "Componant_reconditionnement",
        "Componant_image_RSE", "Componant_changement_internet", "Componant_confiance",
        "Componant_image",
    ]
    # item_groups[i] holds the item columns belonging to component_names[i].
    item_groups = [
        ["accord_achat_ecolo_postconf", "intention_ecologie_postconf"],
        ["accord_achat_econome_postconf", "intention_economie_postconf", "intention_economie2_postconf"],
        ["TRENDS_LOC_1", "TRENDS_LOC_2", "TRENDS_LOC_3"],
        ["TRENDS_PRIX_1", "TRENDS_PRIX_2", "TRENDS_PRIX_3"],
        ["TRENDS_SECU_1", "TRENDS_SECU_2", "TRENDS_SECU_3", "TRENDS_SECU_4"],
        ["confiance_distrib_nAlim", "confiance_distrib_alim", "confiance_distrib", "confiance_brico"],
        ["CONF_COMP_1", "CONF_COMP_2", "CONF_COMP_3"],
        ["CONF_BENEV_1", "CONF_BENEV_2", "CONF_BENEV_3"],
        ["CONF_INTEG_1", "CONF_INTEG_2", "CONF_INTEG_3", "CONF_INTEG_4"],
        ["image_distrib_nAlim", "image_distrib_alim", "image_distrib", "image_brico"],
        ["IMG_VAL_1", "IMG_VAL_2", "IMG_VAL_3", "IMG_VAL_4"],
        ["raison_achat_reconditionner_postconf", "achat_magasin_reconditionne_postconf",
         "achat_enligne_reconditionne_postconf"],
        ["IMG_RSE_1", "IMG_RSE_2", "IMG_RSE_3", "IMG_RSE_4", "IMG_RSE_5"],
        ["habitude_InternetToPointvente_postconf", "habitude_InternetToDrive_postconf",
         "habitude_InternetTolocker_postconf"],
        ["CONF_COMP_1", "CONF_COMP_2", "CONF_COMP_3", "CONF_BENEV_1", "CONF_BENEV_2", "CONF_BENEV_3",
         "CONF_INTEG_1", "CONF_INTEG_2", "CONF_INTEG_3", "CONF_INTEG_4", "confiance_distrib_nAlim",
         "confiance_distrib_alim", "confiance_distrib", "confiance_brico"],
        ["IMG_VAL_1", "IMG_VAL_2", "IMG_VAL_3", "IMG_VAL_4", "IMG_RSE_1", "IMG_RSE_2", "IMG_RSE_3",
         "IMG_RSE_4", "IMG_RSE_5", "image_distrib_nAlim", "image_distrib_alim", "image_distrib",
         "image_brico"],
    ]
    columns = item_groups[component_names.index(key)]

    # Lift the row/column display limits so the whole summary table is shown.
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(df[columns].describe())

    boxes = go.Figure()
    for name in columns:
        # Exclude the -1 rows, then scale by the maximum of the full column.
        kept = df.loc[df[name] != -1, name]
        scaled = kept / np.max(df[name])
        boxes.add_trace(go.Box(y=scaled, name=name))
    boxes.show()
    

In [5]:
import semopy as sem
import plotly.express as px
from semopy import Model
import cv2

# Show up to 1000 rows per frame and render floats with thousands separators and 3 decimals.
pd.options.display.max_rows = 1000
pd.set_option('display.float_format', '{:,.3f}'.format)

def model_print():
    """Fit the structural equation model on the global ``df`` and display its results.

    Steps performed:

    1. build a semopy ``Model`` from the description below and fit it on ``df``;
    2. draw the path diagram to ``BatModel.jpg`` with ``semplot`` and show it with plotly;
    3. display the fit statistics returned by ``sem.calc_stats``;
    4. display the regression rows (``op == "~"``) of the parameter table, including
       standardised estimates;
    5. call ``sem.report`` with the title "Quantitative study Covid Data".

    Raises:
        FileNotFoundError: If the diagram image cannot be read back from disk.
    """
    # Model description in semopy syntax. Compared with the earlier revision, the
    # indicator `confiance_benev` is listed once for `confiance` (it was repeated),
    # and the residual covariances of `habitude_InternetTolivraison_postconf` are
    # declared once (they were repeated on three extra lines).
    desc = """
        # measurement model
        Volonte_Ecologique =~ accord_achat_ecolo_postconf + intention_ecologie_postconf
        Volonte_Economie =~ accord_achat_econome_postconf + intention_economie_postconf + intention_economie2_postconf

        Trend_Local =~ TRENDS_LOC_1 + TRENDS_LOC_2 + TRENDS_LOC_3
        Trend_prix =~ TRENDS_PRIX_1 + TRENDS_PRIX_2 + TRENDS_PRIX_3
        Trend_secu =~ TRENDS_SECU_1 + TRENDS_SECU_2 + TRENDS_SECU_3 + TRENDS_SECU_4
        Trend_nouveaute =~ TRENDS_NEW_1 + TRENDS_NEW_2 + TRENDS_NEW_3

        Confiance_marque =~ confiance_distrib_nAlim + confiance_distrib_alim + confiance_distrib + confiance_brico
        confiance_comp =~ CONF_COMP_1 + CONF_COMP_2 + CONF_COMP_3
        confiance_benev =~ CONF_BENEV_1 + CONF_BENEV_2 + CONF_BENEV_3
        confiance_integ =~ CONF_INTEG_1 + CONF_INTEG_2 +  CONF_INTEG_3 + CONF_INTEG_4
        confiance =~ confiance_integ + confiance_benev + confiance_comp + Confiance_marque

        Image_marque =~ image_distrib_nAlim + image_distrib_alim + image_distrib + image_brico
        image_val =~ IMG_VAL_1 + IMG_VAL_2 + IMG_VAL_3 + IMG_VAL_4
        image_rse =~ IMG_RSE_1 + IMG_RSE_2 + IMG_RSE_3 + IMG_RSE_4 + IMG_RSE_5
        Image =~ image_rse + image_val + Image_marque

        Achat_Reconditionne =~ raison_achat_reconditionner_postconf + achat_magasin_reconditionne_postconf + achat_enligne_reconditionne_postconf

        Changement_Livraison =~ habitude_InternetTolivraison_postconf
        Changement_Deplacement =~ habitude_InternetToPointvente_postconf + habitude_InternetToDrive_postconf + habitude_InternetTolocker_postconf

        # regressions
        Changement_Livraison ~ Age + Volonte_Ecologique
        Changement_Deplacement ~  Age + Volonte_Ecologique
        Image ~ confiance + Trend_secu + Trend_prix
        confiance ~  Trend_Local + Trend_nouveaute

        Volonte_Economie ~ Volonte_Ecologique + Trend_prix
        Volonte_Ecologique ~  Achat_Reconditionne + Trend_nouveaute

        # residual correlations
        habitude_InternetTolivraison_postconf ~~ habitude_InternetToPointvente_postconf + habitude_InternetToDrive_postconf + habitude_InternetTolocker_postconf
        image_distrib_nAlim ~~ confiance_distrib_nAlim
        image_distrib_alim ~~ confiance_distrib_alim
        confiance_distrib ~~ image_distrib
        confiance_brico ~~ image_brico
        Trend_prix ~~ Trend_secu + Trend_nouveaute + Trend_Local
        Trend_nouveaute ~~ Trend_Local

    """

    model = Model(desc)
    model.fit(df)
    estimates = model.inspect(mode='list', what="names", std_est=True)

    # Render the path diagram to disk, then read it back as an image array.
    sem.semplot(model, "BatModel.jpg")

    img = cv2.imread('BatModel.jpg')
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(
            "BatModel.jpg could not be read; check that semplot produced the diagram"
        )
    # OpenCV loads images in BGR channel order while plotly expects RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (10000, 3200), interpolation=cv2.INTER_AREA)

    fig = px.imshow(img)
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(sem.calc_stats(model))

    fig.show()
    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        # Keep only the regression paths of the parameter table.
        display(estimates[estimates["op"] == "~"])

    # `report` is not imported anywhere in this notebook; call it through the semopy module.
    sem.report(model, "Quantitative study Covid Data")

In [6]:
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant

def vif_study(key):
    """Display variance inflation factors for the predictors of one regression target.

    ``key`` is echoed, then mapped to the list of predictor columns associated with
    it. Those columns of the global ``df`` are passed through ``add_constant`` and
    one VIF is computed per resulting column; the values are displayed as a Series
    indexed by column name.

    Args:
        key: Name of the regression target; ``list.index`` raises ``ValueError``
            if it is not one of the known targets.
    """
    print(key)

    targets = [
        "Changement_Livraison",
        "Changement_Deplacement",
        "Image",
        "confiance",
        "Volonte_Economie",
        "Volonte_Ecologique",
    ]
    # predictor_sets[i] lists the observed columns used as predictors for targets[i].
    predictor_sets = [
        ["Age", "accord_achat_ecolo_postconf", "intention_ecologie_postconf"],
        ["Age", "accord_achat_ecolo_postconf", "intention_ecologie_postconf"],
        ["confiance_distrib_nAlim", "confiance_distrib_alim", "confiance_distrib", "confiance_brico",
         "CONF_COMP_1", "CONF_COMP_2", "CONF_COMP_3",
         "CONF_BENEV_1", "CONF_BENEV_2", "CONF_BENEV_3",
         "CONF_INTEG_1", "CONF_INTEG_2", "CONF_INTEG_3", "CONF_INTEG_4",
         "TRENDS_SECU_1", "TRENDS_SECU_2", "TRENDS_SECU_3", "TRENDS_SECU_4",
         "TRENDS_PRIX_1", "TRENDS_PRIX_2", "TRENDS_PRIX_3"],
        ["TRENDS_LOC_1", "TRENDS_LOC_2", "TRENDS_LOC_3",
         "TRENDS_NEW_1", "TRENDS_NEW_2", "TRENDS_NEW_3"],
        ["accord_achat_ecolo_postconf", "intention_ecologie_postconf",
         "TRENDS_PRIX_1", "TRENDS_PRIX_2", "TRENDS_PRIX_3"],
        ["raison_achat_reconditionner_postconf", "achat_magasin_reconditionne_postconf",
         "achat_enligne_reconditionne_postconf",
         "TRENDS_NEW_1", "TRENDS_NEW_2", "TRENDS_NEW_3"],
    ]
    predictors = predictor_sets[targets.index(key)]

    # Work on a copy of the selected columns with the constant column added.
    design = add_constant(df[predictors].copy())

    vif_values = []
    for position in range(design.shape[1]):
        vif_values.append(variance_inflation_factor(design.values, position))
    vif_by_column = pd.Series(vif_values, index=design.columns)

    with pd.option_context('display.max_rows', None, 'display.max_columns', None):
        display(vif_by_column)
    

In [7]:
def app_dashboard(menue, key):
    """Run the analysis selected in the toggle buttons for the selected dropdown entry.

    The options of the global dropdown ``value_comp`` are switched to the list that
    matches the chosen menu. If ``key`` does not belong to that list (for example it
    is left over from the previously shown menu), the first entry of the list is
    used instead.

    Args:
        menue: Label of the selected menu button.
        key: Currently selected dropdown value.
    """
    factor = [
        "componant_Vol_ecologie", "componant_Vol_economie", "componant_Trend_Local",
        "Componant_Trend_Prix", "Componant_Trend_secu", "Componant_confiance_marque",
        "Componant_confiance_comp", "Componant_confiance_benev", "Componant_confiance_integ",
        "Componant_image_marque", "Componant_image_val", "Componant_reconditionnement",
        "Componant_image_RSE", "Componant_changement_internet", "Componant_confiance",
        "Componant_image",
    ]
    vif_targets = [
        "Changement_Livraison", "Changement_Deplacement", "Image",
        "confiance", "Volonte_Economie", "Volonte_Ecologique",
    ]

    if menue in ('Analyse Factoriel', 'Etude Descriptive'):
        value_comp.options = factor
        # Validate the argument itself rather than the widget value: the widget value may
        # already have changed when the options were assigned, while `key` can still hold
        # an entry of the other menu's list.
        if key not in factor:
            key = factor[0]
        if menue == 'Analyse Factoriel':
            FactorAnalysis_dashboard(key)
        else:
            descriptive(key)
    elif menue == 'test de colinéarité':
        value_comp.options = vif_targets
        if key not in vif_targets:
            key = vif_targets[0]
        vif_study(key)
    elif menue == 'Modèle Prédictif SEM':
        model_print()
        

In [8]:
from ipywidgets import Layout

# Component names initially offered by the dropdown.
factor = ["componant_Vol_ecologie","componant_Vol_economie","componant_Trend_Local","Componant_Trend_Prix","Componant_Trend_secu","Componant_confiance_marque","Componant_confiance_comp","Componant_confiance_benev","Componant_confiance_integ","Componant_image_marque","Componant_image_val","Componant_reconditionnement","Componant_image_RSE","Componant_changement_internet","Componant_confiance","Componant_image"]

# Dropdown selecting the component (or regression target) to analyse; app_dashboard
# swaps its options depending on the active menu.
value_comp = widgets.Dropdown(
    options=factor,
    description='Composant :'
    )

# Menu buttons selecting which analysis app_dashboard runs. There is one tooltip per
# option, in the same order as `options`.
button_menue =  widgets.ToggleButtons(
    options=['Etude Descriptive', 'Analyse Factoriel', 'Modèle Prédictif SEM','test de colinéarité'],
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltips=[
        'Statistiques descriptives et boîtes à moustaches des items',
        'Alpha de Cronbach, ACP et analyse factorielle du composant',
        "Ajustement et résultats du modèle d'équations structurelles",
        "Facteurs d'inflation de la variance (VIF) des prédicteurs",
    ],
)

In [9]:
# Bind the dropdown and the menu buttons to app_dashboard's `key` and `menue` arguments.
# As the last expression of the cell, the resulting interactive container is rendered.
widgets.interactive(app_dashboard, key = value_comp,menue = button_menue )

interactive(children=(ToggleButtons(description='menue', options=('Etude Descriptive', 'Analyse Factoriel', 'M…