In [None]:
# Add the project source directories to sys.path so the local modules below can be imported
import sys
sys.path.insert(0,"../src/Loading/")
sys.path.insert(0,"../src/Preprocessing/")
sys.path.insert(0,"../src/Modeling/")
sys.path.insert(0,"../src/Evaluation/")
sys.path.insert(0,"../src/Interpretability/")
sys.path.insert(0,"../src/Monitoring/")
sys.path.insert(0,"../src/Utils/")


import loading
import preprocessing
import interpretability
import utils as u
import modeling
import evaluation

import json 
import pandas_profiling
import pandas as pd
import numpy as np

In [None]:
# Load the run configuration from the JSON conf file.
path_conf = "../params/conf/conf.json"
# Use a context manager so the file handle is closed as soon as parsing is done
# (json.load(open(...)) leaves the handle open until garbage collection).
with open(path_conf, 'r') as conf_file:
    conf = json.load(conf_file)

# Launch the logger only once.
path_log = conf['path_log']  # e.g. "../log/my_log_file.txt"
log_level = conf['log_level']  # e.g. "DEBUG"
logger = u.my_get_logger(path_log, log_level, my_name="main_logger")

## Loading and EDA

In [None]:
# Read the dataset selected in the conf file.
df = loading.read_csv_from_name(conf)
# Trailing expression: shown as the cell output to preview the loaded data.
df.head()

In [None]:
# Run the exploratory data analysis with pandas-profiling.
# NOTE(review): profile_report() is presumably made available on the frame by
# `import pandas_profiling` in the imports cell — confirm before removing that import.
df.profile_report()

## Preprocessing

In [None]:
# Preprocess the dataset selected in the conf file.
# The result is unpacked into the preprocessed frame, the feature column names,
# the target column name and the encoding dictionary (all reused by later cells).
df_preprocessed, X_columns, y_column, encoding_dict = preprocessing.main_preprocessing_from_name(df, conf)

# Write the preprocessed dataset (reloaded by the evaluation step).
loading.write_preprocessed_csv_from_name(df_preprocessed, conf, 'data_preprocessed.csv')

# Write the encoding dictionary (reloaded by the evaluation step).
loading.write_dict_json_from_name(encoding_dict, conf, 'encoding_dict.json')

# Preview the preprocessed dataset.
df_preprocessed.head()

#### Splitting the data

In [None]:
# Basic split between train and test sets.
# NOTE(review): 0.25 is presumably the test-set fraction — confirm against
# preprocessing.basic_split. The evaluation cell repeats this call with the same
# value, so keep the two in sync.
X_train, X_test, y_train, y_test = preprocessing.basic_split(df_preprocessed, 0.25, X_columns, y_column)

## Modeling

In [None]:
# Fit the model selected in the conf file on the training set.
# best_params is returned alongside the fitted model; it is not used again in the visible cells.
model, best_params = modeling.main_modeling_from_name(X_train, y_train, conf)

# Save the model (reloaded by the evaluation step via u.load_model).
u.save_model(model, conf)

## Evaluation

In [None]:
# Independent step: everything needed is reloaded from disk, so this cell can be
# launched without the loading/preprocessing/modeling cells (it only needs the
# imports and the conf).

# Load the saved model.
model = u.load_model(conf)
# Load the preprocessed dataset. It is bound to df_preprocessed (the name used by
# the preprocessing cell) rather than df, so the raw frame used for the EDA is
# not silently replaced by preprocessed data.
df_preprocessed = loading.load_preprocessed_csv_from_name(conf, 'data_preprocessed.csv')
# Load the encoding dictionary.
encoding_dict = loading.load_dict_json_from_name(conf, 'encoding_dict.json')

# Basic splitting: the target column comes from the conf, every other column is a feature.
y_column = u.get_y_column_from_conf(conf)
X_columns = [col for col in df_preprocessed.columns if col != y_column]
# NOTE(review): this must reproduce the split used for training; confirm that
# preprocessing.basic_split is deterministic (seeded), otherwise X_test may
# contain rows the reloaded model was trained on.
X_train, X_test, y_train, y_test = preprocessing.basic_split(df_preprocessed, 0.25, X_columns, y_column)

In [None]:
# Compute the evaluation metrics of the model on the test set.
dict_metrics = evaluation.main_evaluation(model, X_test, y_test, conf)
# Trailing expression: display the metrics as the cell output.
dict_metrics

## Interpretability

In [None]:
# Get the feature importances by permutation, computed here on the training set.

interpretability.permutation_features_importance(conf, model, X_train, y_train)

#### SHAP

In [None]:
# Initialize the SHAP analysis helper with the model and the test set.
# The resulting shap_analysis object is reused by the SHAP cells that follow.
shap_analysis = interpretability.shap_analysis(model, X_test, y_test, conf)

# Feature importance plot.
shap_analysis.features_importance()

In [None]:
# Summary plot:
# feature importance plus the effect of each feature according to its value.
shap_analysis.summary_plot()

In [None]:
# Global interpretation: force plot.
# NOTE(review): n=1000 is presumably the number of rows included in the plot — confirm.
shap_analysis.global_force_plot(n=1000)

In [None]:
# Local interpretation of the model: force plot for a single observation.
# NOTE(review): i=0 is presumably the position of the row to explain in X_test — confirm.
shap_analysis.local_force_plot(i=0)

In [None]:
# Look at the effect of each category of the 'diabetesMed' feature,
# using the encoding dictionary produced by the preprocessing step.
# NOTE(review): the feature name is hard-coded while the dataset is selected in
# the conf file — confirm 'diabetesMed' exists in the configured dataset.
shap_var_importance = shap_analysis.cat_features_explanability('diabetesMed', encoding_dict)

#### DICE

In [None]:
# Initialize the DiCE interpretability helper with the model and the test set.
# The resulting object is reused by the global-interpretability cell that follows.
dice_interpretability = interpretability.dice_interpretability(model, X_test, y_test, conf)

# Generate counterfactuals for a given input point.
# NOTE(review): no point is passed here, so the choice is made inside local_interpretability() — confirm which one.
dice_interpretability.local_interpretability()

In [None]:
# Compute the counterfactual necessity score for all features.
# NOTE(review): n=1000 is presumably the number of observations sampled — confirm.
dice_interpretability.global_interpretability(n=1000)