# Desenvolupament pràctic TFG

Per al desenvolupament del projecte pràctic, farem us de una base de dades que les seves entrades consisteixen en una persona que demana un crèdit al banc. Cada persona es classifica segons el risc que generi fer-li un prèstam (poden ser bons prestams o dolents).

In [1]:
import pandas as pd
import altair as alt
from IPython.display import display
import warnings

warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2


### Entrenament per al model

In [2]:
from carla.data.catalog import CsvCatalog, OnlineCatalog
from carla.models.catalog.catalog import MLModelCatalog, OnlineCatalog
from carla.models.negative_instances import predict_negative_instances
import carla.recourse_methods.catalog as recourse_catalog
from carla.data.causal_model import CausalModel
from carla.recourse_methods import GrowingSpheres, Wachter, CCHVAE, Dice, FOCUS, CEM

Using TensorFlow backend.


[INFO] Using Python-MIP package version 1.12.0 [model.py <module>]


In [3]:
import tensorflow as tf
from tensorflow.compat.v1 import Session

In [4]:
data = pd.DataFrame(pd.read_csv('./archive/german_credit_data.csv'))
data.fillna(value="unknown", inplace=True)
data_carla = data.loc[:, ~data.columns.str.contains("^Unnamed")]

for i in range(len(data_carla["Risk"])):
    if data_carla["Risk"][i] == 'good':
        data_carla["Risk"][i] = 1.0
    else:
        data_carla["Risk"][i] = 0.0

data_carla.to_csv("./archive/german_credit_data_noNan.csv", index=False)

continuous = ["Id","Age", "Credit amount", "Duration"]
categorical = ["Sex", "Job", "Housing", "Saving accounts", "Checking account", "Purpose"]
immutables = ["Age", "Id"]

data_bank = CsvCatalog(file_path = "./archive/german_credit_data_noNan.csv",
                 continuous=continuous,
                 categorical=categorical,
                 immutables=immutables,
                 target='Risk')

display(data_bank.df)

Unnamed: 0,Id,Age,Credit amount,Duration,Risk,...,Purpose_education,Purpose_furniture/equipment,Purpose_radio/TV,Purpose_repairs,Purpose_vacation/others
0,0.000000,0.857143,0.050567,0.029412,1.0,...,0.0,0.0,1.0,0.0,0.0
1,0.001001,0.053571,0.313690,0.647059,0.0,...,0.0,0.0,1.0,0.0,0.0
2,0.002002,0.535714,0.101574,0.117647,1.0,...,1.0,0.0,0.0,0.0,0.0
3,0.003003,0.464286,0.419941,0.558824,1.0,...,0.0,1.0,0.0,0.0,0.0
4,0.004004,0.607143,0.254209,0.294118,0.0,...,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
995,0.995996,0.214286,0.081765,0.117647,1.0,...,0.0,1.0,0.0,0.0,0.0
996,0.996997,0.375000,0.198470,0.382353,1.0,...,0.0,0.0,0.0,0.0,0.0
997,0.997998,0.339286,0.030483,0.117647,1.0,...,0.0,0.0,1.0,0.0,0.0
998,0.998999,0.071429,0.087763,0.602941,0.0,...,0.0,0.0,1.0,0.0,0.0


In [56]:
# display(data_bank.df[["Id","Sex_male", "Housing_own", "Housing_free", "Housing_rent"]][:20])

In [5]:
from carla.models.catalog import MLModelCatalog

training_params = {"lr": 0.002, "epochs": 50, "batch_size": 1024, "hidden_size": [18, 9, 3]}

ml_model = MLModelCatalog(
    data_bank,
    model_type="ann",
    load_online=False, # Carrega un entrenament ja fet que es troba online (nomes compatible amb datasets inclosos)
    backend="tensorflow"
)

ml_model.train(
    learning_rate=training_params["lr"],
    epochs=training_params["epochs"],
    batch_size=training_params["batch_size"],
    hidden_size=training_params["hidden_size"],
    force_train=False #True en la primera execució 
)


Loaded model from C:\Users\gerar\carla\models\custom\ann_layers_18_9_3.h5
test accuracy for model: 0.744


In [6]:
factuals = data_bank.df[1:2]
factuals

Unnamed: 0,Id,Age,Credit amount,Duration,Risk,...,Purpose_education,Purpose_furniture/equipment,Purpose_radio/TV,Purpose_repairs,Purpose_vacation/others
1,0.001001,0.053571,0.31369,0.647059,0.0,...,0.0,0.0,1.0,0.0,0.0


In [7]:
factuals = data_bank.df[1:2]
factuals

Unnamed: 0,Id,Age,Credit amount,Duration,Risk,...,Purpose_education,Purpose_furniture/equipment,Purpose_radio/TV,Purpose_repairs,Purpose_vacation/others
1,0.001001,0.053571,0.31369,0.647059,0.0,...,0.0,0.0,1.0,0.0,0.0


In [8]:
factuals = data_bank.df[1:2]
hyperparams = {
    "kappa": 0.9,
    "init_learning_rate": 10.,
    "binary_search_steps": 5,
    "max_iterations": 10,
    "initial_const": 10,
    "beta": 1.0,
    "gamma": 100.,
    "mode": "PN",
    "num_classes": 2,
    "data_name": "PruebaName",
    "ae_params": {
        "hidden_layer": [12, 9, 3],
        "train_ae": True,
        "epochs": 50
    }
    
}

with tf.Session() as sess:
    recourse_method = recourse_catalog.CEM(sess=sess, mlmodel=ml_model, hyperparams=hyperparams) # Sess = tensorflow session. tensorflow.python.client.session.Session
    df_cfs = recourse_method.get_counterfactuals(factuals)

display(df_cfs)

 [deprecation_wrapper.py __getattr__]
 [deprecation_wrapper.py __getattr__]
Train on 700 samples, validate on 300 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
 [deprecation_wrapper.py __getattr__]
 [deprecation_wrapper.py __getattr__]
 [deprecation_wrapper.py __getattr__]
Instructions for updating:
Deprecated in favor of operator or tf.math.divide. [deprecation.py new_func]
 [deprecation_wrapper.py __getattr__]
Instructions for updating:
Use 

FailedPreconditionError: Error while reading resource variable dense_12/bias from Container: localhost. This could mean that the variable was uninitialized. Not found: Resource localhost/dense_12/bias/class tensorflow::Var does not exist.
	 [[{{node dense_12/BiasAdd/ReadVariableOp}}]]

En aquest moment, tenim un model de tipus ann el qual es troba entrenat, i com retorna el seu valor, 

In [None]:
recourse_method = recourse_catalog.GrowingSpheres(model)
data_counterfactuals = recourse_method.get_counterfactuals(some_factuals)
data_counterfactuals.to_excel("prueba.xlsx")

counterfactual = data_counterfactuals.iloc[0]


In [12]:
from carla.plotting.plotting import summary_plot, single_sample_plot
single_s

NameError: name 'single_s' is not defined