# Modelos de Machine Learning: Benchmarks

## Intro

## Config

In [1]:
Variable_Target = 'CPC'
version = '4_sinClient'

nombre_scaler = 'scaler_model_'+Variable_Target+'_v'+version+'.joblib'
nombre_pca = 'pca_model_'+Variable_Target+'_v'+version+'.joblib'

nombre_RF = 'model_rf_'+Variable_Target+'_v'+version+'.joblib'
nombre_XGB = 'model_xgboost_'+Variable_Target+'_v'+version+'.json'
nombre_NN = 'model_NN_'+Variable_Target+'_v'+version+'.joblib'
nombre_scaler_NN = 'scaler_NN_model_'+Variable_Target+'_v'+version+'.joblib'

### Imports

In [2]:
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.decomposition import PCA
import math
import altair as alt

### Carga de Datos

In [3]:
def load_data(df_in):
    df = pd.read_csv(df_in+'.csv')
    df = df.drop("Unnamed: 0", axis=1)
    return df

# Cargar los datos
df = load_data('df_C_S_v4')
df = df[df['Client'] != 'Boxto']
#df = df[df['Client'] != 'AOV']
df = df.reset_index()


### Elección Output y features

La regla empírica establece que:

aproximadamente el 68% de los datos caerán dentro de 1 desviación estándar de la media,

alrededor del 95% de los datos caerán dentro de 2 desviaciones estándar y

aproximadamente el 99.7% de los datos caerán dentro de 3 desviaciones estándar de la media.

In [4]:


# Calcular la media y la desviación estándar de la variable objetivo y
mean_y, std_y = np.mean(df[Variable_Target]), np.std(df[Variable_Target])
outlier_threshold = 3 * std_y

max_y = mean_y + (3 * std_y)
min_y = mean_y - (3 * std_y)

df = df[df[Variable_Target] <= max_y]
df = df[df[Variable_Target] >= min_y]
df.shape

(4284, 56)

In [5]:
X = df.copy()
X = pd.DataFrame(X, columns=['Año','Mes', 'Objective', 'Cost', 'Country', 'Media_type', 'Traffic_source', 'Format_New','Platform','Strategy','Plataforma','Campaign_Type','Ecommerce','Service_Product',#])#,'Client'
                            'Bench Gral CPC','Bench Search CPC','Bench GralSch CPL', 'Bench Search CPL','Bench GralSch CTR', 'Bench Search CTR', 'Bench GralSch CR','Bench Search AvgCR',
                             'Bench GralFB CPC', 'Bench FB CPC','Bench GralFB CPAction', 'Bench FB CPAction', 'Bench GralFB CTR', 'Bench FB CTR', 'Bench GralFB CR', 'Bench FB AvgCR'
                             ,'Bench GralYT CPV', 'Bench YT CPV', 'Bench GralYT CTR', 'Bench YT CTR','Bench GralYT VR', 'Bench FB AvgVR'])



In [6]:
X

Unnamed: 0,Año,Mes,Objective,Cost,Country,Media_type,Traffic_source,Format_New,Platform,Strategy,...,Bench GralFB CTR,Bench FB CTR,Bench GralFB CR,Bench FB AvgCR,Bench GralYT CPV,Bench YT CPV,Bench GralYT CTR,Bench YT CTR,Bench GralYT VR,Bench FB AvgVR
0,2023,4,Fans,86.778068,Mexico,Social,Facebook,Display,Facebook&Instagram,Awareness,...,0.9,1.04,9.21,2.31,0.048,0.048,0.65,0.33,31.9,31.9
1,2023,4,Purchase,208.035266,Chile,Unknown,Google,Display,Discovery,Conversion,...,0.9,1.04,9.21,2.31,0.048,0.048,0.65,0.33,31.9,31.9
2,2023,4,Purchase,169.859977,Colombia,Unknown,Google,Display,Discovery,Conversion,...,0.9,1.04,9.21,2.31,0.048,0.048,0.65,0.33,31.9,31.9
3,2023,4,Purchase,192.010904,Mexico,Unknown,Google,Display,Discovery,Conversion,...,0.9,1.04,9.21,2.31,0.048,0.048,0.65,0.33,31.9,31.9
4,2023,4,Reach,585.975296,Mexico,Unknown,Google,Display,Discovery,Awareness,...,0.9,1.04,9.21,2.31,0.048,0.048,0.65,0.33,31.9,31.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4349,2022,10,Conversion,515.200000,USA,Unknown,Google,Display,NoPlatform,Conversion,...,0.9,0.90,9.21,2.82,0.048,0.047,0.65,0.78,31.9,29.6
4350,2022,10,Conversion,244.890000,USA,Search,Google,Display,NoPlatform,Conversion,...,0.9,0.90,9.21,2.82,0.048,0.047,0.65,0.78,31.9,29.6
4351,2022,10,Conversion,121.830000,USA,Unknown,Google,Display,NoPlatform,Conversion,...,0.9,0.90,9.21,2.82,0.048,0.047,0.65,0.78,31.9,29.6
4353,2022,10,Conversion,285.160000,USA,Search,Google,Display,NoPlatform,Conversion,...,0.9,0.90,9.21,2.82,0.048,0.047,0.65,0.78,31.9,29.6


In [7]:
categorical_features = ['Objective', 'Country', 'Media_type', 'Traffic_source','Format_New','Platform','Strategy','Plataforma','Campaign_Type','Ecommerce','Service_Product']  #,'Tipo Search','Tipo FB','Tipo YT'], 'Client'

# Preprocesamiento de variables categóricas
X_dum = pd.get_dummies(X, columns=categorical_features)
X_dum = X_dum.reset_index()

#Elijo el valor a predecir. CPC: La hipótesis es que el valor de COST es conocido y controlable: El output en rigor será Clicks y expresaremos el resultado en CPC Cost/Clicks
y = df[Variable_Target]

In [8]:
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_dum_aux = X_dum[['Año','Mes','Cost','Bench Gral CPC','Bench Search CPC','Bench GralSch CPL', 'Bench Search CPL','Bench GralSch CTR', 'Bench Search CTR', 'Bench GralSch CR','Bench Search AvgCR', 'Bench GralFB CPC', 'Bench FB CPC',
                                       'Bench GralFB CPAction', 'Bench FB CPAction', 'Bench GralFB CTR', 'Bench FB CTR', 'Bench GralFB CR', 'Bench FB AvgCR', 'Bench GralYT CPV', 'Bench YT CPV', 'Bench GralYT CTR',
                                       'Bench YT CTR','Bench GralYT VR', 'Bench FB AvgVR']]

X_Scaled = scaler.fit_transform(X_dum_aux)

pca = PCA(n_components=2)  # Aquí estamos conservando solo una componente principal
X_pca = pca.fit_transform(X_Scaled)
X_pca = pd.DataFrame(X_pca)

X_dum['X_pca_0'] = X_pca[0]
X_dum['X_pca_1'] = X_pca[1]
X['X_pca_0'] = X_pca[0]
X['X_pca_1'] = X_pca[1]

In [9]:
scaler.feature_names_in_

array(['Año', 'Mes', 'Cost', 'Bench Gral CPC', 'Bench Search CPC',
       'Bench GralSch CPL', 'Bench Search CPL', 'Bench GralSch CTR',
       'Bench Search CTR', 'Bench GralSch CR', 'Bench Search AvgCR',
       'Bench GralFB CPC', 'Bench FB CPC', 'Bench GralFB CPAction',
       'Bench FB CPAction', 'Bench GralFB CTR', 'Bench FB CTR',
       'Bench GralFB CR', 'Bench FB AvgCR', 'Bench GralYT CPV',
       'Bench YT CPV', 'Bench GralYT CTR', 'Bench YT CTR',
       'Bench GralYT VR', 'Bench FB AvgVR'], dtype=object)

In [10]:
X_pca

Unnamed: 0,0,1
0,-1.154067,-1.537169
1,-1.157625,-1.549033
2,-1.156505,-1.545298
3,-1.157155,-1.547465
4,-1.168716,-1.586013
...,...,...
4279,4.668498,-0.082075
4280,4.676430,-0.055626
4281,4.680041,-0.043585
4282,4.675248,-0.059567


In [11]:
# Separo en conjuntos de Train y de Test
#X_train, X_test, y_train, y_test = train_test_split(X_dum, y, test_size=0.2, random_state=0)
X_train = X_dum
y_train = y

## RandomForest

RMSE = 0.26

In [12]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import train_test_split, GridSearchCV

In [13]:
rf = RandomForestRegressor(n_estimators=20, max_depth=14, min_samples_split=4, min_samples_leaf=2)

In [14]:
rf.fit(X_train.drop('index',axis=1), y_train)

#### VC

In [15]:
from sklearn.model_selection import KFold

In [16]:
kf_rf = KFold(n_splits=5, shuffle=True, random_state=42)
X_train_rf_prueba = X_train.copy()
X_train_rf_prueba = X_train_rf_prueba.reset_index()
y_train_rf_prueba = y_train.copy()
y_train_rf_prueba = y_train_rf_prueba.reset_index()


X_train_rf_prueba = X_train_rf_prueba.drop('index', axis=1)
y_train_rf_prueba = y_train_rf_prueba.drop('index', axis=1)

In [17]:
y_train_rf_prueba

Unnamed: 0,CPC
0,0.033766
1,0.707603
2,0.078240
3,0.029364
4,0.032527
...,...
4279,1.459490
4280,0.533529
4281,0.525129
4282,1.485208


In [None]:
mse_test_list_total = dict()
mse_train_list_total = dict()
for i in [2,3,4,5,6,7,8,9,10]:
    print(i)
    mse_test_list = []
    mse_train_list = []
    for train_index, test_index in kf_rf.split(X_train_rf_prueba):
        X_train_cv, X_test_cv = X_train_rf_prueba.iloc[train_index], X_train_rf_prueba.iloc[test_index]
        y_train_cv, y_test_cv = y_train_rf_prueba.iloc[train_index], y_train_rf_prueba.iloc[test_index]
        
        # Inicializar el modelo de clasificación
        rf_cv = RandomForestRegressor(n_estimators=20, max_depth=14, min_samples_split=4, min_samples_leaf=2)
    
        # Ajustar el modelo con los datos de entrenamiento
        rf_cv.fit(X_train_cv, y_train_cv)
    
        # Hacer predicciones en los datos de prueba
        y_pred_test_cv = rf_cv.predict(X_test_cv)
        y_pred_train_cv = rf_cv.predict(X_train_cv)
    
        # Calcular la precisión y agregarla a la lista de puntuaciones
        mse_test = mean_squared_error(y_test_cv, y_pred_test_cv)
        mse_train = mean_squared_error(y_train_cv, y_pred_train_cv)
        #print(y_test_cv)
        #print(y_pred_cv)
        mse_test_list.append(mse_test)
        mse_train_list.append(mse_train)
        
    mse_cv_test = np.mean(mse_test_list)
    mse_test_list_total[i] = mse_cv_test
    mse_cv_train = np.mean(mse_train_list)
    mse_train_list_total[i] = mse_cv_train
print(mse_cv_test)
print(mse_cv_train)

df_curve = pd.DataFrame(list(mse_test_list_total.items()), columns=['Index', 'ErrorTest'])
df_curve_2 = pd.DataFrame(list(mse_train_list_total.items()), columns=['Index', 'ErrorTrain'])
df_curve = pd.merge(df_curve,df_curve_2,on='Index',how='left')
#df_curve = pd.DataFrame(mse_test_list_total)

2


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


In [None]:
# Supongamos que tienes un DataFrame llamado 'df' con columnas "indice", "columna_A" y "columna_B"

# Convertir el DataFrame de pandas a un formato aceptado por Altair
df_altair = pd.melt(df_curve, id_vars=['Index'], value_vars=['ErrorTest', 'ErrorTrain'])

# Crear el gráfico de líneas divididas por color en Altair
line_chart = alt.Chart(df_altair).mark_line(point=True).encode(
    x='Index:Q',
    y='value:Q',
    color='variable:N'
).properties(
    width=600,
    height=400,
    title='Gráfico de Líneas para Columnas A y B'
)

# Mostrar el gráfico
line_chart

## XGBoost

RMSE_test = 0.24

### Modelo

In [15]:
import xgboost as xgb

In [16]:
xgboost = xgb.XGBRegressor(learning_rate=0.05, max_depth=10, n_estimators=70)

In [17]:
#xgboost.fit(X_train.drop('index',axis=1), y_train)
xgboost.fit(X_train.drop('index',axis=1), y_train)

### Validación Cruzada

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

In [None]:
# Definir los hiperparámetros a ajustar
parameters = {'learning_rate': [0.05, 0.075, 0.1],
              'max_depth': [10, 12, 15],
              'n_estimators': [70, 80, 100]}

In [None]:
# Configurar la validación cruzada
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

In [None]:
# Realizar la búsqueda de parámetros
grid_search = GridSearchCV(estimator=xgboost, param_grid=parameters, cv=kfold, scoring='neg_mean_squared_error', verbose = 3)
grid_result = grid_search.fit(X_train, y_train)

In [None]:
# Imprimir los resultados
print("Mejor: %f usando %s" % (grid_result.best_score_, grid_result.best_params_))

In [None]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)
X_train_xgb_prueba = X_train.copy()
X_train_xgb_prueba = X_train_xgb_prueba.reset_index()
y_train_xgb_prueba = y_train.copy()
y_train_xgb_prueba = y_train_xgb_prueba.reset_index()


X_train_xgb_prueba = X_train_xgb_prueba.drop('index', axis=1)
y_train_xgb_prueba = y_train_xgb_prueba.drop('index', axis=1)

In [None]:
mse_test_list_total = dict()
mse_train_list_total = dict()
for i in [2,3,4,5,6,7,8,9,10,11,12,13]:
    print(i)
    mse_test_list = []
    mse_train_list = []
    for train_index, test_index in kf.split(X_train_xgb_prueba):
        X_train_cv, X_test_cv = X_train_xgb_prueba.iloc[train_index], X_train_xgb_prueba.iloc[test_index]
        y_train_cv, y_test_cv = y_train_xgb_prueba.iloc[train_index], y_train_xgb_prueba.iloc[test_index]
        
        # Inicializar el modelo de clasificación
        xgboost_cv = xgb.XGBRegressor(learning_rate=0.08, max_depth=i, n_estimators=40)
    
        # Ajustar el modelo con los datos de entrenamiento
        xgboost_cv.fit(X_train_cv, y_train_cv)
    
        # Hacer predicciones en los datos de prueba
        y_pred_test_cv = xgboost_cv.predict(X_test_cv)
        y_pred_train_cv = xgboost_cv.predict(X_train_cv)
    
        # Calcular la precisión y agregarla a la lista de puntuaciones
        mse_test = mean_squared_error(y_test_cv, y_pred_test_cv)
        mse_train = mean_squared_error(y_train_cv, y_pred_train_cv)
        #print(y_test_cv)
        #print(y_pred_cv)
        mse_test_list.append(mse_test)
        mse_train_list.append(mse_train)
        
    mse_cv_test = np.mean(mse_test_list)
    mse_test_list_total[i] = mse_cv_test
    mse_cv_train = np.mean(mse_train_list)
    mse_train_list_total[i] = mse_cv_train
print(mse_cv_test)
print(mse_cv_train)

df_curve = pd.DataFrame(list(mse_test_list_total.items()), columns=['Index', 'ErrorTest'])
df_curve_2 = pd.DataFrame(list(mse_train_list_total.items()), columns=['Index', 'ErrorTrain'])
df_curve = pd.merge(df_curve,df_curve_2,on='Index',how='left')
#df_curve = pd.DataFrame(mse_test_list_total)

In [None]:
df_curve

In [None]:
# Supongamos que tienes un DataFrame llamado 'df' con columnas "indice", "columna_A" y "columna_B"

# Convertir el DataFrame de pandas a un formato aceptado por Altair
df_altair = pd.melt(df_curve, id_vars=['Index'], value_vars=['ErrorTest', 'ErrorTrain'])

# Crear el gráfico de líneas divididas por color en Altair
line_chart = alt.Chart(df_altair).mark_line(point=True).encode(
    x='Index:Q',
    y='value:Q',
    color='variable:N'
).properties(
    width=600,
    height=400,
    title='Gráfico de Líneas para Columnas A y B'
)

# Mostrar el gráfico
line_chart

## Redes Neuronales

RMSE: 0.24

In [18]:
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import KFold

In [19]:
X_train_NN = X_train.drop('index',axis=1)
#X_test_NN = X_test#.drop('index',axis=1)

In [20]:
scaler_NN = StandardScaler()
X_train_scaled_NN = scaler_NN.fit_transform(X_train_NN)
#X_test_scaled_NN = scaler.transform(X_test_NN)

In [23]:
# Definir el modelo de redes neuronales para regresión
model_NN = MLPRegressor(hidden_layer_sizes=(100,200,200,100,), activation='relu', alpha=0.001, solver='adam',random_state=42, max_iter=70,learning_rate_init=0.0015)

In [24]:
#mse_train_list = []
#mse_test_list = []
for i in range(1,70):
    print(i)

    model_NN.partial_fit(X_train_scaled_NN, y_train)
    # Hacer predicciones en el conjunto de prueba
    #y_pred_train = model.predict(X_train_scaled_NN)
    #y_pred_test = model.predict(X_test_scaled_NN)
    #mse_train = mean_squared_error(y_train, y_pred_train)
    #mse_train_list.append(mse_train)
    #mse_test = mean_squared_error(y_test, y_pred_test)
    #mse_test_list.append(mse_test)

#df_curve = pd.DataFrame()
#df_curve['train'] = mse_train_list
#df_curve['test'] = mse_test_list
#df_curve = df_curve.reset_index()


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69


## Exportar Modelos

In [25]:
import joblib
joblib.dump(scaler, nombre_scaler)  # nombre_scaler = 'scaler_model_CPC_v3.joblib'
joblib.dump(pca, nombre_pca)  # nombre_pca = 'pca_model_CPC_v3.joblib'

joblib.dump(rf, nombre_RF) # nombre_RF = 'model_rf_CPC_v3.joblib'
xgboost.save_model(nombre_XGB) # nombre_XGB = 'model_xgboost_v3.json'
joblib.dump(model_NN, nombre_NN)  # nombre_NN = 'model_NN_CPC_v3.joblib'
joblib.dump(scaler_NN, nombre_scaler_NN)  # nombre_scaler_NN = 'scaler_NN_model_CPC_v3.joblib'


['scaler_NN_model_CPC_v4_sinClient.joblib']

In [45]:
dir(xgboost)

['_Booster',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__sklearn_clone__',
 '__sklearn_is_fitted__',
 '__slotnames__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_build_request_for_signature',
 '_can_use_inplace_predict',
 '_check_feature_names',
 '_check_n_features',
 '_configure_fit',
 '_create_dmatrix',
 '_estimator_type',
 '_get_default_requests',
 '_get_iteration_range',
 '_get_metadata_request',
 '_get_param_names',
 '_get_tags',
 '_get_type',
 '_more_tags',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_set_evaluation_result',
 '_validate_data',
 '_validate_params',
 'apply',
 'base_score',
 'best_iteration',
 'best_score',
 'booster',
 'callbacks

## SHAP

In [103]:
import shap

In [104]:
# Crea un objeto explainer de Shap
explainer_rf = shap.Explainer(rf)

# Calcula los Shap values para un conjunto de datos (puedes ajustar n_samples según tus necesidades)
shap_values_rf = explainer_rf.shap_values(X_train)

# Calcula la importancia promedio de cada característica
feature_importance_rf = pd.DataFrame(abs(shap_values_rf).mean(axis=0), columns=['Importance'])
feature_importance_rf['Feature'] = X_train.columns

# Ordena el DataFrame por importancia
feature_importance_rf = feature_importance_rf.sort_values(by='Importance', ascending=False)

feature_importance_rf = feature_importance_rf.set_index('Feature')
feature_importance_rf = feature_importance_rf.rename(columns={'Importance': 'Importance_rf'})

In [105]:
# Crea un objeto explainer de Shap
explainer_xgb = shap.Explainer(xgboost)

# Calcula los Shap values para un conjunto de datos (puedes ajustar n_samples según tus necesidades)
shap_values_xgb = explainer_xgb.shap_values(X_train[xgboost.feature_names_in_])

# Calcula la importancia promedio de cada característica
feature_importance_xgb = pd.DataFrame(abs(shap_values_xgb).mean(axis=0), columns=['Importance'])
feature_importance_xgb['Feature'] = X_train[xgboost.feature_names_in_].columns

# Ordena el DataFrame por importancia
feature_importance_xgb = feature_importance_xgb.sort_values(by='Importance', ascending=False)

feature_importance_xgb = feature_importance_xgb.set_index('Feature')
feature_importance_xgb = feature_importance_xgb.rename(columns={'Importance': 'Importance_xgb'})



In [106]:
model_importance = pd.merge(feature_importance_rf,feature_importance_xgb,on='Feature',how='left')
model_importance['Importance'] = model_importance['Importance_rf']+model_importance['Importance_xgb']
model_importance = model_importance.sort_values(by='Importance',ascending=False)

In [107]:
model_importance

Unnamed: 0_level_0,Importance_rf,Importance_xgb,Importance
Feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Campaign_Type_SEARCH,0.007232,0.231537,0.238770
Campaign_Type_LINK_CLICKS,0.139073,0.008760,0.147833
Campaign_Type_REACH,0.125877,0.000063,0.125940
Campaign_Type_NoType,0.000989,0.071593,0.072582
Bench FB AvgVR,0.069897,0.000000,0.069897
...,...,...,...
Bench FB CTR,0.000000,0.000000,0.000000
Bench FB AvgCR,0.000000,0.000000,0.000000
Bench YT CPV,0.000000,0.000000,0.000000
Bench YT CTR,0.000000,0.000000,0.000000


In [108]:
model_importance.to_csv('model_importance_CPC_v4.csv')

In [109]:
grupos_features = pd.read_csv('Grupos_features_v4.csv')

In [110]:
model_importance = pd.merge(model_importance,grupos_features,on='Feature',how='left')
model_importance_gr = model_importance.groupby('Grupo').sum('Importance')['Importance'].sort_values(ascending=False)

In [111]:
model_importance_gr = pd.DataFrame(model_importance_gr).reset_index()

In [114]:
model_importance_gr

Unnamed: 0,Grupo,Importance
0,Campaign_Type,0.660375
1,Objective,0.137835
2,Country,0.131394
3,Industria,0.123467
4,Strategy,0.081034
5,X_pca,0.07947
6,Mes,0.064006
7,Media_type,0.061082
8,Platform,0.054785
9,Cost,0.044576


In [115]:
chart = alt.Chart(model_importance_gr).mark_bar(color='green').encode(
    x=alt.X('Importance:Q', title='Importancia'),
    y=alt.Y('Grupo:N', title='Grupo', sort='-x'),
).properties(
    title='CPC sin Client'
)

In [116]:
chart