# GENERACIÓN DE MODELOS

##### Autor: 
* Javier Tomás Fernández Martín

## 0. Preliminares

Antes de empezar con el código, importamos todas las librerias que vamos a necesitar.

In [36]:
import pandas as pd
import sklearn
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from catboost import CatBoostClassifier
from sklearn.linear_model import Perceptron
from sklearn.metrics import check_scoring
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold, train_test_split
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier


from sklearn.compose import make_column_transformer


from keras import layers
from keras import models
import tensorflow.keras as keras
import tensorflow as tf



Además de definir una semilla para asegurar que sea reproducible

In [37]:
random_state = 42


Y definimos un par de funciones que usaremos para evaluar módelos con distintos parámetros y compararlos entre sí

In [38]:
def optimize_params(estimator, X, y, cv, scoring=None, refit=True, **param_grid):
    """Exhaustive search over specified parameter values for an estimator."""
    grid_search_cv = GridSearchCV(estimator,
                                  param_grid,
                                  scoring=scoring,
                                  refit=refit,
                                  cv=cv,
                                  return_train_score=True).fit(X, y)

    cv_results = pd.DataFrame(grid_search_cv.cv_results_)

    # Drop the results for each validation split and sort by the refit metric
    labels = cv_results.filter(regex="split")
    by = cv_results.filter(regex="rank_test").columns[0]
    cv_results = cv_results.drop(labels, axis=1).sort_values(by)

    display(cv_results)

    return grid_search_cv

## 1.Carga de datos

In [39]:
df = pd.read_csv('bbdd.csv', index_col=0)


  df = pd.read_csv('bbdd.csv', index_col=0)


Mostramos una pequeña muestra para asegurar que se ha cargado bien y de paso echarle un vistazo a nuestra BBDD

In [40]:
df.sample(5, random_state=random_state)

Unnamed: 0,P1_TFT8_Ashe_Tier,P1_TFT8_Ashe_Obj1,P1_TFT8_Ashe_Obj2,P1_TFT8_Ashe_Obj3,P1_TFT8_Blitzcrank_Tier,P1_TFT8_Blitzcrank_Obj1,P1_TFT8_Blitzcrank_Obj2,P1_TFT8_Blitzcrank_Obj3,P1_TFT8_Galio_Tier,P1_TFT8_Galio_Obj1,...,P2_TFT8_Syndra_Obj2,P2_TFT8_Syndra_Obj3,P2_TFT8_Urgot_Tier,P2_TFT8_Urgot_Obj1,P2_TFT8_Urgot_Obj2,P2_TFT8_Urgot_Obj3,P2_Augment1,P2_Augment2,P2_Augment3,P1_Win
3883,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,TFT6_Augment_SecondWind1,TFT6_Augment_TradeSectorPlus,TFT8_Augment_KaisaCarry,1
2439,0,0,0,0,3,TFT_Item_BrambleVest,TFT_Item_IonicSpark,TFT8_Item_GenAEEmblemItem,0,0,...,TFT_Item_StatikkShiv,0,0,0,0,0,TFT8_Augment_HeartTrait,TFT8_Augment_AnnieSupport,TFT8_Augment_GenAEEmblem,0
1786,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,TFT6_Augment_SecondWind2,TFT6_Augment_Electrocharge2,TFT8_Augment_NunuSupport,1
2329,0,0,0,0,2,0,0,0,0,0,...,0,0,2,0,0,0,TFT6_Augment_ClearMind,TFT6_Augment_PortableForge,TFT8_Augment_ApheliosCarry,1
2567,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,TFT6_Augment_GrandGambler,TFT7_Augment_Preparation2,TFT8_Augment_SonaExile,0


Convertimos todas las columnas categóricas en tipo de datos de cadena de texto y separar las columnas numéricas y categóricas, para después tenerlas accesibles


In [41]:

cat_cols = [col for col in df.columns if 'Obj' in col or 'Augment' in col]
df[cat_cols] = df[cat_cols].astype(str)



num_cols = [col for col in df.columns if col not in cat_cols + ['P1_Win']]
cat_cols_idx = [df.columns.get_loc(col) for col in cat_cols]


Divido los datos

In [42]:

X = df.drop('P1_Win', axis=1)
X = X.filter(like='Tier')
y = df['P1_Win']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state, stratify=y)

In [43]:

cat_cols = [col for col in X.columns if 'Obj' in col or 'Augment' in col]



num_cols = [col for col in X.columns if col not in cat_cols + ['P1_Win']]

Y comprobamos que se han cargado bien

In [44]:
X.shape

(4263, 118)

In [45]:
X_train.sample(5, random_state=random_state)

Unnamed: 0,P1_TFT8_Ashe_Tier,P1_TFT8_Blitzcrank_Tier,P1_TFT8_Galio_Tier,P1_TFT8_Gangplank_Tier,P1_TFT8_Kayle_Tier,P1_TFT8_Lulu_Tier,P1_TFT8_Nasus_Tier,P1_TFT8_Lux_Tier,P1_TFT8_Poppy_Tier,P1_TFT8_Renekton_Tier,...,P2_TFT8_Zac_Tier,P2_TFT8_Zed_Tier,P2_TFT8_Aphelios_Tier,P2_TFT8_Fiddlesticks_Tier,P2_TFT8_Janna_Tier,P2_TFT8_Leona_Tier,P2_TFT8_Mordekaiser_Tier,P2_TFT8_Nunu_Tier,P2_TFT8_Syndra_Tier,P2_TFT8_Urgot_Tier
1248,0,2,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
12,0,0,0,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,2,0
3730,0,0,0,2,0,0,0,0,0,0,...,2,0,0,1,0,1,0,2,0,1
879,2,0,0,0,0,0,0,0,0,2,...,0,2,0,2,1,0,0,0,0,0
4143,0,2,0,0,0,0,0,0,0,0,...,0,0,1,2,0,1,0,0,0,1


In [46]:
y_train.sample(5, random_state=random_state)

1248    0
12      1
3730    1
879     0
4143    1
Name: P1_Win, dtype: int64

In [47]:
X_test.sample(5, random_state=random_state)

Unnamed: 0,P1_TFT8_Ashe_Tier,P1_TFT8_Blitzcrank_Tier,P1_TFT8_Galio_Tier,P1_TFT8_Gangplank_Tier,P1_TFT8_Kayle_Tier,P1_TFT8_Lulu_Tier,P1_TFT8_Nasus_Tier,P1_TFT8_Lux_Tier,P1_TFT8_Poppy_Tier,P1_TFT8_Renekton_Tier,...,P2_TFT8_Zac_Tier,P2_TFT8_Zed_Tier,P2_TFT8_Aphelios_Tier,P2_TFT8_Fiddlesticks_Tier,P2_TFT8_Janna_Tier,P2_TFT8_Leona_Tier,P2_TFT8_Mordekaiser_Tier,P2_TFT8_Nunu_Tier,P2_TFT8_Syndra_Tier,P2_TFT8_Urgot_Tier
1531,0,2,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,0,0,0,0
3155,0,0,2,3,0,0,3,0,0,0,...,0,0,1,0,2,1,0,0,0,0
1264,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
637,0,0,0,0,0,0,0,2,0,0,...,0,0,0,0,0,0,0,0,0,0
968,0,0,0,0,0,0,0,0,0,0,...,0,0,0,2,0,0,1,0,2,0


In [48]:
y_test.sample(5, random_state=random_state)

1531    1
3155    1
1264    0
637     1
968     1
Name: P1_Win, dtype: int64

## 2. Generación de modelos

Primero vamos a crear el preprocesamiento que vamos a aplicar a los datos. 

In [49]:
preprocessor = make_column_transformer(
    ((MinMaxScaler(feature_range=(0, 1))), num_cols),
    (OneHotEncoder(handle_unknown="ignore", sparse=False), cat_cols),
    remainder="passthrough"
)

Voy a utilizar la técnica de validación cruzada estratificada de 1 x 10 en la mayoría de modelos

In [50]:
n_splits = 10
n_repeats = 1

cv = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)


### 2.1 VECINOS MÁS CERCANOS

In [51]:
n_neighbors = 5
k_neighbors_model = KNeighborsClassifier(n_neighbors)

pipelineKNN = make_pipeline(preprocessor, k_neighbors_model)

In [52]:
pipelineKNN.fit(X_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  ['P1_TFT8_Ashe_Tier',
                                                   'P1_TFT8_Blitzcrank_Tier',
                                                   'P1_TFT8_Galio_Tier',
                                                   'P1_TFT8_Gangplank_Tier',
                                                   'P1_TFT8_Kayle_Tier',
                                                   'P1_TFT8_Lulu_Tier',
                                                   'P1_TFT8_Nasus_Tier',
                                                   'P1_TFT8_Lux_Tier',
                                                   'P1_TFT8_Poppy_Tier',
                                                   'P1_TFT8_Renekton_Tier',
                                

In [53]:
accKNN = pipelineKNN.score(X_test, y_test)

print('Train score:', pipelineKNN.score(X_train, y_train))
print('Test score:', accKNN)

Train score: 0.7281524926686217
Test score: 0.5967174677608441


Como el número de vecinos puede ser muy relevante a la hora de evaluar este tipo de modelos, merece la pena comprobar que no se puede mejorar el resultado obtenido optimizando dicho parámetro

In [54]:
n_splits = 10
n_repeats = 5

cv_knn = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)

In [55]:
n_neighbors = [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
weights = ["uniform", "distance"]

k_neighbors_classifier = optimize_params(pipelineKNN, X_train, y_train,cv_knn, kneighborsclassifier__weights=weights, kneighborsclassifier__n_neighbors=n_neighbors)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kneighborsclassifier__n_neighbors,param_kneighborsclassifier__weights,params,mean_test_score,std_test_score,rank_test_score,mean_train_score,std_train_score
11,0.011271,0.001154,0.02547,0.001931,10,distance,"{'kneighborsclassifier__n_neighbors': 10, 'kne...",0.584282,0.02275,1,1.0,0.0
7,0.011298,0.002027,0.025846,0.002188,8,distance,"{'kneighborsclassifier__n_neighbors': 8, 'knei...",0.579765,0.0214,2,1.0,0.0
4,0.010818,0.001992,0.029812,0.001856,7,uniform,"{'kneighborsclassifier__n_neighbors': 7, 'knei...",0.579589,0.02181,3,0.700124,0.004062
9,0.011391,0.001687,0.025805,0.00159,9,distance,"{'kneighborsclassifier__n_neighbors': 9, 'knei...",0.579355,0.021447,4,1.0,0.0
5,0.010542,0.00174,0.026668,0.004281,7,distance,"{'kneighborsclassifier__n_neighbors': 7, 'knei...",0.579238,0.02175,5,1.0,0.0
15,0.011165,0.001393,0.024948,0.001669,12,distance,"{'kneighborsclassifier__n_neighbors': 12, 'kne...",0.57824,0.022592,6,1.0,0.0
13,0.011317,0.001545,0.02526,0.001759,11,distance,"{'kneighborsclassifier__n_neighbors': 11, 'kne...",0.577713,0.021693,7,1.0,0.0
8,0.011608,0.001636,0.032119,0.008572,9,uniform,"{'kneighborsclassifier__n_neighbors': 9, 'knei...",0.577654,0.021498,8,0.684503,0.004511
3,0.010362,0.001399,0.02569,0.002248,6,distance,"{'kneighborsclassifier__n_neighbors': 6, 'knei...",0.577126,0.022457,9,1.0,0.0
12,0.011193,0.001191,0.030263,0.002244,11,uniform,"{'kneighborsclassifier__n_neighbors': 11, 'kne...",0.576246,0.021552,10,0.67334,0.004757


### 2.2 DECISION TREE 

Crear el pipeline

In [56]:
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', DecisionTreeClassifier())
    ])


Entrenar, predecir y evaluar

In [57]:
pipeline.fit(X_train, y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  ['P1_TFT8_Ashe_Tier',
                                                   'P1_TFT8_Blitzcrank_Tier',
                                                   'P1_TFT8_Galio_Tier',
                                                   'P1_TFT8_Gangplank_Tier',
                                                   'P1_TFT8_Kayle_Tier',
                                                   'P1_TFT8_Lulu_Tier',
                                                   'P1_TFT8_Nasus_Tier',
                                                   'P1_TFT8_Lux_Tier',
                                                   'P1_TFT8_Poppy_Tier',
                                                   'P1_TFT8_Renekton_Tier',
                                     

In [58]:
accTree = pipeline.score(X_test, y_test)
print('Train score:', pipeline.score(X_train, y_train))
print('Test score:', accTree)

Train score: 1.0
Test score: 0.570926143024619


Se muestra la matriz de confusión

In [59]:
y_pred = pipeline.predict(X_test)

In [60]:
print(sklearn.metrics.confusion_matrix(y_test,y_pred))

[[249 178]
 [188 238]]


Una vez tenemos la estructura de un arbol básico, vamos a probar a evaluarlo con distintos parámetros

In [61]:
pipeline = make_pipeline(preprocessor, DecisionTreeClassifier(random_state=random_state))

In [62]:
criterion = ["gini", "entropy"]
max_depth = [1, 2, 3, 4, 5, 6, None]
ccp_alpha = [0.0, 0.01, 0.02, 0.03, 0.04, 0.05]

decision_tree_classifier = optimize_params(pipeline, X_train, y_train, cv, decisiontreeclassifier__criterion=criterion, decisiontreeclassifier__max_depth=max_depth, decisiontreeclassifier__ccp_alpha=ccp_alpha)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_decisiontreeclassifier__ccp_alpha,param_decisiontreeclassifier__criterion,param_decisiontreeclassifier__max_depth,params,mean_test_score,std_test_score,rank_test_score,mean_train_score,std_train_score
3,0.020804,0.002394,0.002603,0.000664,0.0,gini,4,"{'decisiontreeclassifier__ccp_alpha': 0.0, 'de...",0.571261,0.022094,1,0.591919,0.008649
10,0.020806,0.001989,0.002902,0.000538,0.0,entropy,4,"{'decisiontreeclassifier__ccp_alpha': 0.0, 'de...",0.567449,0.021076,2,0.588759,0.008446
12,0.027691,0.002567,0.003188,0.000857,0.0,entropy,6,"{'decisiontreeclassifier__ccp_alpha': 0.0, 'de...",0.567155,0.018370,3,0.614109,0.010874
13,0.059893,0.003533,0.003610,0.000496,0.0,entropy,,"{'decisiontreeclassifier__ccp_alpha': 0.0, 'de...",0.564223,0.027858,4,1.000000,0.000000
5,0.026884,0.002358,0.003413,0.000499,0.0,gini,6,"{'decisiontreeclassifier__ccp_alpha': 0.0, 'de...",0.562170,0.019935,5,0.629651,0.011379
...,...,...,...,...,...,...,...,...,...,...,...,...,...
47,0.022603,0.001261,0.002803,0.000601,0.03,gini,6,"{'decisiontreeclassifier__ccp_alpha': 0.03, 'd...",0.500880,0.001173,22,0.500880,0.000130
48,0.046925,0.002516,0.002848,0.000443,0.03,gini,,"{'decisiontreeclassifier__ccp_alpha': 0.03, 'd...",0.500880,0.001173,22,0.500880,0.000130
49,0.012412,0.001021,0.002558,0.000472,0.03,entropy,1,"{'decisiontreeclassifier__ccp_alpha': 0.03, 'd...",0.500880,0.001173,22,0.500880,0.000130
51,0.018337,0.001981,0.002709,0.000644,0.03,entropy,3,"{'decisiontreeclassifier__ccp_alpha': 0.03, 'd...",0.500880,0.001173,22,0.500880,0.000130


### 2.3 RANDOM FOREST

In [63]:
random_forest_model = RandomForestClassifier(random_state=random_state)

pipelineRandom = make_pipeline(preprocessor, random_forest_model)

In [64]:
pipelineRandom.fit(X_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  ['P1_TFT8_Ashe_Tier',
                                                   'P1_TFT8_Blitzcrank_Tier',
                                                   'P1_TFT8_Galio_Tier',
                                                   'P1_TFT8_Gangplank_Tier',
                                                   'P1_TFT8_Kayle_Tier',
                                                   'P1_TFT8_Lulu_Tier',
                                                   'P1_TFT8_Nasus_Tier',
                                                   'P1_TFT8_Lux_Tier',
                                                   'P1_TFT8_Poppy_Tier',
                                                   'P1_TFT8_Renekton_Tier',
                                

In [65]:
accRandom = pipelineRandom.score(X_test, y_test)

print('Train score:', pipelineRandom.score(X_train, y_train))
print('Test score:', accRandom)

Train score: 1.0
Test score: 0.6307151230949589


### 2.4 CAT BOOST

In [66]:
catBoost = CatBoostClassifier(
    learning_rate=0.1,
    random_state=random_state,
    l2_leaf_reg = 0.2,
    cat_features=cat_cols)



catBoost.fit(X_train, y_train)

0:	learn: 0.6887639	total: 3.34ms	remaining: 3.34s
1:	learn: 0.6837628	total: 5.66ms	remaining: 2.82s
2:	learn: 0.6785010	total: 7.54ms	remaining: 2.51s
3:	learn: 0.6729765	total: 9.88ms	remaining: 2.46s
4:	learn: 0.6693966	total: 12.4ms	remaining: 2.47s
5:	learn: 0.6655104	total: 14.6ms	remaining: 2.41s
6:	learn: 0.6618946	total: 16.5ms	remaining: 2.34s
7:	learn: 0.6583248	total: 18.7ms	remaining: 2.32s
8:	learn: 0.6551885	total: 20.8ms	remaining: 2.29s
9:	learn: 0.6513089	total: 22.8ms	remaining: 2.25s
10:	learn: 0.6479270	total: 25ms	remaining: 2.25s
11:	learn: 0.6445619	total: 27.4ms	remaining: 2.25s
12:	learn: 0.6407275	total: 29.5ms	remaining: 2.24s
13:	learn: 0.6380312	total: 31.7ms	remaining: 2.23s
14:	learn: 0.6363535	total: 34ms	remaining: 2.23s
15:	learn: 0.6340796	total: 36.7ms	remaining: 2.25s
16:	learn: 0.6314014	total: 39.3ms	remaining: 2.27s
17:	learn: 0.6290790	total: 41.7ms	remaining: 2.28s
18:	learn: 0.6269138	total: 44.5ms	remaining: 2.3s
19:	learn: 0.6243665	total:

<catboost.core.CatBoostClassifier at 0x1f9eeb4e050>

In [67]:
accCat = catBoost.score(X_test, y_test)

print('Train score:', catBoost.score(X_train, y_train))
print('Test score:', accCat)

Train score: 1.0
Test score: 0.6412661195779601


Como el modelo sobreajusta (train score = 1), se prueba otro modelo con menos árboles

In [68]:
catBoost2 = CatBoostClassifier(
    learning_rate=0.1,
    random_state=random_state,
    l2_leaf_reg = 0.2,
    cat_features=cat_cols,
    n_estimators=150)



catBoost2.fit(X_train, y_train)

0:	learn: 0.6887639	total: 2.7ms	remaining: 402ms


1:	learn: 0.6837628	total: 5.29ms	remaining: 392ms
2:	learn: 0.6785010	total: 7.66ms	remaining: 375ms
3:	learn: 0.6729765	total: 10ms	remaining: 366ms
4:	learn: 0.6693966	total: 12.3ms	remaining: 358ms
5:	learn: 0.6655104	total: 14.7ms	remaining: 352ms
6:	learn: 0.6618946	total: 17.2ms	remaining: 351ms
7:	learn: 0.6583248	total: 20ms	remaining: 355ms
8:	learn: 0.6551885	total: 22.6ms	remaining: 354ms
9:	learn: 0.6513089	total: 25.2ms	remaining: 353ms
10:	learn: 0.6479270	total: 27.9ms	remaining: 352ms
11:	learn: 0.6445619	total: 30ms	remaining: 345ms
12:	learn: 0.6407275	total: 32.1ms	remaining: 338ms
13:	learn: 0.6380312	total: 34.7ms	remaining: 337ms
14:	learn: 0.6363535	total: 36.8ms	remaining: 331ms
15:	learn: 0.6340796	total: 38.9ms	remaining: 325ms
16:	learn: 0.6314014	total: 41.4ms	remaining: 324ms
17:	learn: 0.6290790	total: 43.8ms	remaining: 321ms
18:	learn: 0.6269138	total: 45.8ms	remaining: 316ms
19:	learn: 0.6243665	total: 47.9ms	remaining: 311ms
20:	learn: 0.6227816	total:

<catboost.core.CatBoostClassifier at 0x1f9eeb4ce50>

In [69]:
accCat2 = catBoost2.score(X_test, y_test)

print('Train score:', catBoost2.score(X_train, y_train))
print('Test score:', accCat2)

Train score: 0.8912023460410557
Test score: 0.64947245017585


### 2.5 Adaptive boosting

In [70]:
adaboost_model = AdaBoostClassifier(random_state=random_state)


pipelineAdaBoost = make_pipeline(preprocessor, adaboost_model)

In [71]:
pipelineAdaBoost.fit(X_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  ['P1_TFT8_Ashe_Tier',
                                                   'P1_TFT8_Blitzcrank_Tier',
                                                   'P1_TFT8_Galio_Tier',
                                                   'P1_TFT8_Gangplank_Tier',
                                                   'P1_TFT8_Kayle_Tier',
                                                   'P1_TFT8_Lulu_Tier',
                                                   'P1_TFT8_Nasus_Tier',
                                                   'P1_TFT8_Lux_Tier',
                                                   'P1_TFT8_Poppy_Tier',
                                                   'P1_TFT8_Renekton_Tier',
                                

In [72]:
accAdaBoost = pipelineAdaBoost.score(X_test, y_test)

print('Train score:', pipelineAdaBoost.score(X_train, y_train))
print('Test score:', accAdaBoost)

Train score: 0.6777126099706745
Test score: 0.6565064478311841


In [73]:

base_estimator = DecisionTreeClassifier(random_state=random_state)
base_estimator = [base_estimator]
n_estimators = [20, 50, 100]
learning_rate = [0.95, 1.0]
max_depth = [1, 2, 3]
criterion = ["gini", "entropy"]


adaboost_classifier = optimize_params(pipelineAdaBoost, X_train, y_train, cv, adaboostclassifier__base_estimator=base_estimator, adaboostclassifier__n_estimators=n_estimators, adaboostclassifier__learning_rate=learning_rate, adaboostclassifier__base_estimator__criterion = criterion, adaboostclassifier__base_estimator__max_depth=max_depth)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_adaboostclassifier__base_estimator,param_adaboostclassifier__base_estimator__criterion,param_adaboostclassifier__base_estimator__max_depth,param_adaboostclassifier__learning_rate,param_adaboostclassifier__n_estimators,params,mean_test_score,std_test_score,rank_test_score,mean_train_score,std_train_score
20,0.566888,0.034601,0.01932,0.002712,"DecisionTreeClassifier(criterion='entropy', ma...",entropy,1,0.95,100,{'adaboostclassifier__base_estimator': Decisio...,0.65044,0.024528,1,0.696546,0.003015
2,0.503171,0.011688,0.01991,0.001292,"DecisionTreeClassifier(criterion='entropy', ma...",gini,1,0.95,100,{'adaboostclassifier__base_estimator': Decisio...,0.648974,0.023826,2,0.695471,0.002491
5,0.494703,0.009781,0.018623,0.002038,"DecisionTreeClassifier(criterion='entropy', ma...",gini,1,1.0,100,{'adaboostclassifier__base_estimator': Decisio...,0.648094,0.023534,3,0.696188,0.004176
23,0.49386,0.011809,0.018365,0.001738,"DecisionTreeClassifier(criterion='entropy', ma...",entropy,1,1.0,100,{'adaboostclassifier__base_estimator': Decisio...,0.646041,0.024042,4,0.694982,0.004013
26,0.732753,0.037831,0.019965,0.002664,"DecisionTreeClassifier(criterion='entropy', ma...",entropy,2,0.95,100,{'adaboostclassifier__base_estimator': Decisio...,0.645748,0.019751,5,0.787846,0.008067
7,0.36434,0.013752,0.010659,0.00141,"DecisionTreeClassifier(criterion='entropy', ma...",gini,2,0.95,50,{'adaboostclassifier__base_estimator': Decisio...,0.644575,0.026157,6,0.743728,0.004645
8,0.711537,0.018914,0.018618,0.002202,"DecisionTreeClassifier(criterion='entropy', ma...",gini,2,0.95,100,{'adaboostclassifier__base_estimator': Decisio...,0.644575,0.020266,7,0.790942,0.003603
29,0.724235,0.02303,0.018395,0.001774,"DecisionTreeClassifier(criterion='entropy', ma...",entropy,2,1.0,100,{'adaboostclassifier__base_estimator': Decisio...,0.643988,0.026367,8,0.791235,0.006172
22,0.265771,0.00856,0.011321,0.001712,"DecisionTreeClassifier(criterion='entropy', ma...",entropy,1,1.0,50,{'adaboostclassifier__base_estimator': Decisio...,0.641056,0.02238,9,0.676703,0.006585
25,0.352993,0.010273,0.010581,0.001116,"DecisionTreeClassifier(criterion='entropy', ma...",entropy,2,0.95,50,{'adaboostclassifier__base_estimator': Decisio...,0.640762,0.017011,10,0.738351,0.003524


### 2.6 GRADIENT BOOSTING


In [74]:
gradient_boosting_model = GradientBoostingClassifier(random_state=random_state)


pipelineGradientBoost = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', gradient_boosting_model)
    ])


In [75]:
pipelineGradientBoost.fit(X_train, y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  ['P1_TFT8_Ashe_Tier',
                                                   'P1_TFT8_Blitzcrank_Tier',
                                                   'P1_TFT8_Galio_Tier',
                                                   'P1_TFT8_Gangplank_Tier',
                                                   'P1_TFT8_Kayle_Tier',
                                                   'P1_TFT8_Lulu_Tier',
                                                   'P1_TFT8_Nasus_Tier',
                                                   'P1_TFT8_Lux_Tier',
                                                   'P1_TFT8_Poppy_Tier',
                                                   'P1_TFT8_Renekton_Tier',
                                     

In [76]:
accGradientBoost = pipelineGradientBoost.score(X_test, y_test)


print('Train score:', pipelineGradientBoost.score(X_train, y_train))
print('Test score:', accGradientBoost)

Train score: 0.7390029325513197
Test score: 0.6471277842907386


### 2.7 HISTOGRAM GRADIENT BOOSTING

In [77]:
hist_gradient_boosting_model = HistGradientBoostingClassifier(random_state=random_state)


pipelineHist = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', hist_gradient_boosting_model)
    ])


In [78]:
pipelineHist.fit(X_train, y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  ['P1_TFT8_Ashe_Tier',
                                                   'P1_TFT8_Blitzcrank_Tier',
                                                   'P1_TFT8_Galio_Tier',
                                                   'P1_TFT8_Gangplank_Tier',
                                                   'P1_TFT8_Kayle_Tier',
                                                   'P1_TFT8_Lulu_Tier',
                                                   'P1_TFT8_Nasus_Tier',
                                                   'P1_TFT8_Lux_Tier',
                                                   'P1_TFT8_Poppy_Tier',
                                                   'P1_TFT8_Renekton_Tier',
                                     

In [79]:
accHist = pipelineHist.score(X_test, y_test)


print('Train score:', pipelineHist.score(X_train, y_train))
print('Test score:', accHist)

Train score: 0.895307917888563
Test score: 0.6283704572098476


### 2.8 PERCEPTRON

In [80]:
perceptron_model = Perceptron(random_state = random_state)
pipelinePerceptron = make_pipeline(preprocessor, perceptron_model)

In [81]:
pipelinePerceptron.fit(X_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('minmaxscaler',
                                                  MinMaxScaler(),
                                                  ['P1_TFT8_Ashe_Tier',
                                                   'P1_TFT8_Blitzcrank_Tier',
                                                   'P1_TFT8_Galio_Tier',
                                                   'P1_TFT8_Gangplank_Tier',
                                                   'P1_TFT8_Kayle_Tier',
                                                   'P1_TFT8_Lulu_Tier',
                                                   'P1_TFT8_Nasus_Tier',
                                                   'P1_TFT8_Lux_Tier',
                                                   'P1_TFT8_Poppy_Tier',
                                                   'P1_TFT8_Renekton_Tier',
                                

In [82]:
accPerceptron = pipelinePerceptron.score(X_test, y_test)

print('Train score:', pipelinePerceptron.score(X_train, y_train))
print('Test score:', accPerceptron)

Train score: 0.6099706744868035
Test score: 0.5849941383352872


### 2.9 REDES NEURONALES

In [83]:
X_train_preprocessed = preprocessor.fit_transform(X_train)
X_test_preprocessed = preprocessor.transform(X_test)

In [84]:
callbacks1 = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath="callbacks1.keras",
        save_best_only=True,
        monitor="val_loss")
]


In [85]:
model = models.Sequential()

model.add(layers.Dense(units=32, activation='relu', input_dim=X_train_preprocessed.shape[1]))

model.add(layers.Dropout(0.15))
model.add(layers.Dense(units=1, activation='sigmoid'))


In [86]:
model.compile(optimizer="Adam",
    loss="binary_crossentropy",
    metrics=["accuracy"])

In [87]:
history1 = model.fit(X_train_preprocessed, y_train, epochs=20, batch_size=32, validation_data=(X_test_preprocessed, y_test), callbacks=callbacks1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [88]:
best1 = keras.models.load_model('callbacks1.keras')


In [89]:
model2 = models.Sequential()

model2.add(layers.Dense(units=32, activation='relu', input_dim=X_train_preprocessed.shape[1]))
model2.add(layers.Dropout(0.15))
model2.add(layers.Dense(units=64, activation='relu'))
model2.add(layers.Dropout(0.15))
model2.add(layers.Dense(units=128, activation='relu'))
model2.add(layers.Dropout(0.15))
model2.add(layers.Dense(units=64, activation='relu'))
model2.add(layers.Dense(units=1, activation='sigmoid'))


In [90]:
model2.compile(optimizer="Adam",
    loss="binary_crossentropy",
    metrics=["accuracy"])

In [91]:
callbacks2 = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath="callbacks2.keras",
        save_best_only=True,
        monitor="val_loss")
]


In [92]:
history2 = model2.fit(X_train_preprocessed, y_train, epochs=20, batch_size=32, validation_data=(X_test_preprocessed, y_test), callbacks=callbacks2)

Epoch 1/20


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [93]:
best2 = keras.models.load_model('callbacks2.keras')


In [97]:
loss1, accRRNN1 = best1.evaluate(X_test, y_test)
loss2, accRRNN2 = best2.evaluate(X_test, y_test)



## 3. EVALUACIÓN DE MODELOS

In [98]:
models = pd.DataFrame({
    'Model': ['Decision Tree', 'CatBoost', 'AdaBoost', 'Grad boost', 'Histogram Grad','KNN', 'Random Forest', 'Perceptron', 'RRNN'],
    'Score': [accTree, accCat, accAdaBoost, accGradientBoost, accHist, accKNN, accRandom, accPerceptron, accRRNN1]})

models.sort_values(by='Score', ascending=False)

Unnamed: 0,Model,Score
8,RRNN,0.662368
2,AdaBoost,0.656506
3,Grad boost,0.647128
1,CatBoost,0.641266
6,Random Forest,0.630715
4,Histogram Grad,0.62837
5,KNN,0.596717
7,Perceptron,0.584994
0,Decision Tree,0.570926


Como se puede observar, no se obtienen buenas predicciones, y puede ser debido a la gran cantidad de 0 que tiene la base de datos. Lo comprobamos

In [123]:
print((df == "0").sum(axis="rows").to_string())


P1_TFT8_Ashe_Tier               0
P1_TFT8_Ashe_Obj1            4219
P1_TFT8_Ashe_Obj2            4248
P1_TFT8_Ashe_Obj3            4256
P1_TFT8_Blitzcrank_Tier         0
P1_TFT8_Blitzcrank_Obj1      4105
P1_TFT8_Blitzcrank_Obj2      4166
P1_TFT8_Blitzcrank_Obj3      4200
P1_TFT8_Galio_Tier              0
P1_TFT8_Galio_Obj1           4218
P1_TFT8_Galio_Obj2           4226
P1_TFT8_Galio_Obj3           4229
P1_TFT8_Gangplank_Tier          0
P1_TFT8_Gangplank_Obj1       4044
P1_TFT8_Gangplank_Obj2       4160
P1_TFT8_Gangplank_Obj3       4200
P1_TFT8_Kayle_Tier              0
P1_TFT8_Kayle_Obj1           4239
P1_TFT8_Kayle_Obj2           4245
P1_TFT8_Kayle_Obj3           4249
P1_TFT8_Lulu_Tier               0
P1_TFT8_Lulu_Obj1            4083
P1_TFT8_Lulu_Obj2            4136
P1_TFT8_Lulu_Obj3            4158
P1_TFT8_Nasus_Tier              0
P1_TFT8_Nasus_Obj1           4184
P1_TFT8_Nasus_Obj2           4230
P1_TFT8_Nasus_Obj3           4234
P1_TFT8_Lux_Tier                0
P1_TFT8_Lux_Ob

Y efectivamente, el 90% de los valores de los objetos de los campeones, al menos de coste 1, son valores nulos, introduciendo mucho ruido a los modelos y haciendo imposible crear modelos eficientes.