In [1]:
import os.path
import pandas as pd

import math
import numpy as np
from numpy import * 

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix

from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier

import warnings
warnings.filterwarnings('ignore')

### 1.Preparacion de los datos

#### Lee el dataset que generamos anteriormente 

In [2]:
#Lee el excel generado en el otro documento .ipynb
path = "../DecatlonEstadistics/resources/data.xlsx"
norm_path = os.path.normpath(path) 
ddbb = pd.read_excel(norm_path)

del ddbb['1500m NF']

ddbb['Country']=ddbb['Country'].str.lower()

ddbb.columns = ['Position', 'Athlete', 'Age', 'Country', 'Total Points', 'Year', 'Competition', '100m', 
                 '100m Points', 'Lj', 'Lj Points', 'Sp', 'Sp Points', 'Hj', 'Hj Points', '400m', '400m Points', 
                 '110m H', '110m H Points', 'Dt', 'Dt Points', 'Pv', 'Pv Points', 'Jt', 'Jt Points', 
                 '1500m Points', '1500m']

ddbbData = ddbb[['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m',
                 '110m H','110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points','1500m']]


#### Normaliza los datos del dataset entre [0,1]

In [3]:
#Normaliza los datos
normalizer = preprocessing.MinMaxScaler()
ddbbNormalized = normalizer.fit_transform(ddbbData)

#Introduce los datos normalizados en un nuevo en un dataframe
ddbbDataNorm = pd.DataFrame(ddbbNormalized)
ddbbDataNorm.columns  = ['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m','110m H',
                '110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points','1500m']

#Da valore positivos a tiempos bajos en las pruebas de pista
for i in range(len(ddbbDataNorm['100m'])): ddbbDataNorm['100m'][i] = 1 - ddbbDataNorm['100m'][i]
for i in range(len(ddbbDataNorm['400m'])): ddbbDataNorm['400m'][i] = 1 - ddbbDataNorm['400m'][i]
for i in range(len(ddbbDataNorm['1500m'])): ddbbDataNorm['1500m'][i] = 1 - ddbbDataNorm['1500m'][i]
for i in range(len(ddbbDataNorm['110m H'])): ddbbDataNorm['110m H'][i] = 1 - ddbbDataNorm['110m H'][i]

#### Algoritmos y metricas que se van a estudiar

In [4]:
#Metricas de clasificacion basadas en la matriz de confusion
metricas = {
    'Accuracy': metrics.accuracy_score,
    'Precission': metrics.precision_score,
    'Recall': metrics.recall_score,
    'F1': metrics.f1_score,
}

In [5]:
#Diccionario con los algoritmos que se van a estudiar
algoritmos = {
    'KNN': KNeighborsClassifier(n_neighbors=5),
    #'REGLI': linear_model.LinearRegression(),
    'REGLO': linear_model.LogisticRegression(random_state=42),
    'GNB': GaussianNB(),
    'PERCEPTRON': Perceptron(tol=1e-5, random_state=1),
    'MLP': MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1),
    'SVM': svm.SVC(kernel='linear'),
    'DESGRA': SGDClassifier(loss="hinge", penalty="l2"),
    'ARBOL': DecisionTreeClassifier(criterion="entropy", max_depth=5),
    'BOSQUE': RandomForestClassifier(n_estimators=100)
}

In [6]:
algoritmosClust = {
    'KMEANS': KMeans(n_clusters=1 , random_state= 5),
}

In [7]:
algoritmosIA = {
    
}

### 4.Hold Out

#### Metricas para los diferentes algoritmos -> Separacion de datos HoldOut

Funcion que genera las etiquetas predecidas para los diferentes algoritmos

In [8]:
#Funcion que genera predicciones para casos de test en función de la separación hecha por holdOut
def generaModelosHO(nombre,X_train, X_test, y_train, y_test):
    
    #Estudia el algoritmo pasado
    algoritmo = algoritmos[nombre]

    #Entrena el modelo
    algoritmo.fit(X_train, y_train)

    #Predice para los datos de test
    y_pred = algoritmo.predict(X_test)
    
    #Devuelve el las predicciones para los casos de test
    return(y_pred)

Funcion que devuelve para un algoritmo (Se le pasa las etiquetas reales y las generadas por el algoritmo) dado todas sus metricas.

Se usa tanto para Hold Out como Cross Validation

In [9]:
#Funcion para la evaluacion de las diferentes metricas
def evaluaMetricas(y_test, y_pred):
    resultado = {}
    for nombre, funcion in metricas.items():
        if(nombre == 'Recall' or nombre == 'Precission' or nombre == 'F1'):
            resultado[nombre] = funcion(y_test, y_pred,average='weighted')
        else:
            resultado[nombre] = funcion(y_test, y_pred)
            
    return resultado

Funcion que devuelve el resultado de las metricas para los algoritmos definidos en la parte superior

In [10]:
def mainHoldOut(ddbbData):
    y_pred = {}
    X_train, X_test, y_train, y_test = train_test_split(ddbbData, ddbb['Position'], test_size=0.3, random_state=42)

    for nombre, funcion in algoritmos.items():
        y_pred[nombre] = generaModelosHO(nombre,X_train, X_test, y_train, y_test)
        
    resultado = {}
    for nombre, funcionA in algoritmos.items():
        resultado[nombre] = evaluaMetricas(y_test, y_pred[nombre])
    
    #Tabla de metricas para los algoritmos estudiados
    return resultado

### 5.Cross Validation

#### Metricas para los diferentes algoritmos -> Separacion de datos CrossValidation

In [11]:
def generaModelosCV(funcion, ddbbData):
    y_pred = cross_val_predict(funcion, ddbbData, ddbb['Position'], cv=KFold(n_splits=10,random_state=42,shuffle=True))
    
    return y_pred

In [12]:
def mainCrossVal(ddbbData):
    y_pred = {}

    for nombre, funcion in algoritmos.items():
        y_pred[nombre] = generaModelosCV(funcion, ddbbData)
        
    resultado = {}
    for nombre, funcionA in algoritmos.items():
        resultado[nombre] = evaluaMetricas(ddbb['Position'], y_pred[nombre])
    
    #Tabla de metricas para los algoritmos estudiados
    return resultado

### 6.Comparativas de las metricas

#### Resultados de las metricas para HoldOut y CrossValidation de cada prueba

In [13]:
#PRUEBA -> 100m Lisos
ddbbData = ddbbDataNorm[['100m','100m Points']]
ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> 100m lisos')
tabla

METRICAS HOLD OUT-> 100m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.062745,0.047059,0.066667,0.039216,0.07451,0.047059,0.05098,0.054902,0.062745
Precission,0.044005,0.006458,0.042613,0.003916,0.021873,0.012014,0.007394,0.052794,0.085666
Recall,0.062745,0.047059,0.066667,0.039216,0.07451,0.047059,0.05098,0.054902,0.062745
F1,0.043435,0.011243,0.044211,0.007119,0.031055,0.017485,0.012036,0.042314,0.066802


In [14]:
print()
print()
print('METRICAS CROSS VALIDATION-> 100m lisos')
tablaExcelMetricas = tabla1.copy()
tabla1



METRICAS CROSS VALIDATION-> 100m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.056471,0.051765,0.044706,0.048235,0.062353,0.036471,0.04,0.055294,0.057647
Precission,0.048621,0.015632,0.017951,0.035838,0.035301,0.011897,0.033027,0.044179,0.055889
Recall,0.056471,0.051765,0.044706,0.048235,0.062353,0.036471,0.04,0.055294,0.057647
F1,0.045673,0.020995,0.024989,0.033611,0.038644,0.016946,0.032139,0.045135,0.056045


In [15]:
#PRUEBA -> Salto de longitud
ddbbData = ddbbDataNorm[['Lj','Lj Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de longitud')
tabla



METRICAS HOLD OUT-> Salto de longitud


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.039216,0.027451,0.062745,0.039216,0.047059,0.031373,0.031373,0.05098,0.031373
Precission,0.019032,0.001911,0.028915,0.001588,0.012073,0.000984,0.001033,0.03754,0.039083
Recall,0.039216,0.027451,0.062745,0.039216,0.047059,0.031373,0.031373,0.05098,0.031373
F1,0.023732,0.003331,0.034327,0.003052,0.017413,0.001909,0.002,0.036317,0.031968


In [16]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de longitud')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Salto de longitud


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.042353,0.021176,0.058824,0.029412,0.048235,0.02,0.04,0.049412,0.044706
Precission,0.0271,0.006142,0.074016,0.011114,0.019002,0.005971,0.038128,0.043914,0.043159
Recall,0.042353,0.021176,0.058824,0.029412,0.048235,0.02,0.04,0.049412,0.044706
F1,0.029041,0.009372,0.047598,0.015555,0.024399,0.009012,0.027759,0.042069,0.043419


In [17]:
#PRUEBA -> Lanzamiento de peso
ddbbData = ddbbDataNorm[['Sp','Sp Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de peso')
tabla



METRICAS HOLD OUT-> Lanzamiento de peso


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.027451,0.023529,0.066667,0.054902,0.054902,0.031373,0.054902,0.047059,0.039216
Precission,0.009373,0.010928,0.017122,0.003026,0.00947,0.000984,0.004868,0.032102,0.033119
Recall,0.027451,0.023529,0.066667,0.054902,0.054902,0.031373,0.054902,0.047059,0.039216
F1,0.013544,0.012997,0.026868,0.005736,0.015639,0.001909,0.008791,0.034148,0.033999


In [18]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de peso')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de peso


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.032941,0.042353,0.055294,0.042353,0.052941,0.018824,0.051765,0.048235,0.028235
Precission,0.013588,0.015562,0.015167,0.016918,0.038552,0.009575,0.054581,0.031842,0.028547
Recall,0.032941,0.042353,0.055294,0.042353,0.052941,0.018824,0.051765,0.048235,0.028235
F1,0.018815,0.020978,0.022016,0.022201,0.027788,0.011055,0.040114,0.035789,0.028318


In [19]:
#PRUEBA -> Salto de altura
ddbbData = ddbbDataNorm[['Hj','Hj Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de altura')
tabla



METRICAS HOLD OUT-> Salto de altura


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.035294,0.058824,0.035294,0.047059,0.031373,0.043137,0.05098,0.05098
Precission,0.018287,0.011426,0.021051,0.001395,0.010295,0.001024,0.001868,0.02361,0.03548
Recall,0.043137,0.035294,0.058824,0.035294,0.047059,0.031373,0.043137,0.05098,0.05098
F1,0.024627,0.013173,0.02871,0.002684,0.015946,0.001984,0.003581,0.026567,0.038768


In [20]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de altura')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Salto de altura


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.050588,0.023529,0.043529,0.032941,0.030588,0.02,0.043529,0.050588,0.034118
Precission,0.036495,0.008794,0.019101,0.012804,0.013285,0.009981,0.0192,0.030338,0.024975
Recall,0.050588,0.023529,0.043529,0.032941,0.030588,0.02,0.043529,0.050588,0.034118
F1,0.036584,0.011918,0.021359,0.016142,0.017215,0.011992,0.025808,0.035474,0.027847


In [21]:
#PRUEBA -> 400m lisos
ddbbData = ddbbDataNorm[['400m','400m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 400m lisos')
tabla



METRICAS HOLD OUT-> 400m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.062745,0.039216,0.054902,0.035294,0.05098,0.031373,0.019608,0.054902,0.058824
Precission,0.048537,0.01216,0.046683,0.001435,0.007094,0.000984,0.045602,0.063206,0.077498
Recall,0.062745,0.039216,0.054902,0.035294,0.05098,0.031373,0.019608,0.054902,0.058824
F1,0.048932,0.015932,0.041559,0.002757,0.012189,0.001909,0.01145,0.047714,0.060738


In [22]:
print()
print()
print('METRICAS CROSS VALIDATION-> 400m lisos')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> 400m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.055294,0.037647,0.045882,0.037647,0.042353,0.018824,0.044706,0.049412,0.065882
Precission,0.035644,0.014368,0.019558,0.021839,0.017707,0.008493,0.03026,0.03784,0.066516
Recall,0.055294,0.037647,0.045882,0.037647,0.042353,0.018824,0.044706,0.049412,0.065882
F1,0.040509,0.017923,0.023868,0.022919,0.022782,0.010585,0.026422,0.039306,0.066049


In [23]:
#PRUEBA -> 110m Vayas
ddbbData = ddbbDataNorm[['110m H','110m H Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 110m Vayas')
tabla



METRICAS HOLD OUT-> 110m Vayas


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.035294,0.054902,0.054902,0.035294,0.054902,0.031373,0.043137,0.035294,0.035294
Precission,0.01748,0.007945,0.012351,0.001412,0.024949,0.000984,0.011026,0.020835,0.036253
Recall,0.035294,0.054902,0.054902,0.035294,0.054902,0.031373,0.043137,0.035294,0.035294
F1,0.022686,0.013342,0.017832,0.002715,0.020877,0.001909,0.008636,0.02449,0.034106


In [24]:
print()
print()
print('METRICAS CROSS VALIDATION-> 110m Vayas')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> 110m Vayas


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.051765,0.036471,0.042353,0.041176,0.038824,0.018824,0.051765,0.036471,0.049412
Precission,0.028849,0.00928,0.007119,0.037271,0.016412,0.007039,0.032936,0.025505,0.047583
Recall,0.051765,0.036471,0.042353,0.041176,0.038824,0.018824,0.051765,0.036471,0.049412
F1,0.035868,0.012942,0.011856,0.028841,0.019897,0.010007,0.033987,0.028514,0.048212


In [25]:
#PRUEBA -> Lanzamiento de Disco
ddbbData = ddbbDataNorm[['Dt','Dt Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de Disco')
tabla



METRICAS HOLD OUT-> Lanzamiento de Disco


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.039216,0.043137,0.054902,0.058824,0.031373,0.015686,0.039216,0.05098
Precission,0.026397,0.009685,0.018526,0.003087,0.017104,0.000984,0.001673,0.010313,0.052909
Recall,0.043137,0.039216,0.043137,0.054902,0.058824,0.031373,0.015686,0.039216,0.05098
F1,0.029503,0.013479,0.01465,0.005845,0.021095,0.001909,0.003024,0.013956,0.050974


In [26]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de Disco')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de Disco


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.050588,0.028235,0.043529,0.035294,0.058824,0.016471,0.036471,0.057647,0.035294
Precission,0.039753,0.009915,0.016429,0.025392,0.023763,0.006181,0.024279,0.031307,0.035392
Recall,0.050588,0.028235,0.043529,0.035294,0.058824,0.016471,0.036471,0.057647,0.035294
F1,0.036868,0.01369,0.016913,0.027699,0.026191,0.008523,0.021449,0.037148,0.035125


In [27]:
#PRUEBA -> Salto de Pertiga
ddbbData = ddbbDataNorm[['Pv','Pv Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de Pertiga')
tabla



METRICAS HOLD OUT-> Salto de Pertiga


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.031373,0.023529,0.035294,0.039216,0.023529,0.019608,0.031373,0.023529,0.015686
Precission,0.009553,0.004508,0.013468,0.001556,0.015084,0.001322,0.012741,0.008489,0.007059
Recall,0.031373,0.023529,0.035294,0.039216,0.023529,0.019608,0.031373,0.023529,0.015686
F1,0.013063,0.00642,0.017669,0.002994,0.01123,0.002389,0.007767,0.011623,0.009531


In [28]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de Pertiga')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Salto de Pertiga


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.045882,0.037647,0.048235,0.042353,0.029412,0.016471,0.024706,0.057647,0.051765
Precission,0.026472,0.020605,0.012642,0.020511,0.028731,0.008131,0.010104,0.0307,0.026203
Recall,0.045882,0.037647,0.048235,0.042353,0.029412,0.016471,0.024706,0.057647,0.051765
F1,0.030488,0.017946,0.018041,0.024159,0.019777,0.00987,0.013915,0.037018,0.033898


In [29]:
#PRUEBA -> Lanzamiento de Javalina
ddbbData = ddbbDataNorm[['Jt','Jt Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de Javalina')
tabla



METRICAS HOLD OUT-> Lanzamiento de Javalina


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.054902,0.043137,0.039216,0.031373,0.031373,0.043137,0.070588,0.047059
Precission,0.061088,0.00846,0.009328,0.001538,0.015569,0.000984,0.009779,0.050418,0.054015
Recall,0.043137,0.054902,0.043137,0.039216,0.031373,0.031373,0.043137,0.070588,0.047059
F1,0.038005,0.014149,0.014346,0.00296,0.018796,0.001909,0.012127,0.057566,0.048774


In [30]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de Javalina')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de Javalina


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.042353,0.036471,0.045882,0.041176,0.054118,0.016471,0.048235,0.041176,0.054118
Precission,0.03658,0.012566,0.015672,0.024102,0.024247,0.008197,0.032532,0.03195,0.052957
Recall,0.042353,0.036471,0.045882,0.041176,0.054118,0.016471,0.048235,0.041176,0.054118
F1,0.034998,0.017388,0.019214,0.028271,0.028074,0.009914,0.032286,0.035218,0.053324


In [31]:
#PRUEBA -> 1500m lisos
ddbbData = ddbbDataNorm[['1500m','1500m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 1500m Lisos')
tabla



METRICAS HOLD OUT-> 1500m Lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.066667,0.039216,0.047059,0.043137,0.05098,0.031373,0.043137,0.07451,0.031373
Precission,0.030879,0.012938,0.010306,0.010707,0.007842,0.000984,0.005516,0.033752,0.04252
Recall,0.066667,0.039216,0.047059,0.043137,0.05098,0.031373,0.043137,0.07451,0.031373
F1,0.041072,0.010795,0.01489,0.008484,0.012947,0.001909,0.006985,0.044789,0.035625


In [32]:
print()
print()
print('METRICAS CROSS VALIDATION-> 1500m Lisos')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> 1500m Lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.06,0.028235,0.044706,0.054118,0.041176,0.022353,0.051765,0.044706,0.043529
Precission,0.035416,0.011369,0.012981,0.022984,0.016444,0.008464,0.034533,0.022735,0.045343
Recall,0.06,0.028235,0.044706,0.054118,0.041176,0.022353,0.051765,0.044706,0.043529
F1,0.041522,0.015069,0.017529,0.027806,0.019463,0.011246,0.037682,0.02802,0.044241


In [33]:
#PRUEBA -> Todas las pruebas
ddbbData = ddbbDataNorm[['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m',
                         '110m H','110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points',
                         '1500m']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Todas las pruebas')



METRICAS HOLD OUT-> Todas las pruebas


In [34]:
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1

Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.08,0.041176,0.047059,0.041176,0.023529,0.037647,0.038824,0.050588,0.098824
Precission,0.05798,0.017929,0.057976,0.056412,0.047328,0.016276,0.022926,0.051375,0.092749
Recall,0.08,0.041176,0.047059,0.041176,0.023529,0.037647,0.038824,0.050588,0.098824
F1,0.062588,0.02186,0.031178,0.031712,0.017211,0.02113,0.02653,0.04968,0.095029


Genera Excel con las metricas obtenidas

In [35]:
path = "../DecatlonEstadistics/resources/metricasAlgoritmos.xlsx"
norm_path = os.path.normpath(path) 

#tablaExcelMetricas.to_excel (norm_path, index = False, header=True)

### 7.Predicciones Juegos Olimpicos 2021

In [36]:
ddbbData = ddbbDataNorm[['100m Points','Sp Points','Hj Points','Dt Points','Jt Points','1500m Points']]

#Creamos el dataset para entrenar los algoritmos
X_train = ddbbData
y_train = ddbb['Position']

path = "../DecatlonEstadistics/resources/testResults.csv"
norm_path = os.path.normpath(path) 
predictionData = pd.read_csv(norm_path)
del predictionData['Unnamed: 11']

predictionData.columns = ['Athlete','100m','Lj','Sp','Hj','400m','110m H','Dt','Pv','Jt','1500m']

predDataset = predictionData[['100m','Sp','Hj','Dt','Jt','1500m']]

for i in range(len(predDataset['100m'])): predDataset['100m'][i] = 25.4347*math.pow((18-predDataset['100m'][i]),1.81)
for i in range(len(predDataset['1500m'])): predDataset['1500m'][i] = 0.037*math.pow((480-predDataset['1500m'][i]),1.85)
for i in range(len(predDataset['Hj'])): predDataset['Hj'][i] = 0.86*math.pow((predDataset['Hj'][i]*100-75),1.42)
for i in range(len(predDataset['Sp'])): predDataset['Sp'][i] = 51.39*math.pow((predDataset['Sp'][i]-1.5),1.05)
for i in range(len(predDataset['Dt'])): predDataset['Dt'][i] = 12.91*math.pow((predDataset['Dt'][i]-4),1.10)
for i in range(len(predDataset['Jt'])): predDataset['Jt'][i] = 10.14*math.pow((predDataset['Jt'][i]-7),1.08)

#Normaliza los datos
normalizer = preprocessing.MinMaxScaler()
predDatasetNormalized = normalizer.fit_transform(predDataset)

#Introduce los datos normalizados en un nuevo en un dataframe
predDatasetNorm = pd.DataFrame(predDatasetNormalized)
predDatasetNorm.columns  = ['100m Points','Sp Points','Hj Points','Dt Points','Jt Points','1500m Points']



X_test = predDatasetNorm

#### Predicciones Vecinos Cercanos KNN

In [37]:
#Predicciones vecinos cercanos
#del predictionData['Postion Predicted']
knn = KNeighborsClassifier(n_neighbors=10)

#Entrena el modelo
knn.fit(X_train, y_train)

#Predice para los datos de test
y_pred = knn.predict(X_test)

posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,1
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,5
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,3
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,7
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,8
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,20
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,1
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,18
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,18
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,18


#### Predicciones Random Forest

In [38]:
del predictionData['Postion Predicted']

rForest=RandomForestClassifier(n_estimators=10)

rForest.fit(X_train,y_train)

y_pred=rForest.predict(X_test)


posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,1
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,3
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,1
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,5
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,8
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,18
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,22
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,8
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,18
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,18


#### Predicciones MLP

In [39]:
del predictionData['Postion Predicted']

mlp = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 4), random_state=1)

mlp.fit(X_train, y_train)

y_pred = mlp.predict(X_test)

posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,1
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,6
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,24
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,11
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,15
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,29
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,24
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,11
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,29
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,29


#### Predicciones Naive Bayes

In [40]:
del predictionData['Postion Predicted']

clf = svm.SVC(kernel='linear',gamma=2)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,1
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,2
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,4
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,11
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,11
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,16
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,11
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,3
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,16
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,16
