In [1]:
import os.path
import pandas as pd

import numpy as np
from numpy import * 

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix

from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier

import warnings
warnings.filterwarnings('ignore')

### 1.Preparacion de los datos

#### Lee el dataset que generamos anteriormente 

In [2]:
#Lee el excel generado en el otro documento .ipynb
path = "../DecatlonEstadistics/resources/data.xlsx"
norm_path = os.path.normpath(path) 
ddbb = pd.read_excel(norm_path)

del ddbb['1500m NF']

ddbb['Country']=ddbb['Country'].str.lower()

ddbb.columns = ['Position', 'Athlete', 'Age', 'Country', 'Total Points', 'Year', 'Competition', '100m', 
                 '100m Points', 'Lj', 'Lj Points', 'Sp', 'Sp Points', 'Hj', 'Hj Points', '400m', '400m Points', 
                 '110m H', '110m H Points', 'Dt', 'Dt Points', 'Pv', 'Pv Points', 'Jt', 'Jt Points', 
                 '1500m Points', '1500m']

ddbbData = ddbb[['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m',
                 '110m H','110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points','1500m']]


#### Normaliza los datos del dataset entre [0,1]

In [3]:
#Normaliza los datos
normalizer = preprocessing.MinMaxScaler()
ddbbNormalized = normalizer.fit_transform(ddbbData)

#Introduce los datos normalizados en un nuevo en un dataframe
ddbbDataNorm = pd.DataFrame(ddbbNormalized)
ddbbDataNorm.columns  = ['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m','110m H',
                '110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points','1500m']

#Da valore positivos a tiempos bajos en las pruebas de pista
for i in range(len(ddbbDataNorm['100m'])): ddbbDataNorm['100m'][i] = 1 - ddbbDataNorm['100m'][i]
for i in range(len(ddbbDataNorm['400m'])): ddbbDataNorm['400m'][i] = 1 - ddbbDataNorm['400m'][i]
for i in range(len(ddbbDataNorm['1500m'])): ddbbDataNorm['1500m'][i] = 1 - ddbbDataNorm['1500m'][i]
for i in range(len(ddbbDataNorm['110m H'])): ddbbDataNorm['110m H'][i] = 1 - ddbbDataNorm['110m H'][i]

#### Algoritmos y metricas que se van a estudiar

In [4]:
metricas = {
    'Accuracy': metrics.accuracy_score,
    'MAE':  metrics.mean_absolute_error,
    'RMSE': metrics.mean_squared_error,
    #Actualmente esta funcion solo esta incluida en versiones no estables de la libreria ScikitLearn, por eso se crea esta funcion
    'MAPE': lambda y, y_pred:
          np.mean(np.abs((y - y_pred) / y)) * 100,
}

In [5]:
#Diccionario con los algoritmos que se van a estudiar
algoritmos = {
    'KNN': KNeighborsClassifier(n_neighbors=5),
    #'REGLI': linear_model.LinearRegression(),
    'REGLO': linear_model.LogisticRegression(random_state=42),
    'GNB': GaussianNB(),
    'PERCEPTRON': Perceptron(tol=1e-5, random_state=1),
    'MLP': MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1),
    'SVM': svm.SVC(kernel='linear'),
    'DESGRA': SGDClassifier(loss="hinge", penalty="l2"),
    'ARBOL': DecisionTreeClassifier(criterion="entropy", max_depth=5),
    'BOSQUE': RandomForestClassifier(n_estimators=100)
}

In [6]:
algoritmosClust = {
    'KMEANS': KMeans(n_clusters=1 , random_state= 5),
}

In [7]:
algoritmosIA = {
    
}

### 4.Hold Out

#### Metricas para los diferentes algoritmos -> Separacion de datos HoldOut

Funcion que genera las etiquetas predecidas para los diferentes algoritmos

In [8]:
#Funcion que genera predicciones para casos de test en función de la separación hecha por holdOut
def generaModelosHO(nombre,X_train, X_test, y_train, y_test):
    
    #Estudia el algoritmo pasado
    algoritmo = algoritmos[nombre]

    #Entrena el modelo
    algoritmo.fit(X_train, y_train)

    #Predice para los datos de test
    y_pred = algoritmo.predict(X_test)
    
    #Devuelve el las predicciones para los casos de test
    return(y_pred)

Funcion que devuelve para un algoritmo (Se le pasa las etiquetas reales y las generadas por el algoritmo) dado todas sus metricas.

Se usa tanto para Hold Out como Cross Validation

In [9]:
#Funcion para la evaluacion de las diferentes metricas
def evaluaMetricas(y_test, y_pred):
    resultado = {}
    for nombre, funcion in metricas.items():
        resultado[nombre] = funcion(y_test, y_pred)
    return resultado

Funcion que devuelve el resultado de las metricas para los algoritmos definidos en la parte superior

In [10]:
def mainHoldOut(ddbbData):
    y_pred = {}
    X_train, X_test, y_train, y_test = train_test_split(ddbbData, ddbb['Position'], test_size=0.3, random_state=42)

    for nombre, funcion in algoritmos.items():
        y_pred[nombre] = generaModelosHO(nombre,X_train, X_test, y_train, y_test)
        
    resultado = {}
    for nombre, funcionA in algoritmos.items():
        resultado[nombre] = evaluaMetricas(y_test, y_pred[nombre])
    
    #Tabla de metricas para los algoritmos estudiados
    return resultado

### 5.Cross Validation

#### Metricas para los diferentes algoritmos -> Separacion de datos CrossValidation

In [11]:
def generaModelosCV(funcion, ddbbData):
    y_pred = cross_val_predict(funcion, ddbbData, ddbb['Position'], cv=KFold(n_splits=10,random_state=42,shuffle=True))
    
    return y_pred

In [12]:
def mainCrossVal(ddbbData):
    y_pred = {}

    for nombre, funcion in algoritmos.items():
        y_pred[nombre] = generaModelosCV(funcion, ddbbData)
        
    resultado = {}
    for nombre, funcionA in algoritmos.items():
        resultado[nombre] = evaluaMetricas(ddbb['Position'], y_pred[nombre])
    
    #Tabla de metricas para los algoritmos estudiados
    return resultado

### 6.Comparativas de las metricas

#### Resultados de las metricas para HoldOut y CrossValidation de cada prueba

In [13]:
#PRUEBA -> 100m Lisos
ddbbData = ddbbDataNorm[['100m','100m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 100m lisos')
tabla



METRICAS HOLD OUT-> 100m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.062745,0.047059,0.066667,0.039216,0.07451,0.047059,0.054902,0.054902,0.05098
MAE,7.803922,7.34902,7.45098,7.741176,6.290196,6.886275,8.301961,6.564706,6.960784
RMSE,101.709804,84.556863,88.196078,85.811765,63.239216,76.156863,112.639216,70.847059,84.207843
MAPE,78.086912,74.873128,82.780491,203.016004,68.423391,71.902284,70.509682,77.142232,103.848651


In [14]:
print()
print()
print('METRICAS CROSS VALIDATION-> 100m lisos')
tablaExcelMetricas = tabla1.copy()
tabla1



METRICAS CROSS VALIDATION-> 100m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.056471,0.051765,0.044706,0.048235,0.062353,0.036471,0.04,0.055294,0.049412
MAE,7.449412,7.621176,8.328235,8.796471,6.152941,7.985882,6.872941,6.704706,6.682353
RMSE,92.512941,93.124706,110.864706,123.812941,61.051765,102.329412,73.152941,72.768235,74.105882
MAPE,81.876295,74.476023,91.62134,150.307669,73.399464,70.988431,109.528981,87.092352,103.125207


In [15]:
#PRUEBA -> Salto de longitud
ddbbData = ddbbDataNorm[['Lj','Lj Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de longitud')
tabla



METRICAS HOLD OUT-> Salto de longitud


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.039216,0.027451,0.062745,0.039216,0.047059,0.031373,0.047059,0.05098,0.035294
MAE,7.329412,6.745098,7.74902,8.243137,6.223529,7.043137,9.105882,6.67451,6.94902
RMSE,94.654902,69.639216,100.988235,104.698039,60.443137,77.192157,131.796078,73.670588,79.921569
MAPE,67.466048,95.264441,96.821514,226.763157,77.942505,93.370147,91.563458,100.364,93.562775


In [16]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de longitud')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Salto de longitud


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.042353,0.021176,0.058824,0.029412,0.048235,0.02,0.036471,0.049412,0.054118
MAE,7.612941,7.808235,7.274118,9.664706,6.176471,7.672941,8.428235,6.355294,7.197647
RMSE,94.9,100.452941,88.824706,137.707059,58.588235,94.877647,111.896471,65.197647,85.501176
MAPE,75.874096,86.008651,101.164064,214.767774,94.218413,85.389947,175.545985,83.452309,109.742524


In [17]:
#PRUEBA -> Lanzamiento de peso
ddbbData = ddbbDataNorm[['Sp','Sp Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de peso')
tabla



METRICAS HOLD OUT-> Lanzamiento de peso


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.027451,0.023529,0.066667,0.054902,0.054902,0.031373,0.054902,0.047059,0.035294
MAE,8.376471,6.541176,8.447059,7.435294,6.388235,7.043137,8.729412,6.478431,6.937255
RMSE,108.243137,66.298039,119.152941,79.333333,65.133333,77.192157,120.337255,67.278431,80.207843
MAPE,81.22571,83.096114,101.298076,193.675202,70.512584,93.370147,88.036271,84.315133,111.85927


In [18]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de peso')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de peso


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.032941,0.042353,0.055294,0.042353,0.052941,0.018824,0.028235,0.048235,0.029412
MAE,8.209412,7.507059,7.911765,8.092941,6.774118,7.957647,9.1,7.003529,7.6
RMSE,105.287059,91.172941,103.843529,98.278824,72.515294,101.16,137.916471,78.311765,91.287059
MAPE,89.401403,69.489312,100.060001,166.723665,89.580605,80.420306,185.414199,95.42574,134.07307


In [19]:
#PRUEBA -> Salto de altura
ddbbData = ddbbDataNorm[['Hj','Hj Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de altura')
tabla



METRICAS HOLD OUT-> Salto de altura


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.035294,0.058824,0.035294,0.047059,0.031373,0.062745,0.05098,0.031373
MAE,7.980392,6.705882,8.952941,7.945098,6.74902,6.964706,6.811765,6.486275,6.878431
RMSE,100.929412,69.364706,133.368627,89.952941,71.768627,75.780392,71.956863,66.933333,73.32549
MAPE,84.434032,91.598992,119.788708,208.038339,83.616614,92.906259,138.14251,88.866157,122.847619


In [20]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de altura')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Salto de altura


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.050588,0.023529,0.043529,0.032941,0.030588,0.02,0.049412,0.050588,0.032941
MAE,8.242353,7.528235,9.116471,9.269412,6.574118,7.827059,8.317647,7.327059,7.108235
RMSE,110.771765,91.323529,130.194118,135.681176,71.08,99.977647,112.127059,88.837647,81.872941
MAPE,88.207028,82.021577,121.436286,193.645145,92.656996,82.797188,136.423477,98.988892,110.266995


In [21]:
#PRUEBA -> 400m lisos
ddbbData = ddbbDataNorm[['400m','400m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 400m lisos')
tabla



METRICAS HOLD OUT-> 400m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.062745,0.039216,0.054902,0.035294,0.05098,0.031373,0.05098,0.054902,0.05098
MAE,8.415686,6.992157,8.847059,7.972549,6.490196,7.043137,9.035294,6.941176,7.788235
RMSE,117.513725,78.372549,125.07451,90.662745,68.176471,77.192157,129.662745,76.886275,94.611765
MAPE,91.415585,78.921483,111.227651,208.584138,85.255615,93.370147,74.704115,99.521917,117.723124


In [22]:
print()
print()
print('METRICAS CROSS VALIDATION-> 400m lisos')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> 400m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.055294,0.037647,0.045882,0.037647,0.042353,0.018824,0.047059,0.048235,0.062353
MAE,7.978824,8.051765,8.827059,9.891765,6.68,8.150588,7.755294,7.217647,7.725882
RMSE,104.649412,103.562353,124.991765,154.451765,71.32,107.425882,96.362353,84.457647,90.62
MAPE,83.605944,77.129255,104.341579,202.037983,79.773266,80.802562,114.790689,99.083803,122.228001


In [23]:
#PRUEBA -> 110m Vayas
ddbbData = ddbbDataNorm[['110m H','110m H Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 110m Vayas')
tabla



METRICAS HOLD OUT-> 110m Vayas


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.035294,0.054902,0.054902,0.035294,0.054902,0.031373,0.047059,0.035294,0.039216
MAE,8.176471,7.360784,8.792157,7.886275,7.231373,7.043137,6.517647,6.694118,6.913725
RMSE,107.870588,85.964706,117.372549,89.117647,83.780392,77.192157,59.411765,74.419608,77.564706
MAPE,80.060324,79.30253,137.142894,207.57623,74.365401,93.370147,133.730378,90.974987,119.844028


In [24]:
print()
print()
print('METRICAS CROSS VALIDATION-> 110m Vayas')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> 110m Vayas


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.051765,0.036471,0.042353,0.041176,0.038824,0.018824,0.051765,0.036471,0.048235
MAE,7.794118,8.228235,9.642353,8.677647,6.769412,8.285882,7.491765,7.632941,7.108235
RMSE,99.871765,107.112941,142.875294,117.117647,72.407059,110.050588,91.112941,94.912941,82.425882
MAPE,85.00728,80.316425,117.806204,156.169659,82.919383,82.990458,120.748525,94.474144,113.008228


In [25]:
#PRUEBA -> Lanzamiento de Disco
ddbbData = ddbbDataNorm[['Dt','Dt Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de Disco')
tabla



METRICAS HOLD OUT-> Lanzamiento de Disco


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.039216,0.043137,0.054902,0.058824,0.031373,0.062745,0.039216,0.05098
MAE,8.188235,6.607843,7.611765,7.447059,6.411765,7.043137,6.847059,6.560784,7.427451
RMSE,108.760784,68.694118,94.301961,79.713725,67.996078,77.192157,77.035294,69.478431,89.560784
MAPE,82.187038,82.915146,85.876683,194.565725,78.700147,93.370147,81.011023,85.353794,124.149902


In [26]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de Disco')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de Disco


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.050588,0.028235,0.043529,0.035294,0.058824,0.016471,0.052941,0.056471,0.032941
MAE,7.997647,7.641176,8.164706,9.128235,6.482353,8.017647,6.88,6.582353,7.521176
RMSE,102.950588,95.22,108.152941,133.363529,70.442353,102.789412,75.703529,71.977647,90.7
MAPE,81.659991,75.268792,106.006068,165.534704,84.562062,79.940703,109.829604,89.577944,125.862159


In [27]:
#PRUEBA -> Salto de Pertiga
ddbbData = ddbbDataNorm[['Pv','Pv Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de Pertiga')
tabla



METRICAS HOLD OUT-> Salto de Pertiga


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.031373,0.023529,0.035294,0.039216,0.023529,0.019608,0.007843,0.023529,0.015686
MAE,7.588235,6.756863,8.678431,7.92549,6.666667,6.984314,11.305882,6.752941,7.180392
RMSE,90.537255,70.74902,119.156863,89.619608,69.168627,75.164706,170.898039,66.980392,76.74902
MAPE,88.649169,93.976024,116.416455,207.935275,94.458136,94.411106,284.219021,132.749659,134.142648


In [28]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de Pertiga')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Salto de Pertiga


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.045882,0.037647,0.048235,0.042353,0.029412,0.016471,0.036471,0.057647,0.055294
MAE,8.156471,8.010588,9.202353,9.12,7.738824,8.232941,7.722353,7.418824,7.14
RMSE,105.535294,101.805882,130.698824,124.169412,95.301176,108.327059,92.192941,87.491765,80.815294
MAPE,88.578987,85.473378,109.239143,183.469189,91.913335,85.086263,121.669849,85.294791,109.317719


In [29]:
#PRUEBA -> Lanzamiento de Javalina
ddbbData = ddbbDataNorm[['Jt','Jt Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de Javalina')
tabla



METRICAS HOLD OUT-> Lanzamiento de Javalina


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.054902,0.043137,0.039216,0.031373,0.031373,0.031373,0.070588,0.054902
MAE,8.717647,7.168627,7.603922,7.890196,6.74902,7.043137,6.705882,6.737255,7.023529
RMSE,118.278431,82.266667,97.933333,89.34902,73.839216,77.192157,70.690196,73.686275,82.388235
MAPE,86.342421,67.550027,105.834641,207.761056,83.955489,93.370147,90.851852,82.572242,107.747196


In [30]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de Javalina')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de Javalina


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.042353,0.036471,0.045882,0.041176,0.054118,0.016471,0.036471,0.041176,0.048235
MAE,8.216471,7.607059,8.223529,8.436471,6.683529,7.801176,8.464706,6.969412,7.790588
RMSE,105.529412,92.014118,112.988235,107.092941,71.34,96.869412,114.989412,77.411765,97.265882
MAPE,96.9045,73.866443,124.86411,167.158409,88.331324,79.596108,170.953547,99.038165,131.306751


In [31]:
#PRUEBA -> 1500m lisos
ddbbData = ddbbDataNorm[['1500m','1500m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 1500m Lisos')
tabla



METRICAS HOLD OUT-> 1500m Lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.066667,0.039216,0.047059,0.043137,0.05098,0.031373,0.039216,0.07451,0.039216
MAE,8.572549,6.996078,6.901961,7.866667,7.066667,7.043137,6.670588,7.756863,8.235294
RMSE,115.921569,77.176471,77.764706,89.160784,83.921569,77.192157,67.541176,101.6,105.082353
MAPE,90.630318,91.013762,89.641904,207.657141,89.033325,93.370147,101.251721,90.823671,150.98575


In [32]:
print()
print()
print('METRICAS CROSS VALIDATION-> 1500m Lisos')
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1



METRICAS CROSS VALIDATION-> 1500m Lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.06,0.028235,0.044706,0.054118,0.041176,0.022353,0.034118,0.045882,0.043529
MAE,8.308235,7.747059,7.703529,7.857647,7.541176,8.016471,7.951765,8.370588,7.972941
RMSE,112.131765,97.629412,97.588235,92.7,91.28,104.477647,97.405882,108.695294,101.26
MAPE,95.53283,86.135921,111.611528,183.764922,96.745837,81.22541,130.962321,109.385384,151.908588


In [33]:
#PRUEBA -> Todas las pruebas
ddbbData = ddbbDataNorm[['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m',
                         '110m H','110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points',
                         '1500m']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Todas las pruebas')



METRICAS HOLD OUT-> Todas las pruebas


In [34]:
tablaExcelMetricas = pd.concat([tablaExcelMetricas, tabla1])
tabla1

Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.08,0.041176,0.047059,0.041176,0.023529,0.037647,0.043529,0.050588,0.082353
MAE,5.817647,7.122353,7.887059,8.256471,7.735294,7.592941,7.023529,6.017647,4.807059
RMSE,60.252941,79.863529,99.607059,110.032941,98.118824,92.835294,79.475294,59.278824,41.294118
MAPE,54.252467,73.835526,100.702965,156.488951,81.126171,72.434434,119.539133,82.7607,64.239513


Genera Excel con las metricas obtenidas

In [35]:
path = "../DecatlonEstadistics/resources/metricasAlgoritmos.xlsx"
norm_path = os.path.normpath(path) 

#tablaExcelMetricas.to_excel (norm_path, index = False, header=True)

### 7.Predicciones Juegos Olimpicos 2021

In [36]:
ddbbData = ddbbDataNorm[['100m','Sp','Hj','Dt','Jt','1500m']]

#Creamos el dataset para entrenar los algoritmos
X_train = ddbbData
y_train = ddbb['Position']

path = "../DecatlonEstadistics/resources/testResults.csv"
norm_path = os.path.normpath(path) 
predictionData = pd.read_csv(norm_path)
del predictionData['Unnamed: 11']

predictionData.columns = ['Athlete','100m','Lj','Sp','Hj','400m','110m H','Dt','Pv','Jt','1500m']

predDataset = predictionData[['100m','Sp','Hj','Dt','Jt','1500m']]

#Normaliza los datos
normalizer = preprocessing.MinMaxScaler()
predDatasetNormalized = normalizer.fit_transform(predDataset)

#Introduce los datos normalizados en un nuevo en un dataframe
predDatasetNorm = pd.DataFrame(predDatasetNormalized)
predDatasetNorm.columns  = ['100m','Sp','Hj','Dt','Jt','1500m']

#Da valore positivos a tiempos bajos en las pruebas de pista
for i in range(len(predDatasetNorm['100m'])): predDatasetNorm['100m'][i] = 1 - predDatasetNorm['100m'][i]
for i in range(len(predDatasetNorm['1500m'])): predDatasetNorm['1500m'][i] = 1 - predDatasetNorm['1500m'][i]


X_test = predDatasetNorm

#### Predicciones Vecinos Cercanos KNN

In [37]:
#Predicciones vecinos cercanos
#del predictionData['Postion Predicted']
knn = KNeighborsClassifier(n_neighbors=10)

#Entrena el modelo
knn.fit(X_train, y_train)

#Predice para los datos de test
y_pred = knn.predict(X_test)

posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,1
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,1
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,15
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,21
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,8
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,20
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,11
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,4
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,1
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,7


#### Predicciones Random Forest

In [123]:
del predictionData['Postion Predicted']

rForest=RandomForestClassifier(n_estimators=10)

rForest.fit(X_train,y_train)

y_pred=rForest.predict(X_test)


posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,1
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,2
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,2
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,8
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,2
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,4
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,2
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,26
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,1
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,2


#### Predicciones MLP

In [124]:
del predictionData['Postion Predicted']

mlp = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)

mlp.fit(X_train, y_train)

y_pred = mlp.predict(X_test)

posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,16
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,1
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,1
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,1
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,4
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,26
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,1
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,26
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,26
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,20


#### Predicciones Naive Bayes

In [132]:
del predictionData['Postion Predicted']

clf = svm.SVC(kernel='poly',gamma=4)

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,2
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,4
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,22
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,22
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,22
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,12
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,11
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,3
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,1
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,7
