In [1]:
import os.path
import pandas as pd

import numpy as np
from numpy import * 

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix

from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier

import warnings
warnings.filterwarnings('ignore')

### 1.Preparacion de los datos

#### Lee el dataset que generamos anteriormente 

In [2]:
#Lee el excel generado en el otro documento .ipynb
path = "../DecatlonEstadistics/resources/data.xlsx"
norm_path = os.path.normpath(path) 
ddbb = pd.read_excel(norm_path)

del ddbb['1500m NF']

ddbb['Country']=ddbb['Country'].str.lower()

ddbb.columns = ['Position', 'Athlete', 'Age', 'Country', 'Total Points', 'Year', 'Competition', '100m', 
                 '100m Points', 'Lj', 'Lj Points', 'Sp', 'Sp Points', 'Hj', 'Hj Points', '400m', '400m Points', 
                 '110m H', '110m H Points', 'Dt', 'Dt Points', 'Pv', 'Pv Points', 'Jt', 'Jt Points', 
                 '1500m Points', '1500m']

ddbbData = ddbb[['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m',
                 '110m H','110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points','1500m']]


#### Normaliza los datos del dataset entre [0,1]

In [3]:
#Normaliza los datos
normalizer = preprocessing.MinMaxScaler()
ddbbNormalized = normalizer.fit_transform(ddbbData)

#Introduce los datos normalizados en un nuevo en un dataframe
ddbbDataNorm = pd.DataFrame(ddbbNormalized)
ddbbDataNorm.columns  = ['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m','110m H',
                '110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points','1500m']

#### Algoritmos y metricas que se van a estudiar

In [4]:
metricas = {
    'Accuracy': metrics.accuracy_score,
    'MAE':  metrics.mean_absolute_error,
    'RMSE': metrics.mean_squared_error,
    #Actualmente esta funcion solo esta incluida en versiones no estables de la libreria ScikitLearn, por eso se crea esta funcion
    'MAPE': lambda y, y_pred:
          np.mean(np.abs((y - y_pred) / y)) * 100,
}

In [5]:
#Diccionario con los algoritmos que se van a estudiar
algoritmos = {
    'KNN': KNeighborsClassifier(n_neighbors=5),
    #'REGLI': linear_model.LinearRegression(),
    'REGLO': linear_model.LogisticRegression(random_state=42),
    'GNB': GaussianNB(),
    'PERCEPTRON': Perceptron(tol=1e-5, random_state=1),
    'MLP': MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1),
    'SVM': svm.SVC(kernel='linear'),
    'DESGRA': SGDClassifier(loss="hinge", penalty="l2"),
    'ARBOL': DecisionTreeClassifier(criterion="entropy", max_depth=5),
    'BOSQUE': RandomForestClassifier(n_estimators=100)
}

In [6]:
algoritmosClust = {
    'KMEANS': KMeans(n_clusters=1 , random_state= 5),
}

In [7]:
algoritmosIA = {
    
}

### 4.Hold Out

#### Metricas para los diferentes algoritmos -> Separacion de datos HoldOut

Funcion que genera las etiquetas predecidas para los diferentes algoritmos

In [8]:
#Funcion que genera predicciones para casos de test en función de la separación hecha por holdOut
def generaModelosHO(nombre,X_train, X_test, y_train, y_test):
    
    #Estudia el algoritmo pasado
    algoritmo = algoritmos[nombre]

    #Entrena el modelo
    algoritmo.fit(X_train, y_train)

    #Predice para los datos de test
    y_pred = algoritmo.predict(X_test)
    
    #Devuelve el las predicciones para los casos de test
    return(y_pred)

Funcion que devuelve para un algoritmo (Se le pasa las etiquetas reales y las generadas por el algoritmo) dado todas sus metricas.

Se usa tanto para Hold Out como Cross Validation

In [9]:
#Funcion para la evaluacion de las diferentes metricas
def evaluaMetricas(y_test, y_pred):
    resultado = {}
    for nombre, funcion in metricas.items():
        resultado[nombre] = funcion(y_test, y_pred)
    return resultado

Funcion que devuelve el resultado de las metricas para los algoritmos definidos en la parte superior

In [10]:
def mainHoldOut(ddbbData):
    y_pred = {}
    X_train, X_test, y_train, y_test = train_test_split(ddbbData, ddbb['Position'], test_size=0.3, random_state=42)

    for nombre, funcion in algoritmos.items():
        y_pred[nombre] = generaModelosHO(nombre,X_train, X_test, y_train, y_test)
        
    resultado = {}
    for nombre, funcionA in algoritmos.items():
        resultado[nombre] = evaluaMetricas(y_test, y_pred[nombre])
    
    #Tabla de metricas para los algoritmos estudiados
    return resultado

### 5.Cross Validation

#### Metricas para los diferentes algoritmos -> Separacion de datos CrossValidation

In [11]:
def generaModelosCV(funcion, ddbbData):
    y_pred = cross_val_predict(funcion, ddbbData, ddbb['Position'], cv=KFold(n_splits=10,random_state=42,shuffle=True))
    
    return y_pred

In [12]:
def mainCrossVal(ddbbData):
    y_pred = {}

    for nombre, funcion in algoritmos.items():
        y_pred[nombre] = generaModelosCV(funcion, ddbbData)
        
    resultado = {}
    for nombre, funcionA in algoritmos.items():
        resultado[nombre] = evaluaMetricas(ddbb['Position'], y_pred[nombre])
    
    #Tabla de metricas para los algoritmos estudiados
    return resultado

### 6.Comparativas de las metricas

#### Resultados de las metricas para HoldOut y CrossValidation de cada prueba

In [13]:
#PRUEBA -> 100m Lisos
ddbbData = ddbbDataNorm[['100m','100m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 100m lisos')
tabla



METRICAS HOLD OUT-> 100m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.058824,0.047059,0.066667,0.027451,0.07451,0.047059,0.039216,0.054902,0.058824
MAE,7.788235,7.34902,7.45098,7.890196,6.207843,6.886275,6.811765,6.552941,6.470588
RMSE,101.058824,84.556863,88.196078,88.752941,61.156863,76.156863,69.627451,70.623529,72.133333
MAPE,86.358991,74.873128,82.780491,207.012584,69.490304,71.902284,102.498502,77.088757,99.089585


In [14]:
print()
print()
print('METRICAS CROSS VALIDATION-> 100m lisos')
tabla1



METRICAS CROSS VALIDATION-> 100m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.061176,0.052941,0.044706,0.050588,0.063529,0.036471,0.057647,0.055294,0.056471
MAE,7.488235,7.623529,8.328235,7.232941,6.145882,7.985882,7.422353,6.7,6.765882
RMSE,93.455294,93.148235,110.864706,82.618824,60.814118,102.329412,91.071765,72.7,75.643529
MAPE,78.154379,74.269367,91.62134,122.353372,74.896163,70.988431,95.912317,87.029793,99.742743


In [15]:
#PRUEBA -> Salto de longitud
ddbbData = ddbbDataNorm[['Lj','Lj Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de longitud')
tabla



METRICAS HOLD OUT-> Salto de longitud


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.039216,0.027451,0.062745,0.039216,0.047059,0.031373,0.035294,0.05098,0.031373
MAE,7.329412,6.745098,7.74902,8.243137,6.223529,7.043137,7.035294,6.67451,7.639216
RMSE,94.654902,69.639216,100.988235,104.698039,60.443137,77.192157,81.670588,73.670588,91.176471
MAPE,67.466048,95.264441,96.821514,226.763157,77.942505,93.370147,138.969531,100.364,112.902752


In [16]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de longitud')
tabla1



METRICAS CROSS VALIDATION-> Salto de longitud


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.042353,0.021176,0.058824,0.029412,0.048235,0.02,0.037647,0.049412,0.051765
MAE,7.612941,7.808235,7.274118,9.664706,6.176471,7.672941,7.5,6.355294,7.158824
RMSE,94.9,100.452941,88.824706,137.707059,58.588235,94.877647,88.445882,65.197647,83.398824
MAPE,75.874096,86.008651,101.164064,214.767774,94.218413,85.389947,142.577535,83.452309,114.298408


In [17]:
#PRUEBA -> Lanzamiento de peso
ddbbData = ddbbDataNorm[['Sp','Sp Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de peso')
tabla



METRICAS HOLD OUT-> Lanzamiento de peso


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.027451,0.023529,0.066667,0.054902,0.054902,0.031373,0.062745,0.047059,0.035294
MAE,8.376471,6.541176,8.447059,7.435294,6.388235,7.043137,5.894118,6.501961,7.011765
RMSE,108.243137,66.298039,119.152941,79.333333,65.133333,77.192157,50.160784,67.796078,77.717647
MAPE,81.22571,83.096114,101.298076,193.675202,70.512584,93.370147,126.509744,84.798794,113.249707


In [18]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de peso')
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de peso


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.032941,0.042353,0.055294,0.042353,0.052941,0.018824,0.036471,0.048235,0.023529
MAE,8.209412,7.507059,7.911765,8.092941,6.774118,7.957647,8.072941,7.010588,7.829412
RMSE,105.287059,91.172941,103.843529,98.278824,72.515294,101.16,100.138824,78.565882,94.057647
MAPE,89.401403,69.489312,100.060001,166.723665,89.580605,80.420306,169.156506,95.452889,136.952


In [19]:
#PRUEBA -> Salto de altura
ddbbData = ddbbDataNorm[['Hj','Hj Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de altura')
tabla



METRICAS HOLD OUT-> Salto de altura


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.035294,0.058824,0.035294,0.047059,0.031373,0.043137,0.05098,0.039216
MAE,7.980392,6.705882,8.952941,7.945098,6.74902,6.964706,8.270588,6.486275,6.827451
RMSE,100.929412,69.364706,133.368627,89.952941,71.768627,75.780392,111.886275,66.933333,75.211765
MAPE,84.434032,91.598992,119.788708,208.038339,83.616614,92.906259,70.162806,88.866157,108.138653


In [20]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de altura')
tabla1



METRICAS CROSS VALIDATION-> Salto de altura


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.050588,0.023529,0.043529,0.032941,0.030588,0.02,0.047059,0.050588,0.038824
MAE,8.242353,7.528235,9.116471,9.269412,6.574118,7.827059,8.355294,7.312941,7.24
RMSE,110.771765,91.323529,130.194118,135.681176,71.08,99.977647,115.009412,88.414118,84.743529
MAPE,88.207028,82.021577,121.436286,193.645145,92.656996,82.797188,123.171284,98.934594,115.280558


In [21]:
#PRUEBA -> 400m lisos
ddbbData = ddbbDataNorm[['400m','400m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 400m lisos')
tabla



METRICAS HOLD OUT-> 400m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.062745,0.039216,0.054902,0.054902,0.054902,0.031373,0.015686,0.054902,0.058824
MAE,8.32549,7.011765,8.847059,7.423529,6.537255,7.043137,10.2,6.905882,7.572549
RMSE,115.486275,78.509804,125.07451,79.133333,69.196078,77.192157,144.647059,76.2,89.980392
MAPE,89.510345,79.901875,111.227651,193.626183,84.029977,93.370147,224.18731,99.380141,124.435974


In [22]:
print()
print()
print('METRICAS CROSS VALIDATION-> 400m lisos')
tabla1



METRICAS CROSS VALIDATION-> 400m lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.058824,0.037647,0.045882,0.035294,0.042353,0.018824,0.055294,0.048235,0.061176
MAE,7.948235,8.051765,8.827059,9.4,6.824706,8.150588,8.123529,7.229412,7.889412
RMSE,104.637647,103.562353,124.991765,143.157647,74.478824,107.425882,106.391765,84.525882,96.828235
MAPE,82.957247,77.129255,104.341579,182.579293,78.042791,80.802562,148.720206,99.582506,127.944924


In [23]:
#PRUEBA -> 110m Vayas
ddbbData = ddbbDataNorm[['110m H','110m H Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 110m Vayas')
tabla



METRICAS HOLD OUT-> 110m Vayas


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.035294,0.054902,0.054902,0.05098,0.039216,0.031373,0.031373,0.035294,0.027451
MAE,8.078431,7.333333,8.792157,7.454902,6.52549,7.043137,7.980392,6.717647,7.34902
RMSE,104.619608,85.058824,117.372549,79.384314,62.27451,77.192157,90.780392,74.654902,85.098039
MAPE,80.179049,79.171811,137.142894,193.898474,101.476583,93.370147,209.069428,92.151458,116.20923


In [24]:
print()
print()
print('METRICAS CROSS VALIDATION-> 110m Vayas')
tabla1



METRICAS CROSS VALIDATION-> 110m Vayas


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.052941,0.036471,0.042353,0.035294,0.043529,0.018824,0.035294,0.036471,0.050588
MAE,7.78,8.228235,9.642353,10.44,6.916471,8.285882,7.610588,7.627059,7.38
RMSE,99.864706,107.112941,142.875294,160.929412,76.161176,110.050588,88.382353,94.82,88.267059
MAPE,81.968251,80.316425,117.806204,197.081059,83.830852,82.990458,149.851326,94.269604,120.858582


In [25]:
#PRUEBA -> Lanzamiento de Disco
ddbbData = ddbbDataNorm[['Dt','Dt Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de Disco')
tabla



METRICAS HOLD OUT-> Lanzamiento de Disco


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.039216,0.043137,0.054902,0.058824,0.031373,0.047059,0.039216,0.066667
MAE,8.188235,6.607843,7.611765,7.447059,6.411765,7.043137,6.988235,6.509804,7.309804
RMSE,108.760784,68.694118,94.301961,79.713725,67.996078,77.192157,77.866667,68.713725,88.478431
MAPE,82.187038,82.915146,85.876683,194.565725,78.700147,93.370147,88.931283,85.053909,121.972335


In [26]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de Disco')
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de Disco


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.050588,0.028235,0.043529,0.035294,0.058824,0.016471,0.043529,0.057647,0.035294
MAE,7.997647,7.641176,8.164706,9.128235,6.482353,8.017647,8.129412,6.542353,7.443529
RMSE,102.950588,95.22,108.152941,133.363529,70.442353,102.789412,101.76,71.210588,88.9
MAPE,81.659991,75.268792,106.006068,165.534704,84.562062,79.940703,137.174378,89.382277,128.336338


In [27]:
#PRUEBA -> Salto de Pertiga
ddbbData = ddbbDataNorm[['Pv','Pv Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Salto de Pertiga')
tabla



METRICAS HOLD OUT-> Salto de Pertiga


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.031373,0.023529,0.035294,0.039216,0.023529,0.019608,0.054902,0.023529,0.023529
MAE,7.588235,6.756863,8.678431,7.92549,6.666667,6.984314,7.6,6.780392,7.019608
RMSE,90.537255,70.74902,119.156863,89.619608,69.168627,75.164706,95.427451,67.501961,72.870588
MAPE,88.649169,93.976024,116.416455,207.935275,94.458136,94.411106,78.657232,133.298679,132.361465


In [28]:
print()
print()
print('METRICAS CROSS VALIDATION-> Salto de Pertiga')
tabla1



METRICAS CROSS VALIDATION-> Salto de Pertiga


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.045882,0.037647,0.048235,0.042353,0.029412,0.016471,0.047059,0.057647,0.054118
MAE,8.156471,8.010588,9.202353,9.12,7.738824,8.232941,7.811765,7.418824,7.331765
RMSE,105.535294,101.805882,130.698824,124.169412,95.301176,108.327059,96.898824,87.491765,85.228235
MAPE,88.578987,85.473378,109.239143,183.469189,91.913335,85.086263,134.30687,85.294791,107.272258


In [29]:
#PRUEBA -> Lanzamiento de Javalina
ddbbData = ddbbDataNorm[['Jt','Jt Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> Lanzamiento de Javalina')
tabla



METRICAS HOLD OUT-> Lanzamiento de Javalina


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.043137,0.054902,0.043137,0.039216,0.031373,0.031373,0.031373,0.070588,0.054902
MAE,8.717647,7.168627,7.603922,7.890196,6.74902,7.043137,8.305882,6.717647,7.203922
RMSE,118.278431,82.266667,97.933333,89.34902,73.839216,77.192157,113.372549,73.352941,84.984314
MAPE,86.342421,67.550027,105.834641,207.761056,83.955489,93.370147,87.128051,82.474202,121.183516


In [30]:
print()
print()
print('METRICAS CROSS VALIDATION-> Lanzamiento de Javalina')
tabla1



METRICAS CROSS VALIDATION-> Lanzamiento de Javalina


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.042353,0.036471,0.045882,0.041176,0.054118,0.016471,0.068235,0.041176,0.048235
MAE,8.216471,7.607059,8.223529,8.436471,6.683529,7.801176,7.050588,6.971765,7.681176
RMSE,105.529412,92.014118,112.988235,107.092941,71.34,96.869412,79.747059,77.376471,94.46
MAPE,96.9045,73.866443,124.86411,167.158409,88.331324,79.596108,103.274193,99.388328,131.23777


In [31]:
#PRUEBA -> 1500m lisos
ddbbData = ddbbDataNorm[['1500m','1500m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print()
print()
print('METRICAS HOLD OUT-> 1500m Lisos')
tabla



METRICAS HOLD OUT-> 1500m Lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.066667,0.039216,0.047059,0.039216,0.05098,0.031373,0.031373,0.07451,0.035294
MAE,8.545098,6.996078,6.901961,7.890196,7.301961,7.043137,6.4,7.752941,8.058824
RMSE,115.12549,77.176471,77.764706,89.34902,87.294118,77.192157,56.839216,101.635294,102.082353
MAPE,90.485839,91.013762,89.641904,207.761056,95.826325,93.370147,141.251378,92.610906,150.673317


In [32]:
print()
print()
print('METRICAS CROSS VALIDATION-> 1500m Lisos')
tabla1



METRICAS CROSS VALIDATION-> 1500m Lisos


Unnamed: 0,KNN,REGLO,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.06,0.028235,0.044706,0.054118,0.030588,0.022353,0.036471,0.043529,0.045882
MAE,8.307059,7.74,7.703529,8.614118,7.475294,8.016471,7.843529,8.368235,8.065882
RMSE,112.041176,97.544706,97.588235,117.884706,89.08,104.477647,94.542353,108.175294,104.48
MAPE,95.97519,86.065333,111.611528,116.633505,96.690097,81.22541,150.201884,110.638123,152.742024


### 7.Predicciones Juegos Olimpicos 2021

In [54]:
ddbbData = ddbbDataNorm[['110m H', '110m H Points', '400m', '400m Points', 'Jt', 'Jt Points']]

#Creamos el dataset para entrenar los algoritmos
X_train = ddbbData
y_train = ddbb['Position']

path = "../DecatlonEstadistics/resources/testResults.csv"
norm_path = os.path.normpath(path) 
predictionData = pd.read_csv(norm_path)
del predictionData['Unnamed: 7']

predictionData.columns = ['Athlete','110m H','110m H Points','400m','400m Points','Jt','Jt Points']

predDataset = predictionData[['110m H','110m H Points','400m','400m Points','Jt','Jt Points']]

#Normaliza los datos
normalizer = preprocessing.MinMaxScaler()
predDatasetNormalized = normalizer.fit_transform(predDataset)

#Introduce los datos normalizados en un nuevo en un dataframe
predDatasetNorm = pd.DataFrame(predDatasetNormalized)
predDatasetNorm.columns  = ['110m H','110m H Points','400m','400m Points','Jt','Jt Points']

X_test = predDatasetNorm

#### Predicciones arbol de decision

In [56]:
#Predicciones árbol de decisión
rForest=RandomForestClassifier(n_estimators=500)

rForest.fit(X_train,y_train)

y_pred=rForest.predict(X_test)


posPredict = [int(x) for x in y_pred]

predictionData.insert(7, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,110m H,110m H Points,400m,400m Points,Jt,Jt Points,Postion Predicted,Postion Predicted.1
0,Axel Hubert,11.03,854,50.75,780,69.69,885,22,22
1,Makssim,10.95,872,49.12,859,57.38,698,26,26
2,Simon Ehammer,10.5,975,42.27,1205,52.88,631,21,21
3,Vitalig Zhuk,11.03,854,49.11,856,69.98,783,26,26
4,Risto Lillemets,10.94,874,50.3,801,63.84,796,26,26
5,Kevin Mayer,10.55,963,48.42,889,71.9,918,14,1
6,Damian Warner,10.31,1120,47.72,923,61.94,767,3,1
7,Maicel Uibo,11.04,852,50.32,800,61.75,764,23,26
8,Arthur Abele,10.85,894,48.4,890,67.61,853,22,22
9,Pieter Braun,11.12,834,49.25,849,58.77,719,26,26
