In [1]:
import os.path
import pandas as pd

import math
import numpy as np
from numpy import * 

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from sklearn import preprocessing
from sklearn.preprocessing import Normalizer
from sklearn import metrics
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import train_test_split
from sklearn.metrics import plot_confusion_matrix

from sklearn.neighbors import KNeighborsClassifier
from sklearn import linear_model
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
from sklearn.tree import DecisionTreeClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import datasets,metrics,linear_model

import warnings
warnings.filterwarnings('ignore')

### 1.Preparacion de los datos

#### Lee el dataset que generamos anteriormente 

In [2]:
#Lee el excel generado en el otro documento .ipynb
path = "../DecatlonEstadistics/resources/data.xlsx"
norm_path = os.path.normpath(path) 
ddbb = pd.read_excel(norm_path)

del ddbb['1500m NF']

ddbb['Country']=ddbb['Country'].str.lower()

posicionCambiada = []
for i in range(len(ddbb)):
    if(ddbb.iloc[i]['Position'] <= 3):
        posicionCambiada.append(3)
    elif(ddbb.iloc[i]['Position'] <= 10):
        posicionCambiada.append(10)
    elif(ddbb.iloc[i]['Position'] <= 20):
        posicionCambiada.append(20)
    elif(ddbb.iloc[i]['Position'] > 20):
        posicionCambiada.append(25)

ddbb['PosicionGeneral'] = posicionCambiada 
        
ddbb.columns = ['Position', 'Athlete', 'Age', 'Country', 'Total Points', 'Year', 'Competition', '100m', 
                 '100m Points', 'Lj', 'Lj Points', 'Sp', 'Sp Points', 'Hj', 'Hj Points', '400m', '400m Points', 
                 '110m H', '110m H Points', 'Dt', 'Dt Points', 'Pv', 'Pv Points', 'Jt', 'Jt Points', 
                 '1500m Points', '1500m','PosicionGeneral']

borrar = []
for i in range(len(ddbb)):
    if(ddbb.iloc[i]['Year'] < 1980 and ddbb.iloc[i]['Year'] != 0 ):
        borrar.append(i)
    
ddbb.drop(borrar,inplace=True)


ddbbData = ddbb[['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m',
                 '110m H','110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points','1500m']]

ddbbPoints = ddbb[['100m Points','Lj Points','Sp Points','Hj Points','400m Points','110m H Points','Dt Points','Pv Points', 
             'Jt Points','1500m Points']]


#### Normaliza los datos del dataset entre [0,1]

In [3]:
#Normaliza los datos
normalizer = preprocessing.MinMaxScaler()
ddbbNormalized = normalizer.fit_transform(ddbbData)

#Introduce los datos normalizados en un nuevo en un dataframe
ddbbDataNorm = pd.DataFrame(ddbbNormalized)
ddbbDataNorm.columns  = ['100m','100m Points','Lj','Lj Points','Sp','Sp Points','Hj','Hj Points','400m Points','400m','110m H',
                '110m H Points','Dt','Dt Points','Pv','Pv Points','Jt','Jt Points','1500m Points','1500m']

ddbbPoints = ddbbDataNorm[['100m Points','Lj Points','Sp Points','Hj Points','400m Points','110m H Points','Dt Points','Pv Points', 
             'Jt Points','1500m Points']]


#### Algoritmos y metricas que se van a estudiar

In [4]:
#Metricas de clasificacion basadas en la matriz de confusion
metricas = {
    'Accuracy': metrics.accuracy_score,
    'Precission': metrics.precision_score,
    'Recall': metrics.recall_score,
    'F1': metrics.f1_score,
}

In [5]:
#Diccionario con los algoritmos que se van a estudiar
algoritmos = {
    'KNN_15': KNeighborsClassifier(n_neighbors = 15,  metric='minkowski', p=1),
    'GNB': GaussianNB(),
    'PERCEPTRON': Perceptron(tol=1e-5, random_state=1),
    'MLP': MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1),
    'SVM': svm.SVC(decision_function_shape='ovo'),
    'DESGRA': SGDClassifier(max_iter=1000, tol=1e-3),
    'ARBOL': DecisionTreeClassifier(random_state=42, max_depth=10),
    'BOSQUE': RandomForestClassifier(n_estimators=350, random_state=42, max_depth=10)
}

In [6]:
algoritmosClust = {
    'KMEANS': KMeans(n_clusters=1 , random_state= 5),
}

In [7]:
algoritmosIA = {
    
}

### 4.Hold Out

#### Metricas para los diferentes algoritmos -> Separacion de datos HoldOut

Funcion que genera las etiquetas predecidas para los diferentes algoritmos

In [8]:
#Funcion que genera predicciones para casos de test en función de la separación hecha por holdOut
def generaModelosHO(nombre,X_train, X_test, y_train, y_test):
    
    #Estudia el algoritmo pasado
    algoritmo = algoritmos[nombre]

    #Entrena el modelo
    algoritmo.fit(X_train, y_train)

    #Predice para los datos de test
    y_pred = algoritmo.predict(X_test)
    
    #Devuelve el las predicciones para los casos de test
    return(y_pred)

Funcion que devuelve para un algoritmo (Se le pasa las etiquetas reales y las generadas por el algoritmo) dado todas sus metricas.

Se usa tanto para Hold Out como Cross Validation

In [9]:
#Funcion para la evaluacion de las diferentes metricas
def evaluaMetricas(y_test, y_pred):
    resultado = {}
    for nombre, funcion in metricas.items():
        if(nombre == 'Recall' or nombre == 'Precission' or nombre == 'F1'):
            resultado[nombre] = funcion(y_test, y_pred,average='micro')
        else:
            resultado[nombre] = funcion(y_test, y_pred)
            
    return resultado

Funcion que devuelve el resultado de las metricas para los algoritmos definidos en la parte superior

In [10]:
def mainHoldOut(ddbbData):
    y_pred = {}
    X_train, X_test, y_train, y_test = train_test_split(ddbbData, ddbb['PosicionGeneral'], test_size=0.3, random_state=42,shuffle=True)

    for nombre, funcion in algoritmos.items():
        y_pred[nombre] = generaModelosHO(nombre,X_train, X_test, y_train, y_test)
        
    resultado = {}
    for nombre, funcionA in algoritmos.items():
        resultado[nombre] = evaluaMetricas(y_test, y_pred[nombre])
    
    #Tabla de metricas para los algoritmos estudiados
    return resultado

### 5.Cross Validation

#### Metricas para los diferentes algoritmos -> Separacion de datos CrossValidation

In [11]:
def generaModelosCV(funcion, ddbbData):
    y_pred = cross_val_predict(funcion, ddbbData, ddbb['PosicionGeneral'], cv=KFold(n_splits=7,random_state=42,shuffle=True))
    
    return y_pred

In [12]:
def mainCrossVal(ddbbData):
    y_pred = {}

    for nombre, funcion in algoritmos.items():
        y_pred[nombre] = generaModelosCV(funcion, ddbbData)
        
    resultado = {}
    for nombre, funcionA in algoritmos.items():
        resultado[nombre] = evaluaMetricas(y_pred[nombre], ddbb['PosicionGeneral'])
    
    #Tabla de metricas para los algoritmos estudiados
    return resultado

### 6.Comparativas de las metricas

#### Resultados de las metricas para HoldOut y CrossValidation de cada prueba

In [13]:
ddbbData = ddbbPoints[['100m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> 100m lisos')
tabla

METRICAS HOLD OUT-> 100m lisos


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.38764,0.455056,0.308989,0.38764,0.477528,0.38764,0.398876,0.438202
Precission,0.38764,0.455056,0.308989,0.38764,0.477528,0.38764,0.398876,0.438202
Recall,0.38764,0.455056,0.308989,0.38764,0.477528,0.38764,0.398876,0.438202
F1,0.38764,0.455056,0.308989,0.38764,0.477528,0.38764,0.398876,0.438202


In [14]:
print('METRICAS CROSS VALIDATION-> 100m lisos')
tabla1

METRICAS CROSS VALIDATION-> 100m lisos


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.390863,0.448393,0.380711,0.382403,0.451777,0.357022,0.433164,0.423012
Precission,0.390863,0.448393,0.380711,0.382403,0.451777,0.357022,0.433164,0.423012
Recall,0.390863,0.448393,0.380711,0.382403,0.451777,0.357022,0.433164,0.423012
F1,0.390863,0.448393,0.380711,0.382403,0.451777,0.357022,0.433164,0.423012


In [15]:
ddbbData = ddbbPoints[['Lj Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> Longitud')
tabla

METRICAS HOLD OUT-> Longitud


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.44382,0.449438,0.331461,0.404494,0.483146,0.38764,0.426966,0.44382
Precission,0.44382,0.449438,0.331461,0.404494,0.483146,0.38764,0.426966,0.44382
Recall,0.44382,0.449438,0.331461,0.404494,0.483146,0.38764,0.426966,0.44382
F1,0.44382,0.449438,0.331461,0.404494,0.483146,0.38764,0.426966,0.44382


In [16]:
print('METRICAS CROSS VALIDATION-> Longitud')
tabla1

METRICAS CROSS VALIDATION-> Longitud


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.451777,0.455161,0.28088,0.441624,0.514382,0.385787,0.42978,0.42978
Precission,0.451777,0.455161,0.28088,0.441624,0.514382,0.385787,0.42978,0.42978
Recall,0.451777,0.455161,0.28088,0.441624,0.514382,0.385787,0.42978,0.42978
F1,0.451777,0.455161,0.28088,0.441624,0.514382,0.385787,0.42978,0.42978


In [17]:
ddbbData = ddbbPoints[['100m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> 100m lisos')
tabla

METRICAS HOLD OUT-> 100m lisos


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.38764,0.455056,0.308989,0.38764,0.477528,0.410112,0.398876,0.438202
Precission,0.38764,0.455056,0.308989,0.38764,0.477528,0.410112,0.398876,0.438202
Recall,0.38764,0.455056,0.308989,0.38764,0.477528,0.410112,0.398876,0.438202
F1,0.38764,0.455056,0.308989,0.38764,0.477528,0.410112,0.398876,0.438202


In [18]:
print('METRICAS CROSS VALIDATION-> 100m lisos')
tabla1

METRICAS CROSS VALIDATION-> 100m lisos


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.390863,0.448393,0.380711,0.382403,0.451777,0.392555,0.433164,0.423012
Precission,0.390863,0.448393,0.380711,0.382403,0.451777,0.392555,0.433164,0.423012
Recall,0.390863,0.448393,0.380711,0.382403,0.451777,0.392555,0.433164,0.423012
F1,0.390863,0.448393,0.380711,0.382403,0.451777,0.392555,0.433164,0.423012


In [19]:
ddbbData = ddbbPoints[['Sp Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> Peso')
tabla

METRICAS HOLD OUT-> Peso


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.466292,0.466292,0.359551,0.38764,0.426966,0.432584,0.38764,0.398876
Precission,0.466292,0.466292,0.359551,0.38764,0.426966,0.432584,0.38764,0.398876
Recall,0.466292,0.466292,0.359551,0.38764,0.426966,0.432584,0.38764,0.398876
F1,0.466292,0.466292,0.359551,0.38764,0.426966,0.432584,0.38764,0.398876


In [20]:
print('METRICAS CROSS VALIDATION-> Peso')
tabla1

METRICAS CROSS VALIDATION-> Peso


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.385787,0.416244,0.252115,0.382403,0.417936,0.362098,0.373942,0.370558
Precission,0.385787,0.416244,0.252115,0.382403,0.417936,0.362098,0.373942,0.370558
Recall,0.385787,0.416244,0.252115,0.382403,0.417936,0.362098,0.373942,0.370558
F1,0.385787,0.416244,0.252115,0.382403,0.417936,0.362098,0.373942,0.370558


In [21]:
ddbbData = ddbbPoints[['Hj Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> Altura')
tabla

METRICAS HOLD OUT-> Altura


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.359551,0.449438,0.337079,0.38764,0.404494,0.337079,0.421348,0.398876
Precission,0.359551,0.449438,0.337079,0.38764,0.404494,0.337079,0.421348,0.398876
Recall,0.359551,0.449438,0.337079,0.38764,0.404494,0.337079,0.421348,0.398876
F1,0.359551,0.449438,0.337079,0.38764,0.404494,0.337079,0.421348,0.398876


In [22]:
print('METRICAS CROSS VALIDATION-> Altura')
tabla1

METRICAS CROSS VALIDATION-> Altura


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.402707,0.448393,0.362098,0.382403,0.43824,0.409475,0.419628,0.419628
Precission,0.402707,0.448393,0.362098,0.382403,0.43824,0.409475,0.419628,0.419628
Recall,0.402707,0.448393,0.362098,0.382403,0.43824,0.409475,0.419628,0.419628
F1,0.402707,0.448393,0.362098,0.382403,0.43824,0.409475,0.419628,0.419628


In [23]:
ddbbData = ddbbPoints[['400m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> 400m lisos')
tabla

METRICAS HOLD OUT-> 400m lisos


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.438202,0.393258,0.342697,0.38764,0.410112,0.38764,0.348315,0.376404
Precission,0.438202,0.393258,0.342697,0.38764,0.410112,0.38764,0.348315,0.376404
Recall,0.438202,0.393258,0.342697,0.38764,0.410112,0.38764,0.348315,0.376404
F1,0.438202,0.393258,0.342697,0.38764,0.410112,0.38764,0.348315,0.376404


In [24]:
print('METRICAS CROSS VALIDATION-> 400m lisos')
tabla1

METRICAS CROSS VALIDATION-> 400m lisos


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.441624,0.404399,0.274112,0.382403,0.42132,0.324873,0.382403,0.37225
Precission,0.441624,0.404399,0.274112,0.382403,0.42132,0.324873,0.382403,0.37225
Recall,0.441624,0.404399,0.274112,0.382403,0.42132,0.324873,0.382403,0.37225
F1,0.441624,0.404399,0.274112,0.382403,0.42132,0.324873,0.382403,0.37225


In [25]:
ddbbData = ddbbPoints[['110m H Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> 110m vayas')
tabla

METRICAS HOLD OUT-> 110m vayas


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.438202,0.421348,0.168539,0.44382,0.505618,0.382022,0.393258,0.398876
Precission,0.438202,0.421348,0.168539,0.44382,0.505618,0.382022,0.393258,0.398876
Recall,0.438202,0.421348,0.168539,0.44382,0.505618,0.382022,0.393258,0.398876
F1,0.438202,0.421348,0.168539,0.44382,0.505618,0.382022,0.393258,0.398876


In [26]:
print('METRICAS CROSS VALIDATION-> 110m vayas')
tabla1

METRICAS CROSS VALIDATION-> 110m vayas


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.439932,0.370558,0.248731,0.472081,0.48731,0.360406,0.42132,0.419628
Precission,0.439932,0.370558,0.248731,0.472081,0.48731,0.360406,0.42132,0.419628
Recall,0.439932,0.370558,0.248731,0.472081,0.48731,0.360406,0.42132,0.419628
F1,0.439932,0.370558,0.248731,0.472081,0.48731,0.360406,0.42132,0.419628


In [27]:
ddbbData = ddbbPoints[['100m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> 100m lisos')
tabla

METRICAS HOLD OUT-> 100m lisos


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.38764,0.455056,0.308989,0.38764,0.477528,0.404494,0.398876,0.438202
Precission,0.38764,0.455056,0.308989,0.38764,0.477528,0.404494,0.398876,0.438202
Recall,0.38764,0.455056,0.308989,0.38764,0.477528,0.404494,0.398876,0.438202
F1,0.38764,0.455056,0.308989,0.38764,0.477528,0.404494,0.398876,0.438202


In [28]:
print('METRICAS CROSS VALIDATION-> 100m lisos')
tabla1

METRICAS CROSS VALIDATION-> 100m lisos


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.390863,0.448393,0.380711,0.382403,0.451777,0.333333,0.433164,0.423012
Precission,0.390863,0.448393,0.380711,0.382403,0.451777,0.333333,0.433164,0.423012
Recall,0.390863,0.448393,0.380711,0.382403,0.451777,0.333333,0.433164,0.423012
F1,0.390863,0.448393,0.380711,0.382403,0.451777,0.333333,0.433164,0.423012


In [29]:
ddbbData = ddbbPoints[['Dt Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> Disco')
tabla

METRICAS HOLD OUT-> Disco


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.426966,0.466292,0.168539,0.38764,0.455056,0.219101,0.325843,0.353933
Precission,0.426966,0.466292,0.168539,0.38764,0.455056,0.219101,0.325843,0.353933
Recall,0.426966,0.466292,0.168539,0.38764,0.455056,0.219101,0.325843,0.353933
F1,0.426966,0.466292,0.168539,0.38764,0.455056,0.219101,0.325843,0.353933


In [30]:
print('METRICAS CROSS VALIDATION-> Disco')
tabla1

METRICAS CROSS VALIDATION-> Disco


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.390863,0.43824,0.270728,0.382403,0.42978,0.401015,0.375635,0.370558
Precission,0.390863,0.43824,0.270728,0.382403,0.42978,0.401015,0.375635,0.370558
Recall,0.390863,0.43824,0.270728,0.382403,0.42978,0.401015,0.375635,0.370558
F1,0.390863,0.43824,0.270728,0.382403,0.42978,0.401015,0.375635,0.370558


In [31]:
ddbbData = ddbbPoints[['Pv Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> Pertiga')
tabla

METRICAS HOLD OUT-> Pertiga


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.438202,0.348315,0.258427,0.38764,0.421348,0.432584,0.455056,0.449438
Precission,0.438202,0.348315,0.258427,0.38764,0.421348,0.432584,0.455056,0.449438
Recall,0.438202,0.348315,0.258427,0.38764,0.421348,0.432584,0.455056,0.449438
F1,0.438202,0.348315,0.258427,0.38764,0.421348,0.432584,0.455056,0.449438


In [32]:
print('METRICAS CROSS VALIDATION-> Pertiga')
tabla1

METRICAS CROSS VALIDATION-> Pertiga


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.377327,0.377327,0.28934,0.382403,0.428088,0.379019,0.428088,0.426396
Precission,0.377327,0.377327,0.28934,0.382403,0.428088,0.379019,0.428088,0.426396
Recall,0.377327,0.377327,0.28934,0.382403,0.428088,0.379019,0.428088,0.426396
F1,0.377327,0.377327,0.28934,0.382403,0.428088,0.379019,0.428088,0.426396


In [33]:
ddbbData = ddbbPoints[['Jt Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> Javalina')
tabla

METRICAS HOLD OUT-> Javalina


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.370787,0.41573,0.348315,0.38764,0.404494,0.224719,0.426966,0.353933
Precission,0.370787,0.41573,0.348315,0.38764,0.404494,0.224719,0.426966,0.353933
Recall,0.370787,0.41573,0.348315,0.38764,0.404494,0.224719,0.426966,0.353933
F1,0.370787,0.41573,0.348315,0.38764,0.404494,0.224719,0.426966,0.353933


In [34]:
print('METRICAS CROSS VALIDATION-> Javalina')
tabla1

METRICAS CROSS VALIDATION-> Javalina


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.367174,0.411168,0.296108,0.382403,0.404399,0.380711,0.34687,0.37225
Precission,0.367174,0.411168,0.296108,0.382403,0.404399,0.380711,0.34687,0.37225
Recall,0.367174,0.411168,0.296108,0.382403,0.404399,0.380711,0.34687,0.37225
F1,0.367174,0.411168,0.296108,0.382403,0.404399,0.380711,0.34687,0.37225


In [35]:
ddbbData = ddbbPoints[['1500m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT-> 1500m')
tabla

METRICAS HOLD OUT-> 1500m


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.410112,0.376404,0.129213,0.38764,0.38764,0.398876,0.393258,0.410112
Precission,0.410112,0.376404,0.129213,0.38764,0.38764,0.398876,0.393258,0.410112
Recall,0.410112,0.376404,0.129213,0.38764,0.38764,0.398876,0.393258,0.410112
F1,0.410112,0.376404,0.129213,0.38764,0.38764,0.398876,0.393258,0.410112


In [36]:
print('METRICAS CROSS VALIDATION-> 1500m')
tabla1

METRICAS CROSS VALIDATION-> 1500m


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.424704,0.411168,0.245347,0.382403,0.395939,0.2978,0.360406,0.358714
Precission,0.424704,0.411168,0.245347,0.382403,0.395939,0.2978,0.360406,0.358714
Recall,0.424704,0.411168,0.245347,0.382403,0.395939,0.2978,0.360406,0.358714
F1,0.424704,0.411168,0.245347,0.382403,0.395939,0.2978,0.360406,0.358714


In [37]:
#PRUEBA -> Con todas las pruebas
ho = mainHoldOut(ddbbPoints)
cv = mainCrossVal(ddbbPoints)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT CON TODOS LOS DATOS')
tabla

Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.651685,0.668539,0.449438,0.370787,0.691011,0.578652,0.432584,0.629213
Precission,0.651685,0.668539,0.449438,0.370787,0.691011,0.578652,0.432584,0.629213
Recall,0.651685,0.668539,0.449438,0.370787,0.691011,0.578652,0.432584,0.629213
F1,0.651685,0.668539,0.449438,0.370787,0.691011,0.578652,0.432584,0.629213


In [38]:
print('METRICAS CROSS VALIDATION CON TODOS LOS DATOS')
tabla1

Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.632826,0.617597,0.436548,0.697124,0.683587,0.551607,0.539763,0.648054
Precission,0.632826,0.617597,0.436548,0.697124,0.683587,0.551607,0.539763,0.648054
Recall,0.632826,0.617597,0.436548,0.697124,0.683587,0.551607,0.539763,0.648054
F1,0.632826,0.617597,0.436548,0.697124,0.683587,0.551607,0.539763,0.648054


In [39]:
#PRUEBA -> Con pruebas seleccionadas

ddbbData = ddbbPoints[['100m Points','Sp Points','Hj Points','Dt Points',
                    'Jt Points','1500m Points']]

ho = mainHoldOut(ddbbData)
cv = mainCrossVal(ddbbData)

tabla=pd.DataFrame(ho)
tabla1=pd.DataFrame(cv)

print('METRICAS HOLD OUT CON PRUEBAS SELECCIONADAS')
tabla

METRICAS HOLD OUT CON PRUEBAS SELECCIONADAS


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.578652,0.595506,0.410112,0.646067,0.651685,0.623596,0.505618,0.601124
Precission,0.578652,0.595506,0.410112,0.646067,0.651685,0.623596,0.505618,0.601124
Recall,0.578652,0.595506,0.410112,0.646067,0.651685,0.623596,0.505618,0.601124
F1,0.578652,0.595506,0.410112,0.646067,0.651685,0.623596,0.505618,0.601124


In [40]:
print('METRICAS CROSS VALIDATION CON PRUEBAS SELECCIONADAS')
tabla1

METRICAS CROSS VALIDATION CON PRUEBAS SELECCIONADAS


Unnamed: 0,KNN_15,GNB,PERCEPTRON,MLP,SVM,DESGRA,ARBOL,BOSQUE
Accuracy,0.582064,0.588832,0.526227,0.659898,0.631134,0.531303,0.489002,0.598985
Precission,0.582064,0.588832,0.526227,0.659898,0.631134,0.531303,0.489002,0.598985
Recall,0.582064,0.588832,0.526227,0.659898,0.631134,0.531303,0.489002,0.598985
F1,0.582064,0.588832,0.526227,0.659898,0.631134,0.531303,0.489002,0.598985


### 7.Predicciones Juegos Olimpicos 2021

In [41]:
ddbbData = ddbbDataNorm[['100m Points','Sp Points','Hj Points','Dt Points','Jt Points','1500m Points']]

#Creamos el dataset para entrenar los algoritmos
X_train = ddbbPoints
y_train = ddbb['PosicionGeneral']

path = "../DecatlonEstadistics/resources/testResults.csv"
norm_path = os.path.normpath(path) 
predictionData = pd.read_csv(norm_path)
del predictionData['Unnamed: 11']

predictionData.columns = ['Athlete','100m','Lj','Sp','Hj','400m','110m H','Dt','Pv','Jt','1500m']
predDataset = predictionData[['100m','Lj','Sp','Hj','400m','110m H','Dt','Pv','Jt','1500m']]

for i in range(len(predDataset['100m'])): predDataset['100m'][i] = 25.4347*math.pow((18-predDataset['100m'][i]),1.81)
for i in range(len(predDataset['1500m'])): predDataset['1500m'][i] = 0.037*math.pow((480-predDataset['1500m'][i]),1.85)
for i in range(len(predDataset['Hj'])): predDataset['Hj'][i] = 0.86*math.pow((predDataset['Hj'][i]*100-75),1.42)
for i in range(len(predDataset['Sp'])): predDataset['Sp'][i] = 51.39*math.pow((predDataset['Sp'][i]-1.5),1.05)
for i in range(len(predDataset['Dt'])): predDataset['Dt'][i] = 12.91*math.pow((predDataset['Dt'][i]-4),1.10)
for i in range(len(predDataset['Jt'])): predDataset['Jt'][i] = 10.14*math.pow((predDataset['Jt'][i]-7),1.08)   
for i in range(len(predDataset['Lj'])): predDataset['Lj'][i] = 0.14*math.pow((predDataset['Lj'][i]*100-220),1.4)
for i in range(len(predDataset['Pv'])): predDataset['Pv'][i] = 0.27*math.pow((predDataset['Pv'][i]*100-100),1.35)
for i in range(len(predDataset['400m'])): predDataset['400m'][i] = 1.5377*math.pow((82-predDataset['400m'][i]),1.81)
for i in range(len(predDataset['110m H'])): predDataset['110m H'][i] = 5.74*math.pow((28.5-predDataset['110m H'][i]),1.92)

#Normaliza los datos
normalizer = preprocessing.MinMaxScaler()
predDatasetNormalized = normalizer.fit_transform(predDataset)

#Introduce los datos normalizados en un nuevo en un dataframe
predDatasetNorm = pd.DataFrame(predDatasetNormalized)



X_test = predDatasetNorm

#### Predicciones Vecinos Cercanos MLP

In [43]:
#del predictionData['Postion Predicted']
mlp = algoritmos['MLP']

mlp.fit(X_train,y_train)

y_pred=mlp.predict(X_test)


posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,10
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,20
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,25
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,25
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,25
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,25
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,25
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,25
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,25
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,25


#### Predicciones Vecinos Cercanos SVM

In [44]:
del predictionData['Postion Predicted']
svm = algoritmos['SVM']

svm.fit(X_train,y_train)

y_pred=svm.predict(X_test)


posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,10
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,20
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,25
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,25
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,25
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,25
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,25
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,25
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,25
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,25


#### Predicciones Vecinos Cercanos Bosques aleatorios

In [45]:
del predictionData['Postion Predicted']

rForest= algoritmos['BOSQUE']

rForest.fit(X_train,y_train)

y_pred=rForest.predict(X_test)


posPredict = [int(x) for x in y_pred]

predictionData.insert(11, "Postion Predicted", posPredict, True)
predictionData

Unnamed: 0,Athlete,100m,Lj,Sp,Hj,400m,110m H,Dt,Pv,Jt,1500m,Postion Predicted
0,Kevin Mayer,10.55,7.8,16.0,2.05,48.42,13.75,50.54,5.45,71.9,276.11,20
1,Damian Warner,10.31,7.81,14.83,2.03,47.72,13.56,47.32,4.8,61.94,266.59,10
2,Maicel Uibo,11.04,7.56,14.78,2.12,50.32,14.66,46.58,5.3,61.75,267.54,25
3,Arthur Abele,10.85,7.28,15.93,1.89,48.4,14.01,44.77,4.9,67.61,262.22,20
4,Pieter Braun,11.12,7.62,15.28,2.0,49.25,14.4,45.52,4.9,58.77,264.29,25
5,Timothy Duckworth,10.57,8.01,13.15,2.13,48.78,14.37,42.76,5.11,57.27,301.27,20
6,Kai Kazmirek,10.99,7.56,14.03,2.06,47.27,14.42,43.76,4.7,61.53,270.75,25
7,Kevin Mayer,10.68,7.4,16.2,1.97,48.87,13.54,50.32,4.65,67.66,287.74,20
8,Ashley Moloney,10.36,7.67,13.62,2.11,45.82,14.17,43.93,4.8,57.77,288.48,20
9,Cedric Dubler,10.79,7.62,13.24,2.11,47.84,14.34,41.7,5.0,62.48,281.05,25
