# Men's team unlikely to medal in floor

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from mlxtend.preprocessing import minmax_scaling
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
df = pd.read_csv('../../Combine_Data/men/fx_encoded.csv')
olymp = pd.read_csv('../../Data/cleandata22-23/encoded_m_olympics_fx.csv')
fxnames = pd.read_csv('../../Data/cleandata22-23/encoded_m_olympics_fxnames.csv')
fxolymp = pd.read_csv('../../Data/cleandata22-23/men22_23.csv')

In [3]:
df.head()

Unnamed: 0,Rank,D,E,ND,Total,year,medal,Name,Nation,round_final,round_qual
0,1,6.2,8.366,0.0,14.566,2019,0,49,54,True,False
1,2,5.9,8.6,0.0,14.5,2019,0,97,39,True,False
2,3,6.1,8.4,0.0,14.5,2019,1,223,25,True,False
3,3,6.1,8.4,0.0,14.5,2019,0,72,79,True,False
4,5,6.2,8.0,0.0,14.2,2019,0,144,62,True,False


In [4]:
y = df['medal']
X = df.drop(columns=['medal'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB(),
    "Neural Network": MLPClassifier(),
}

results = {}

# Iterate through each classifier
for name, clf in classifiers.items():
    # Create a pipeline with StandardScaler for classifiers that require it
    if name in ["SVM", "K-Nearest Neighbors", "Neural Network"]:
        clf = make_pipeline(StandardScaler(), clf)

    # Fit the model to the training data
    clf.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = clf.predict(X_test)

    # Evaluate the model's performance
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    results[name] = {
        "accuracy": accuracy,
        "classification_report": report,
        "confusion_matrix": conf_matrix,
    }

# Print the results
for name, result in results.items():
    print(f"Classifier: {name}")
    print(f"Accuracy: {result['accuracy']:.4f}")
    print(f"Classification Report:\n{result['classification_report']}")
    print(f"Confusion Matrix:\n{result['confusion_matrix']}\n")


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classifier: Random Forest
Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        83
           1       1.00      1.00      1.00         2

    accuracy                           1.00        85
   macro avg       1.00      1.00      1.00        85
weighted avg       1.00      1.00      1.00        85

Confusion Matrix:
[[83  0]
 [ 0  2]]

Classifier: AdaBoost
Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        83
           1       1.00      1.00      1.00         2

    accuracy                           1.00        85
   macro avg       1.00      1.00      1.00        85
weighted avg       1.00      1.00      1.00        85

Confusion Matrix:
[[83  0]
 [ 0  2]]

Classifier: SVM
Accuracy: 0.9765
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.0



Classifier: Decision Tree
Accuracy: 1.0000

Classifier: Random Forest
Accuracy: 1.0000

Classifier: AdaBoost
Accuracy: 1.0000


In [5]:
X_train.head()

Unnamed: 0,Rank,D,E,ND,Total,year,Name,Nation,round_final,round_qual
131,2,6.4,8.8,0.0,15.2,2019,31,35,True,False
31,24,5.4,7.333,-0.3,12.433,2019,247,62,False,True
84,5,6.0,8.35,0.0,14.35,2019,273,39,False,True
289,152,4.5,8.433,-0.4,12.533,2019,86,49,False,True
408,15,5.7,8.0,-0.1,27.0,2020,13,75,True,False


In [8]:
olymp.head()


Unnamed: 0,Rank,D,E,ND,Total,year,Name,Nation,round_AAfinal,round_TeamFinal,round_TeamQual,round_final,round_qual
0,54.0,4.0,8.566,0.0,12.566,2023,236,67,False,False,False,False,True
1,13.0,5.5,8.3,0.0,13.8,2022,527,30,True,False,False,False,False
2,48.0,5.5,7.9,0.0,13.4,2022,527,30,False,False,False,False,True
3,86.0,5.4,7.133,0.1,12.433,2022,527,30,False,False,False,False,True
4,21.0,5.5,8.666,0.0,14.166,2023,527,30,False,False,False,False,True


In [7]:
olymp.rename(columns={'D Score': 'D', 'E Score': 'E',
                     'Pen.': 'ND', 'nation': 'Nation'}, inplace=True)

In [9]:
olymp=olymp.drop(columns=['round_TeamFinal', 'round_AAfinal', 'round_TeamQual']) 

In [10]:
y = df['medal']
X = df.drop(columns=['medal'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.12, random_state=42)

# Initialize the AdaBoost classifier
clf = AdaBoostClassifier()

# Fit the model to the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Print the results
print("AdaBoost Classifier")
print(f"Accuracy: {accuracy:.4f}")
print(f"Classification Report:\n{report}")
print(f"Confusion Matrix:\n{conf_matrix}")

AdaBoost Classifier
Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        49
           1       1.00      1.00      1.00         2

    accuracy                           1.00        51
   macro avg       1.00      1.00      1.00        51
weighted avg       1.00      1.00      1.00        51

Confusion Matrix:
[[49  0]
 [ 0  2]]


In [11]:
y_pred = clf.predict(olymp)


In [12]:
ypreds  = pd.Series(y_pred)
olymp['ypred']=ypreds


In [13]:
olymp['ypred'].value_counts()

ypred
0    2335
1     104
Name: count, dtype: int64

In [14]:
filtered_df = olymp[olymp['ypred'] == 1]

In [16]:
filtered_df['Name'].unique()

array([431, 294, 482, 440, 538, 634, 303, 739, 468, 766, 767, 313, 264,
       414, 308, 578, 506, 582, 287, 745, 358, 390, 540, 541, 768, 640,
       542, 337, 341, 249, 678, 746, 265, 733, 731, 344, 448, 353, 451,
       333, 603])

In [17]:
fxnames[fxnames['Name_encoded']==432]

Unnamed: 0,Name,Name_encoded
1373,leonardo mamani betancourt,432


In [18]:
fxnames

Unnamed: 0,Name,Name_encoded
0,fredrik aas,236
1,nestor abad,527
2,nestor abad,527
3,nestor abad,527
4,nestor abad,527
...,...,...
2434,ahmet önder,19
2435,ahmet önder,19
2436,ahmet önder,19
2437,ahmet önder,19


In [19]:
name_encoded_values = [431, 294, 482, 440, 538, 634, 303, 739, 468, 766, 767, 313, 264,
       414, 308, 578, 506, 582, 287, 745, 358, 390, 540, 541, 768, 640,
       542, 337, 341, 249, 678, 746, 265, 733, 731, 344, 448, 353, 451,
       333, 603]

# Filter the DataFrame to get the corresponding "Name" values
result = fxnames.loc[fxnames['Name_encoded'].isin(name_encoded_values), 'Name']


In [20]:
result.unique()

array(['leonardo armijo subaique', 'israel fernando chiriboga guerrero',
       'matt cormier', 'lucas de souza bitencourt', 'nicolas diez',
       'ryosuke doi', 'jacob guest edwards', 'william emard',
       'marios georgiou', 'yuri guimaraes', 'yuri guimarães',
       'james hall', 'harry hepworth', 'koga hiramatsu', 'jake jarman',
       'pau jimenez i fernandez', 'milad karimi', 'pavel karnejenko',
       'illia kovtun', 'xingyu lan', 'jose manuel martinez moreno',
       'kazuki minami', 'nicolau mir', 'nicolau mir rossello',
       'yuri monteverde', 'sam mostowfi', 'nicolo mozzato',
       'joan leonardo pilay saigua', 'joel plata', 'giarnni regini moran',
       'sunghyun ryu', 'yahor sharamkou', 'hayden skinner', 'weide su',
       'wei sun', 'johnny adrian valencia zambrano',
       'luka van den keybus', 'jorge vega', 'luke whitehouse',
       'jiaxing yang', 'rayderley zapata'], dtype=object)

In [21]:
fxolymp["Name"]=fxolymp["Name"].str.lower()

In [22]:
fxolymp

Unnamed: 0,Name,Country,Date,Gender,Competition,Round,Location,Apparatus,Rank,D_Score,E_Score,Penalty,Score,Total_Score
0,fredrik aas,NOR,2022-02-27,m,2022 Cottbus World Cup,qual,"Cottbus, Germany",HB,18.0,3.9,8.266,0.0,12.166,12.166
1,fredrik aas,NOR,2022-02-27,m,2022 Cottbus World Cup,qual,"Cottbus, Germany",HB,18.0,3.9,8.266,0.0,12.166,12.166
2,fredrik aas,NOR,2022-02-27,m,2022 Cottbus World Cup,qual,"Cottbus, Germany",PB,23.0,3.9,6.900,0.0,10.800,10.800
3,fredrik aas,NOR,2022-02-27,m,2022 Cottbus World Cup,qual,"Cottbus, Germany",PB,23.0,3.9,6.900,0.0,10.800,10.800
4,fredrik aas,NOR,2022-02-27,m,2022 Cottbus World Cup,qual,"Cottbus, Germany",PH,33.0,4.2,6.666,0.0,10.866,10.866
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15128,ahmet önder,TUR,2023-06-04,m,2023 Tel Aviv World Challenge Cup,final,"Tel Aviv, Israel",FX,8.0,4.8,7.050,0.0,11.850,11.850
15129,ahmet önder,TUR,2023-06-04,m,2023 Tel Aviv World Challenge Cup,qual,"Tel Aviv, Israel",FX,3.0,5.8,7.950,0.1,13.650,13.650
15130,ahmet önder,TUR,2023-06-04,m,2023 Tel Aviv World Challenge Cup,qual,"Tel Aviv, Israel",HB,10.0,5.0,6.250,0.0,11.250,11.250
15131,ahmet önder,TUR,2023-06-04,m,2023 Tel Aviv World Challenge Cup,final,"Tel Aviv, Israel",PB,1.0,6.3,8.050,0.0,14.350,14.350


In [23]:
names_to_filter = ['leonardo armijo subaique', 'israel fernando chiriboga guerrero',
       'matt cormier', 'lucas de souza bitencourt', 'nicolas diez',
       'ryosuke doi', 'jacob guest edwards', 'william emard',
       'marios georgiou', 'yuri guimaraes', 'yuri guimarães',
       'james hall', 'harry hepworth', 'koga hiramatsu', 'jake jarman',
       'pau jimenez i fernandez', 'milad karimi', 'pavel karnejenko',
       'illia kovtun', 'xingyu lan', 'jose manuel martinez moreno',
       'kazuki minami', 'nicolau mir', 'nicolau mir rossello',
       'yuri monteverde', 'sam mostowfi', 'nicolo mozzato',
       'joan leonardo pilay saigua', 'joel plata', 'giarnni regini moran',
       'sunghyun ryu', 'yahor sharamkou', 'hayden skinner', 'weide su',
       'wei sun', 'johnny adrian valencia zambrano',
       'luka van den keybus', 'jorge vega', 'luke whitehouse',
       'jiaxing yang', 'rayderley zapata']

# Filter the DataFrame to get rows with the specified "Name" values
result = fxolymp[fxolymp['Name'].isin(names_to_filter)]


In [24]:
result['Country'].value_counts()

Country
GBR    364
ESP    157
UKR    151
KAZ     99
CHN     84
BRA     60
CAN     49
CYP     48
KOR     48
JPN     38
USA     38
ENG     37
BEL     24
ECU     13
SCO     13
BLR     10
ITA     10
COL      7
CCS      5
CHI      3
FRA      1
Name: count, dtype: int64

In [26]:
usa = result[result['Country']=='USA']

In [29]:
usa['Name'].value_counts()

Name
matt cormier    38
Name: count, dtype: int64