# Men's team unlikely to medal in floor

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from mlxtend.preprocessing import minmax_scaling
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
df = pd.read_csv('../../Combine_Data/men/fx_encoded.csv')
olymp = pd.read_csv('../../Data/cleandata22-23/encoded_m_olympics_fx.csv')
fxnames = pd.read_csv('../../Data/cleandata22-23/encoded_m_olympics_fxnames.csv')
fxolymp = pd.read_csv('../../Data/cleandata22-23/men22_23.csv')

In [33]:
df.head()

Unnamed: 0,Rank,D,E,ND,Total,year,medal,Name,Nation,round_final,round_qual
0,1,6.2,8.366,0.0,14.566,2019,0,49,54,True,False
1,2,5.9,8.6,0.0,14.5,2019,0,97,39,True,False
2,3,6.1,8.4,0.0,14.5,2019,1,223,25,True,False
3,3,6.1,8.4,0.0,14.5,2019,0,72,79,True,False
4,5,6.2,8.0,0.0,14.2,2019,0,144,62,True,False


In [34]:

# Assuming you already have your DataFrame 'df' and the target variable is 'medal'
y = df['medal']
X = df.drop(columns=['medal'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(),
    "Naive Bayes": GaussianNB(),
    "Neural Network": MLPClassifier(),
}

results = {}

# Iterate through each classifier
for name, clf in classifiers.items():
    # Create a pipeline with StandardScaler for classifiers that require it
    if name in ["SVM", "K-Nearest Neighbors", "Neural Network"]:
        clf = make_pipeline(StandardScaler(), clf)

    # Fit the model to the training data
    clf.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = clf.predict(X_test)

    # Evaluate the model's performance
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)

    results[name] = {
        "accuracy": accuracy,
        "classification_report": report,
        "confusion_matrix": conf_matrix,
    }

# Print the results
for name, result in results.items():
    print(f"Classifier: {name}")
    print(f"Accuracy: {result['accuracy']:.4f}")
    print(f"Classification Report:\n{result['classification_report']}")
    print(f"Confusion Matrix:\n{result['confusion_matrix']}\n")


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classifier: Random Forest
Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        83
           1       1.00      1.00      1.00         2

    accuracy                           1.00        85
   macro avg       1.00      1.00      1.00        85
weighted avg       1.00      1.00      1.00        85

Confusion Matrix:
[[83  0]
 [ 0  2]]

Classifier: AdaBoost
Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        83
           1       1.00      1.00      1.00         2

    accuracy                           1.00        85
   macro avg       1.00      1.00      1.00        85
weighted avg       1.00      1.00      1.00        85

Confusion Matrix:
[[83  0]
 [ 0  2]]

Classifier: SVM
Accuracy: 0.9765
Classification Report:
              precision    recall  f1-score   support

           0       0.98      1.0



Classifier: Decision Tree
Accuracy: 1.0000

Classifier: Random Forest
Accuracy: 1.0000

Classifier: AdaBoost
Accuracy: 1.0000


In [35]:
X_train.head()

Unnamed: 0,Rank,D,E,ND,Total,year,Name,Nation,round_final,round_qual
131,2,6.4,8.8,0.0,15.2,2019,31,35,True,False
31,24,5.4,7.333,-0.3,12.433,2019,247,62,False,True
84,5,6.0,8.35,0.0,14.35,2019,273,39,False,True
289,152,4.5,8.433,-0.4,12.533,2019,86,49,False,True
408,15,5.7,8.0,-0.1,27.0,2020,13,75,True,False


In [42]:
olymp.head()


Unnamed: 0,Rank,D,E,ND,Total,year,Name,Nation,round_final,round_qual
0,54.0,4.0,8.566,0.0,12.566,2023,154,59,False,True
1,13.0,5.5,8.3,0.0,13.8,2022,354,27,False,False
2,48.0,5.5,7.9,0.0,13.4,2022,354,27,False,True
3,86.0,5.4,7.133,0.1,12.433,2022,354,27,False,True
4,24.0,5.4,7.233,0.0,12.633,2023,354,27,False,False


In [38]:
olymp.rename(columns={'D Score': 'D', 'E Score': 'E',
                     'Pen.': 'ND', 'nation': 'Nation'}, inplace=True)

In [41]:
olymp=olymp.drop(columns=['round_TeamFinal', 'round_AAfinal'])

In [43]:

# Assuming you already have your DataFrame 'df' and the target variable is 'medal'
y = df['medal']
X = df.drop(columns=['medal'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.12, random_state=42)

# Initialize the AdaBoost classifier
clf = AdaBoostClassifier()

# Fit the model to the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

# Print the results
print("AdaBoost Classifier")
print(f"Accuracy: {accuracy:.4f}")
print(f"Classification Report:\n{report}")
print(f"Confusion Matrix:\n{conf_matrix}")

AdaBoost Classifier
Accuracy: 1.0000
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        49
           1       1.00      1.00      1.00         2

    accuracy                           1.00        51
   macro avg       1.00      1.00      1.00        51
weighted avg       1.00      1.00      1.00        51

Confusion Matrix:
[[49  0]
 [ 0  2]]


In [44]:
y_pred = clf.predict(olymp)


In [45]:
ypreds  = pd.Series(y_pred)
olymp['ypred']=ypreds


In [46]:
olymp['ypred'].value_counts()

ypred
0    1330
1      35
Name: count, dtype: int64

In [47]:
filtered_df = olymp[olymp['ypred'] == 1]

In [48]:
filtered_df['Name'].unique()

array([432, 502, 175, 343, 206, 394, 342, 398, 364, 434, 363, 232, 163,
       289, 299, 301])

In [50]:
fxnames[fxnames['Name_encoded']==432]

Unnamed: 0,Name,Name_encoded
304,ryosuke doi,432
305,ryosuke doi,432
306,ryosuke doi,432
307,ryosuke doi,432


In [68]:
fxnames

Unnamed: 0,Name,Name_encoded
0,fredrik bjornevik aas,154
1,nestor abad,354
2,nestor abad,354
3,nestor abad,354
4,nestor abad,354
...,...,...
1360,uri zeidel,484
1361,ahmet önder,14
1362,ahmet önder,14
1363,ahmet önder,14


In [51]:
name_encoded_values = [432, 502, 175, 343, 206, 394, 342, 398, 364, 434, 363, 232, 163, 289, 299, 301]

# Filter the DataFrame to get the corresponding "Name" values
result = fxnames.loc[fxnames['Name_encoded'].isin(name_encoded_values), 'Name']


In [53]:
result.unique()

array(['ryosuke doi', 'william emard', 'harry hepworth', 'milan hosseini',
       'jake jarman', 'pau jimenez i fernandez', 'milad karimi',
       'pavel karnejenko', 'nicolau mir', 'nicolau mir rossello',
       'sam mostowfi', 'joel plata', 'giarnni regini moran',
       'levan skhiladze', 'luka van den keybus', 'luke whitehouse'],
      dtype=object)

In [63]:
fxolymp["Name"]=fxolymp["Name"].str.lower()

In [67]:
fxolymp

Unnamed: 0,Name,Country,Date,Gender,Competition,Round,Location,Apparatus,Rank,D_Score,E_Score,Penalty,Score,Total_Score
0,fredrik aas,NOR,2023-02-26,m,FIG Apparatus World Cup 2023,qual,"Cottbus, Germany",HB,39.0,4.6,6.700,0.0,11.300,11.300
1,fredrik aas,NOR,2023-02-26,m,FIG Apparatus World Cup 2023,qual,"Cottbus, Germany",PH,44.0,4.4,7.800,0.0,12.200,12.200
2,fredrik bjornevik aas,NOR,2023-08-05,m,2023 FISU World University Games,qual,"Chengdu, China",FX,54.0,4.0,8.566,0.0,12.566,12.566
3,fredrik bjornevik aas,NOR,2023-08-05,m,2023 FISU World University Games,qual,"Chengdu, China",HB,44.0,4.6,8.166,0.0,12.766,12.766
4,fredrik bjornevik aas,NOR,2023-08-05,m,2023 FISU World University Games,qual,"Chengdu, China",PB,54.0,4.4,8.066,0.0,12.466,12.466
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8573,ahmet önder,TUR,2023-06-04,m,2023 Tel Aviv Challenge Cup,final,"Tel Aviv, Israel",FX,8.0,4.8,7.050,0.0,11.850,11.850
8574,ahmet önder,TUR,2023-06-04,m,2023 Tel Aviv Challenge Cup,qual,"Tel Aviv, Israel",FX,3.0,5.8,7.950,0.1,13.650,13.650
8575,ahmet önder,TUR,2023-06-04,m,2023 Tel Aviv Challenge Cup,qual,"Tel Aviv, Israel",HB,10.0,5.0,6.250,0.0,11.250,11.250
8576,ahmet önder,TUR,2023-06-04,m,2023 Tel Aviv Challenge Cup,final,"Tel Aviv, Israel",PB,1.0,6.3,8.050,0.0,14.350,14.350


In [75]:
names_to_filter = ['ryosuke doi', 'william emard', 'harry hepworth', 'milan hosseini',
                   'jake jarman', 'pau jimenez i fernandez', 'milad karimi',
                   'pavel karnejenko', 'nicolau mir', 'nicolau mir rossello',
                   'sam mostowfi', 'joel plata', 'giarnni regini moran',
                   'levan skhiladze', 'luka van den keybus', 'luke whitehouse']

# Filter the DataFrame to get rows with the specified "Name" values
result = fxolymp[fxolymp['Name'].isin(names_to_filter)]


In [79]:
result['Country'].value_counts()

Country
GBR    225
ESP     86
KAZ     38
BEL     24
ENG     23
JPN     14
SCO     13
GEO     13
CAN     11
GE2      7
GER      6
Name: count, dtype: int64