# Curran Phillips

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
from mlxtend.preprocessing import minmax_scaling
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import make_circles, make_classification, make_moons
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [2]:
# Read the four CSV inputs in one pass. The paths are listed in the same
# order as the names on the left-hand side of the assignment.
df, olymp, pbnames, pbolymp = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../Combine_Data/men/pb_encoded.csv',                     # -> df
        '../../Data/cleandata22-23/encoded_m_olympics_pb.csv',       # -> olymp
        '../../Data/cleandata22-23/encoded_m_olympics_pbnames.csv',  # -> pbnames
        '../../Data/cleandata22-23/men22_23.csv',                    # -> pbolymp
    )
)


In [3]:
# Print the column names, non-null counts and dtypes of `df` as a quick
# sanity check before `medal` is split off as the target.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Rank         506 non-null    int64  
 1   D            506 non-null    float64
 2   E            506 non-null    float64
 3   ND           506 non-null    float64
 4   Total        506 non-null    float64
 5   year         506 non-null    int64  
 6   medal        506 non-null    int64  
 7   Name         506 non-null    int64  
 8   Nation       506 non-null    int64  
 9   round_final  506 non-null    bool   
 10  round_qual   506 non-null    bool   
dtypes: bool(2), float64(4), int64(5)
memory usage: 36.7 KB


In [4]:
# Compare several off-the-shelf classifiers on the `medal` target using a
# single hold-out split. For each model the accuracy, the per-class report
# and the confusion matrix are collected in `results` and printed.

# One seed for the split and for every stochastic estimator, so that a fresh
# "Restart & Run All" reproduces the numbers printed below.
SEED = 19

# Models that are wrapped in a StandardScaler pipeline before fitting.
SCALED_MODELS = {"SVM", "K-Nearest Neighbors", "Neural Network"}

y = df['medal']
X = df.drop(columns=['medal'])

# Split the data into training and testing sets.
# The random state was picked by hand to get a larger number of positive
# rows into the test frame.
# NOTE(review): the printed report shows a very small positive class in the
# test split; `stratify=y` would guarantee the class ratio without
# hand-picking a seed. Left unchanged here so the split matches the later
# AdaBoost cell, which uses the same arguments.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=SEED)

# Initialize classifiers. Every estimator that draws random numbers gets the
# shared seed; the remaining ones are left at their defaults.
classifiers = {
    "Random Forest": RandomForestClassifier(random_state=SEED),
    "AdaBoost": AdaBoostClassifier(random_state=SEED),
    "SVM": SVC(),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=SEED),
    "Naive Bayes": GaussianNB(),
    "Neural Network": MLPClassifier(random_state=SEED),
}

results = {}

# Iterate through each classifier
for name, clf in classifiers.items():
    # Create a pipeline with StandardScaler for classifiers that require it
    if name in SCALED_MODELS:
        clf = make_pipeline(StandardScaler(), clf)

    # Fit the model to the training data
    clf.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = clf.predict(X_test)

    # Evaluate the model's performance.
    # zero_division=0 reports 0.0 for a class that was never predicted
    # (the same value as the default) without emitting a warning per metric.
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred, zero_division=0)
    conf_matrix = confusion_matrix(y_test, y_pred)

    results[name] = {
        "accuracy": accuracy,
        "classification_report": report,
        "confusion_matrix": conf_matrix,
    }

# Print the results
for name, result in results.items():
    print(f"Classifier: {name}")
    print(f"Accuracy: {result['accuracy']:.4f}")
    print(f"Classification Report:\n{result['classification_report']}")
    print(f"Confusion Matrix:\n{result['confusion_matrix']}\n")


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classifier: Random Forest
Accuracy: 0.9608
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98        96
           1       0.75      0.50      0.60         6

    accuracy                           0.96       102
   macro avg       0.86      0.74      0.79       102
weighted avg       0.96      0.96      0.96       102

Confusion Matrix:
[[95  1]
 [ 3  3]]

Classifier: AdaBoost
Accuracy: 0.9902
Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99        96
           1       1.00      0.83      0.91         6

    accuracy                           0.99       102
   macro avg       0.99      0.92      0.95       102
weighted avg       0.99      0.99      0.99       102

Confusion Matrix:
[[96  0]
 [ 1  5]]

Classifier: SVM
Accuracy: 0.9412
Classification Report:
              precision    recall  f1-score   support

           0       0.94      1.0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classifier: AdaBoost
Accuracy: 0.9902

In [5]:
# Show the first rows of the training features; the row labels are the
# original `df` index values that ended up in the training split.
X_train.head()

Unnamed: 0,Rank,D,E,ND,Total,year,Name,Nation,round_final,round_qual
84,25,6.2,8.266,0.0,14.466,2018,141,39,False,True
257,1,6.3,8.733,0.0,15.033,2019,166,76,False,True
417,161,4.9,7.233,0.0,12.133,2019,64,2,False,True
461,5,6.0,7.966,0.0,13.966,2019,189,4,False,True
392,136,4.7,8.066,0.0,12.766,2019,149,30,False,True


In [6]:
# Show the first rows of `olymp` to compare its column names with the
# training features before it is passed to the model.
olymp.head()


Unnamed: 0,Rank,D Score,E Score,Pen.,Total,year,Name,nation,round_AAfinal,round_TeamFinal,round_TeamQual,round_final,round_qual
0,23.0,3.9,6.9,0.0,10.8,2022,236,61,False,False,False,False,True
1,23.0,3.9,6.9,0.0,10.8,2022,236,61,False,False,False,False,True
2,54.0,4.4,8.066,0.0,12.466,2023,236,61,False,False,False,False,True
3,12.0,5.8,8.4,0.0,14.2,2022,531,26,True,False,False,False,False
4,26.0,5.8,8.5,0.0,14.3,2022,531,26,False,False,False,False,True


In [7]:
# Give the score, penalty and nation columns of `olymp` the same names that
# `df` uses (D, E, ND, Nation).
olymp = olymp.rename(
    columns={
        'D Score': 'D',
        'E Score': 'E',
        'Pen.': 'ND',
        'nation': 'Nation',
    }
)

In [8]:
# Remove the three round-indicator columns that have no counterpart in `df`.
olymp = olymp.drop(['round_TeamFinal', 'round_AAfinal', 'round_TeamQual'], axis='columns')

In [9]:
# AdaBoost ensemble that boosts shallow random forests.
# AdaBoostClassifier and RandomForestClassifier come from the notebook's
# import cell, so they are not imported a second time here.
# Both estimators are seeded so that re-running the cell builds the same
# configuration.
# NOTE(review): `ada_clf` is not referenced by any later cell shown in this
# notebook, and the cell that trains `clf` rebinds `base_estimator`.
# NOTE(review): scikit-learn 1.2 renamed the `base_estimator` keyword to
# `estimator`; confirm the installed version before switching.
base_estimator = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=19)
ada_clf = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=1000, random_state=19)


In [10]:
# Train AdaBoost on top of a random-forest base estimator and evaluate it on
# the hold-out split. The fitted `clf` is used afterwards to score `olymp`.

# Seed shared by the split and both estimators: without it every run fits a
# different model, so the predictions in the following cells change from run
# to run.
SEED = 19

y = df['medal']
X = df.drop(columns=['medal'])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=SEED)

# Create a custom base estimator (e.g., RandomForestClassifier)
base_estimator = RandomForestClassifier(n_estimators=10, max_depth=30, random_state=SEED)

# Initialize the AdaBoost classifier with the custom base estimator.
# NOTE(review): scikit-learn 1.2 renamed the `base_estimator` keyword to
# `estimator`; confirm the installed version before switching.
clf = AdaBoostClassifier(base_estimator=base_estimator, n_estimators=1000, random_state=SEED)

# Fit the model to the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Evaluate the model's performance.
# zero_division=0 keeps the report free of undefined-metric warnings when a
# class is never predicted; the reported value stays 0.0.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred)

# Print the results
print("AdaBoost Classifier with a Custom Base Estimator (RandomForest)")
print(f"Accuracy: {accuracy:.4f}")
print(f"Classification Report:\n{report}")
print(f"Confusion Matrix:\n{conf_matrix}")

AdaBoost Classifier with a Custom Base Estimator (RandomForest)
Accuracy: 0.9608
Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98        96
           1       1.00      0.33      0.50         6

    accuracy                           0.96       102
   macro avg       0.98      0.67      0.74       102
weighted avg       0.96      0.96      0.95       102

Confusion Matrix:
[[96  0]
 [ 4  2]]




In [11]:
# Score every row of `olymp` with the fitted AdaBoost model.
# Only the columns the model was trained on are passed, in training order, so
# this cell can be re-run after the `ypred` column has been added to `olymp`.
# Note that `y_pred` no longer holds the test-set predictions after this line.
y_pred = clf.predict(olymp[X.columns])


In [12]:
# Store the predictions next to the rows they belong to.
# The Series is built on olymp's own index, so the column assignment pairs
# values with rows by position even when that index is not 0..n-1, and a
# length mismatch raises instead of being padded with NaN.
ypreds = pd.Series(y_pred, index=olymp.index)
olymp['ypred'] = ypreds


In [13]:
# Count how many rows of `olymp` were assigned to each predicted class.
olymp['ypred'].value_counts()

ypred
0    2374
Name: count, dtype: int64

In [16]:
# Keep only the rows whose prediction is class 1.
filtered_df = olymp.loc[olymp['ypred'].eq(1)]

In [17]:
# Distinct encoded `Name` values among the rows predicted as class 1.
filtered_df['Name'].unique()

array([145,  94,  74])

In [23]:
# Encoded names of the rows that were predicted as class 1.
# They are taken from `filtered_df` instead of being typed in by hand, so the
# list always reflects the predictions of the model that was just fitted.
name_encoded_values = filtered_df['Name'].unique().tolist()

# Filter the DataFrame to get the corresponding "Name" values
result = pbnames.loc[pbnames['Name_encoded'].isin(name_encoded_values), 'Name']


In [24]:
# Distinct readable names that correspond to the selected encoded values.
result.unique()

array(['ferhat arican', 'curran phillips', 'carlos edriel yulo'],
      dtype=object)

In [20]:
# Lower-case the `Name` column of `pbolymp` so it can be matched against the
# lower-case names used for filtering.
pbolymp = pbolymp.assign(Name=pbolymp["Name"].str.lower())

In [25]:
# Readable names for the selected encoded values, looked up in `pbnames`
# instead of being copied from an earlier cell's output. The lookup goes
# through `name_encoded_values` rather than `result`, because `result` is
# rebound to a DataFrame below and the cell must stay re-runnable.
# Names are lower-cased to match the lower-cased `Name` column of `pbolymp`.
names_to_filter = (
    pbnames.loc[pbnames['Name_encoded'].isin(name_encoded_values), 'Name']
    .str.lower()
    .unique()
    .tolist()
)

# Filter the DataFrame to get rows with the specified "Name" values
result = pbolymp[pbolymp['Name'].isin(names_to_filter)]


In [26]:
# Number of matching `pbolymp` rows per country code.
result['Country'].value_counts()

Country
TUR    41
PHI    40
USA    21
Name: count, dtype: int64

In [28]:
# Show the matching rows whose country code is USA.
result.loc[result['Country'].eq('USA')]

Unnamed: 0,Name,Country,Date,Gender,Competition,Round,Location,Apparatus,Rank,D_Score,E_Score,Penalty,Score,Total_Score
6153,curran phillips,USA,2023-03-12,m,FIG Apparatus World Cup 2023,final,"Baku, Azerbaijan",HB,5.0,5.5,7.633,0.0,13.133,13.133
6154,curran phillips,USA,2023-03-12,m,FIG Apparatus World Cup 2023,qual,"Baku, Azerbaijan",HB,2.0,5.5,8.633,0.0,14.133,14.133
6155,curran phillips,USA,2023-03-12,m,FIG Apparatus World Cup 2023,final,"Baku, Azerbaijan",PB,5.0,6.8,7.7,0.0,14.5,14.5
6156,curran phillips,USA,2023-03-12,m,FIG Apparatus World Cup 2023,qual,"Baku, Azerbaijan",PB,1.0,6.8,8.7,0.0,15.5,15.5
6352,curran phillips,USA,2022-08-20,m,2022 U.S. Championships,qual,"Tampa, FL",HB,0.0,5.5,6.45,0.0,11.95,11.95
6353,curran phillips,USA,2022-08-20,m,2022 U.S. Championships,final,"Tampa, FL",HB,0.0,5.7,7.55,0.0,13.25,13.25
6354,curran phillips,USA,2022-08-20,m,2022 U.S. Championships,qual,"Tampa, FL",PB,0.0,6.9,8.45,0.0,15.35,15.35
6355,curran phillips,USA,2022-08-20,m,2022 U.S. Championships,final,"Tampa, FL",PB,0.0,6.9,8.4,0.0,15.3,15.3
6356,curran phillips,USA,2022-08-20,m,2022 U.S. Championships,qual,"Tampa, FL",VT,0.0,5.2,9.2,0.0,14.4,14.4
6357,curran phillips,USA,2022-08-20,m,2022 U.S. Championships,final,"Tampa, FL",VT,0.0,5.2,8.85,0.0,14.05,14.05
