In [4]:
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")


height_data = pd.read_csv('athlete_events.csv')
height_data_new = height_data[["Sex", "Age","Height","Weight", "Sport"]].dropna()


# Encode categorical
label_encoder_sex = LabelEncoder()
height_data_new["Sex"] = label_encoder_sex.fit_transform(height_data_new["Sex"])


# Encode the target variable 'Sport'
label_encoder_sport = LabelEncoder()
height_data_new["Sport"] = label_encoder_sport.fit_transform(height_data_new["Sport"])

X_data = height_data_new[["Sex", "Age","Height","Weight"]]
Y_data = height_data_new["Sport"]

# Split data into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=30)

#Set CLF 
clf = tree.DecisionTreeClassifier()

#One out Model Accuracy
clf_one = clf.fit(X_train, Y_train)
Y_pred = clf_one.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)
print(f"One Out Model Accuracy: {accuracy:.3f}%")

#Bootstrap Method
bagging_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=10, random_state=42)
bagging_clf.fit(X_train, Y_train)
score = bagging_clf.score(X_test, Y_test)
print(f"Bootstrap Test accuracy: {score:.3f}%")

##K_fold Cross Validation
kf = KFold(n_splits=7, shuffle=True)  # 7 folds
scores = cross_val_score(clf, X_data, Y_data, cv=kf)
print(f"Cross-validation scores: {scores}")
print(f"CV Mean accuracy: {scores.mean():.3f}")

#Input for Sex 
while True:
    try:
        sex = int(input("Enter sex (0 for male and 1 for female): "))
        if 0 == sex or 1 == sex:
            break
        else:
            print("Please enter a number within the range.")
    except ValueError:
        print("That's not a valid number!")

#input for Age
while True:
    try:
        age = int(input("Enter age (10-50): "))
        if 0 <= age <= 50:
            break
        else:
            print("Please enter a number within the range.")
    except ValueError:
        print("That's not a valid number!")

#Input for Height 
while True:
    try:
        height = int(input("Enter Height (100 - 250cm): "))
        if 100 <= height <= 250:
            break
        else:
            print("Please enter a number within the range.")
    except ValueError:
        print("That's not a valid number!")
        
    
#Input for Weigth
while True:
    try:
        weight = int(input("Enter Weight (30-150 (kg)): "))
        if 30 <= weight <= 150:
            break
        else:
            print("Please enter a number within the range.")
    except ValueError:
        print("That's not a valid number!")
        
# Create an input array with the user's values
user_input = np.array([[sex, age, height, weight]])

# Make predictions with one out model
dt_prediction = clf_one.predict(user_input)
sport_prediction_dt = label_encoder_sport.inverse_transform(dt_prediction)



print(f"Prediction using One out Model: {sport_prediction_dt[0]}")




One Out Model Accuracy: 0.330%
Bootstrap Test accuracy: 0.327%
Cross-validation scores: [0.33269956 0.33114899 0.33294853 0.32934945 0.32839875 0.3317262
 0.33630993]
CV Mean accuracy: 0.332
Enter sex (0 for male and 1 for female): 0
Enter age (10-50): 21
Enter Height (100 - 250cm): 180
Enter Weight (30-150 (kg)): 80
Prediction using One out Model: Judo
