In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
import xgboost as xgb

In [None]:
# Load the OpenPowerlifting export, keeping only the columns used downstream.
# The previous one-iteration tqdm loop added nothing but a meaningless progress
# bar and a leaked loop variable, so the load now runs at cell level.
filepath = "Openpowerlifting.csv"
keepcols = [
    'Sex', 'Event', 'Equipment', 'Age', 'AgeClass', 'BodyweightKg',
    'Best3SquatKg', 'Best3BenchKg', 'Best3DeadliftKg', 'TotalKg', 'Tested',
]
# low_memory=False parses the file in one pass, avoiding mixed-dtype inference
# that chunked parsing can produce on large CSVs.
df = pd.read_csv(filepath, usecols=keepcols, low_memory=False)

print(df.head())
print(df.columns)

In [None]:
# Count missing values per column in the freshly loaded frame.
missing_per_column = df.isna().sum()
print(missing_per_column)
   

In [None]:
# Columns that must be present for a row to be usable at all.
corecols = [
    'Sex',
    'Event',
    'Equipment',
    'BodyweightKg',
    'Best3SquatKg',
    'Best3BenchKg',
    'Best3DeadliftKg',
    'TotalKg',
    'Tested',
]
# Discard any row lacking one of the core columns, then report what is left.
dfnew = df.dropna(subset=corecols)
print(dfnew.isna().sum())
print(dfnew.shape[0])

In [None]:
# A row is only unusable for age imputation when BOTH Age and AgeClass are
# missing; rows with at least one of them are kept.
age_columns = ['Age', 'AgeClass']
dfnew = dfnew.dropna(how='all', subset=age_columns)
print(dfnew.isna().sum())
print(dfnew.shape[0])

In [None]:
# Many rows have an AgeClass but no Age, so the mean age of each class will be
# used to estimate the lifter's age. First inspect which classes exist.
age_classes = dfnew['AgeClass']
print(age_classes.nunique())
print(age_classes.unique())


In [None]:
# Mean recorded age within each age class (index: AgeClass, values: mean Age).
Means = dfnew['Age'].groupby(dfnew['AgeClass']).mean()
print(Means)

In [None]:
# Fill gaps in Age with the mean age of the lifter's age class (from `Means`).
# `assign` builds a new frame instead of writing into a possibly-shared one.
dfnew = dfnew.assign(Age=dfnew['Age'].fillna(dfnew['AgeClass'].map(Means)))
print(dfnew.isnull().sum())

# The original `dfnew.dropna(subset='AgeClass')` threw its result away, so it
# did nothing and both printouts were identical. What actually has to go are
# rows whose Age could not be imputed (their class has no mean). Rows with a
# known Age but no AgeClass stay, because AgeClass is discarded later anyway.
dfnew = dfnew.dropna(subset=['Age'])
print(dfnew.isnull().sum())

In [None]:
# Summary statistics of the numeric columns, rounded to 3 decimals.
summary_stats = dfnew.describe()
print(summary_stats.round(3))

In [None]:
# Show the equipment categories before and after restricting to raw lifting.
print(dfnew['Equipment'].unique())
is_raw = dfnew['Equipment'].eq('Raw')
dfnew = dfnew.loc[is_raw].copy()
print(dfnew['Equipment'].unique())


In [None]:
# These columns have served their purpose in filtering and imputation and are
# not used as model features, so remove them in a single pass.
unused_columns = ['Equipment', 'Event', 'AgeClass', 'Tested']
dfnew = dfnew.drop(columns=unused_columns)

In [None]:
# Encode Sex numerically (M -> 1, F -> 2). Any value not in the mapping becomes
# NaN. NOTE: re-running this cell on already-encoded data maps everything to NaN.
sex_codes = {'M': 1, 'F': 2}
dfnew = dfnew.assign(Sex=dfnew['Sex'].map(sex_codes))
dfnew.head()

In [None]:
# Distinct encoded Sex values (NaN marks categories outside the M/F mapping).
pd.unique(dfnew['Sex'])

In [None]:
# Remove every row that still contains a missing value in any column.
dfnew = dfnew.dropna(axis=0, how='any')

In [None]:
# Confirm that no missing values remain (every count should be zero).
dfnew.isna().sum()

In [None]:
# Overview of the cleaned frame. Only the last expression of a cell is rendered
# automatically, so the original bare `dfnew.head()` in the middle was silently
# discarded; print it explicitly, as the loading cell does.
print(len(dfnew))
print(dfnew.head())
dfnew.describe()

In [None]:
# Scatter of bodyweight against the squat/deadlift ratio on a reproducible
# random sample. The sample size is capped so the cell does not raise when
# fewer than 10000 rows survive cleaning.
dfsample = dfnew.sample(min(10000, len(dfnew)), random_state=13)
x = 'BodyweightKg'
y = dfsample['Best3SquatKg'] / dfsample['Best3DeadliftKg']

plt.figure()
sns.scatterplot(data=dfsample, x=x, y=y)
plt.xlabel(x)
# The y-axis shows a ratio, not a total in kg as it was previously labelled.
plt.ylabel('Squat/Deadlift Ratio')
plt.savefig(f'{x} vs Squat to Deadlift Ratio')
plt.show()


In [None]:
# Summary lift ratios over the full cleaned dataset.
# The originals divided a `dfnew` column by a `dfsample` column: pandas aligns
# on the index, so every row outside the sample became NaN and the "mean" was
# silently computed on the sample only. Use `dfnew` on both sides.
squat = dfnew['Best3SquatKg']
print((squat / dfnew['Best3DeadliftKg']).mean())    # mean squat/deadlift ratio
print((squat / dfnew['Best3BenchKg']).mean())       # mean squat/bench ratio
print((squat / dfnew['Best3DeadliftKg']).median())  # median squat/deadlift ratio

In [None]:
# Hexbin joint plot of bodyweight against the squat/deadlift ratio.
x = 'BodyweightKg'
y = dfsample['Best3SquatKg'] / dfsample['Best3DeadliftKg']
g = sns.jointplot(data=dfsample, x=x, y=y, kind='hex')
# Label through the JointGrid only. The earlier plt.xlabel/plt.ylabel calls were
# redundant with this call and carried a contradictory 'Deadlift vs Squat' label.
g.set_axis_labels('Bodyweight (kg)', 'Squat/Deadlift Ratio')
plt.show()

In [None]:
# Correlation heatmap restricted to lifters aged 50 to 80 (both ends inclusive).
age_in_range = dfnew['Age'].between(50, 80)
dfsamp = dfnew.loc[age_in_range].copy()
corr = dfsamp.corr()
sns.heatmap(corr, fmt='.2f', annot=True)

In [None]:
# Pairwise relationships on a reproducible random sample of the cleaned data.
# Fixes: the plot now uses the sample created here (`dfsampt`) rather than the
# unrelated age-filtered frame `dfsamp`, and plt.show is actually called. No
# explicit plt.figure is needed because pairplot builds its own figure.
dfsampt = dfnew.sample(min(10000, len(dfnew)), random_state=14)
sns.pairplot(data=dfsampt)
plt.show()

In [None]:
# Feature columns (named `Hyperpar*` for compatibility with later cells) and the
# target column for each of the three models: every lift is predicted from age,
# sex, bodyweight and the other two lifts.
Hyperpar = ['Age', 'Sex', 'BodyweightKg', 'Best3SquatKg', 'Best3BenchKg']
depvar = ['Best3DeadliftKg']
Hyperpar2 = ['Age', 'Sex', 'BodyweightKg', 'Best3BenchKg', 'Best3DeadliftKg']
depvar2 = ['Best3SquatKg']
Hyperpar3 = ['Age', 'Sex', 'BodyweightKg', 'Best3SquatKg', 'Best3DeadliftKg']
depvar3 = ['Best3BenchKg']

x, x2, x3 = dfnew[Hyperpar], dfnew[Hyperpar2], dfnew[Hyperpar3]
y, y2, y3 = dfnew[depvar], dfnew[depvar2], dfnew[depvar3]

# Boosting configuration shared by all three models.
trees = 1500
rate = 0.01

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=15)
x2_train, x2_test, y2_train, y2_test = train_test_split(x2, y2, test_size=0.2, random_state=16)
x3_train, x3_test, y3_train, y3_test = train_test_split(x3, y3, test_size=0.2, random_state=17)


def _fit_lift_model(features_train, target_train, features_test, target_test):
    """Fit one XGBoost regressor with the shared configuration.

    The held-out split is passed as ``eval_set`` purely so its metric is logged
    every 100 rounds; no early stopping is configured.

    Returns the fitted ``xgb.XGBRegressor``.
    """
    regressor = xgb.XGBRegressor(
        n_estimators=trees,
        learning_rate=rate,
        max_depth=7,
        # Was misspelled `sub_sample`, which XGBoost does not recognise, so row
        # subsampling was never actually applied.
        subsample=0.9,
        colsample_bytree=0.8,
    )
    regressor.fit(
        features_train,
        target_train,
        eval_set=[(features_test, target_test)],
        verbose=100,
    )
    return regressor


model = _fit_lift_model(x_train, y_train, x_test, y_test)        # deadlift
model2 = _fit_lift_model(x2_train, y2_train, x2_test, y2_test)   # squat
model3 = _fit_lift_model(x3_train, y3_train, x3_test, y3_test)   # bench


In [None]:
# Predict each model's held-out split.
predictions, predictions2, predictions3 = (
    fitted.predict(held_out)
    for fitted, held_out in ((model, x_test), (model2, x2_test), (model3, x3_test))
)

# Report RMSE (in kg) and R^2 for each lift.
report_rows = (
    ('Deadlift', y_test, predictions),
    ('Squat', y2_test, predictions2),
    ('Bench', y3_test, predictions3),
)
for lift, truth, guess in report_rows:
    rmse = mean_squared_error(truth, guess) ** 0.5
    print(f'For the {lift}')
    print(f'RMSE:{rmse:.1f} kg')
    print(f"R2 Score (Accuracy): {r2_score(truth, guess):.3f}")

In [None]:
# One feature-importance chart per model. xgb.plot_importance creates its own
# figure when no axes are supplied, so the original bare `plt.figure` (a no-op
# attribute reference) and `plt.figure()` (which opened a stray empty figure)
# are dropped.
for fitted, lift in ((model, 'Deadlifts'), (model2, 'Squats'), (model3, 'Bench')):
    xgb.plot_importance(fitted)
    plt.title(f'Feature importance for {lift}')
    plt.show()


In [None]:
# Actual vs. predicted deadlift on the held-out split.
actual = y_test.to_numpy().ravel()  # flatten the single-column target to 1-D

# Span the 45-degree reference line over the real data range instead of a
# hard-coded [0, 400], which would cut off heavier lifts.
lo = min(actual.min(), predictions.min())
hi = max(actual.max(), predictions.max())

plt.figure()  # the original `plt.figure` lacked parentheses and did nothing
plt.scatter(actual, predictions, alpha=0.2)
plt.plot([lo, hi], [lo, hi], 'r--', lw=2)  # perfect-prediction line
plt.xlabel('Actual Deadlift (kg)')
plt.ylabel('Predicted Deadlift (kg)')
plt.title('Prediction Accuracy: Actual vs. Predicted')
plt.show()

In [None]:
# One-shot interactive predictor: ask which lift to estimate, collect the
# lifter's details and print that model's prediction.
#
# Changes from the original:
#   * the menu choice is validated before anything else is asked (a stray
#     `!= 4` clause used to let 4 through every question first, and other bad
#     values exited without a message);
#   * the sex code is validated, since the models were trained on 1/2 only;
#   * non-numeric input yields 'invalid input' instead of a traceback;
#   * the `while` loop is gone, because every path ended it after one pass.
try:
    Category = int(input('''which of the three lifts would you like to predict?
    
    1: Deadlift
    2: Squat
    3: Bench Press 
    '''))
    if Category not in (1, 2, 3):
        raise ValueError(f'unknown lift choice: {Category}')

    age = float(input("Enter Age: "))
    sex = int(input("Enter Sex (1 for Male, 2 for Female): "))
    if sex not in (1, 2):
        raise ValueError(f'unknown sex code: {sex}')
    bw = float(input("Enter Bodyweight (kg): "))

    # Collect the other two lifts; the value order must match the feature
    # list the chosen model was trained on.
    if Category == 1:
        sq = float(input("Enter Squat 1rm (Kg): "))
        bp = float(input("Enter Bench 1rm (kg): "))
        lift_name, lift_model = 'Deadlift', model
        columns, values = Hyperpar, [age, sex, bw, sq, bp]
    elif Category == 2:
        dl = float(input("Enter Deadlift 1rm (Kg): "))
        bp = float(input("Enter Bench 1rm (kg): "))
        lift_name, lift_model = 'Squat', model2
        columns, values = Hyperpar2, [age, sex, bw, bp, dl]
    else:
        dl = float(input("Enter Deadlift 1rm (Kg): "))
        sq = float(input("Enter Squat 1rm (kg): "))
        lift_name, lift_model = 'Bench Press', model3
        columns, values = Hyperpar3, [age, sex, bw, sq, dl]
except ValueError:
    # Covers non-numeric entries as well as the explicit range checks above.
    print('invalid input')
else:
    # Predict outside the try block so genuine model errors are not masked.
    user_data = pd.DataFrame([values], columns=columns)
    prediction = lift_model.predict(user_data)[0]
    print(f"\n Predicted {lift_name}: {prediction:.1f} kg")
        

