In [59]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [60]:
#loading data
df = pd.read_csv("powerlifting.csv")

#getting rid of irrelevant info ('playerId' and 'Name')
df = df.iloc[:, 2: ]
df.head()

Unnamed: 0,Sex,Equipment,Age,BodyweightKg,BestSquatKg,BestDeadliftKg,BestBenchKg
0,M,Raw,23.0,87.3,205.0,235.0,125.0
1,M,Wraps,23.0,73.48,220.0,260.0,157.5
2,M,Raw,26.0,112.4,142.5,220.0,145.0
3,F,Raw,35.0,59.42,95.0,102.5,60.0
4,F,Raw,26.5,61.4,105.0,127.5,60.0


In [61]:
#encoding data ('Sex' and 'Equipment')
from sklearn.preprocessing import OrdinalEncoder

columns_to_encode = ['Sex', 'Equipment']
encoder = OrdinalEncoder(
    categories = [['F', 'M'],
    ['Raw', 'Wraps', 'Single-ply', 'Multi-ply']]
)


df[columns_to_encode] = encoder.fit_transform(df[columns_to_encode])

df


Unnamed: 0,Sex,Equipment,Age,BodyweightKg,BestSquatKg,BestDeadliftKg,BestBenchKg
0,1.0,0.0,23.0,87.30,205,235.00,125.00
1,1.0,1.0,23.0,73.48,220,260.00,157.50
2,1.0,0.0,26.0,112.40,142.5,220.00,145.00
3,0.0,0.0,35.0,59.42,95,102.50,60.00
4,0.0,0.0,26.5,61.40,105,127.50,60.00
...,...,...,...,...,...,...,...
18895,0.0,0.0,20.0,65.70,92.5,135.00,55.00
18896,1.0,0.0,23.5,88.85,190,230.00,125.00
18897,1.0,3.0,43.0,106.78,206.38,229.06,151.95
18898,1.0,0.0,22.5,82.00,235,272.50,135.00


In [74]:
#fixing non-float inputs / incorect format 
import numpy as np
def fixing_format(column):
    for index, row_value in df[column].items():
        try:
            df.loc[index, column] = float(row_value)
        except (TypeError, ValueError):
            df.loc[index, column] = np.nan
    return df
fixing_format('BodyweightKg')
fixing_format('BestSquatKg')
fixing_format('BestDeadliftKg')
fixing_format('BestBenchKg')

df.dropna(inplace=True)


In [75]:
#setting the last column (benech) as the target 
X, y = df.iloc[:, : -1], df.iloc[:, -1]

#splitting data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

In [131]:
#scaling data

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [77]:
#training the model

reg = LinearRegression()

reg.fit(X_train_scaled, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [83]:
#scoring
reg.score(X_test_scaled,y_test)

0.8380006507948679

In [148]:
#user input
user_input = []
#adding the sex
while len(user_input) == 0:
    sex = input("Please inpute your sex Male [M] or Female [F]: ")
    if sex in ['F', 'f', '[F]', '[f]']:
        user_input.append(float(0))
    elif sex in ['M', 'm', '[M]', '[m]']:
        user_input.append(float(1))
    else:
        print(f"{sex} is not a valid input.")
#adding equipment    
while len(user_input) == 1:
    equipment = input("Please input equipment Raw [R], Wraps [W], Single-ply [S] or Multiply [M]: ")
    if equipment in ['R', 'r', '[R]', '[r]']:
        user_input.append(float(0))
    elif equipment in ['W', 'w', '[W]', '[w]']:
        user_input.append(float(1))
    elif equipment in ['S', 's', '[S]', '[s]']:
        user_input.append(float(2))
    elif equipment in ['M', 'm', '[M]', '[m]']:
        user_input.append(float(1))
    else:
        print(f"{equipment} is not a valid input.")

#adding age
while len(user_input) == 2:
    age = input("Please input age in years: ")
    try:
        user_input.append(float(age))
    except (TypeError, ValueError):
        print(f"{age} is not a valid input.")

#adding bodyweight
while len(user_input) == 3:
    BW = input("Please input bodyweight in Kg: ")
    try:
        user_input.append(float(BW))
    except (TypeError, ValueError):
        print(f"{BW} is not a valid input.")

#adding squat
while len(user_input) == 4:
    Squat = input("Please input your best squat in Kg: ")
    try:
        user_input.append(float(Squat))
    except (TypeError, ValueError):
        print(f"{Squat} is not a valid input.")

#adding deadlift
while len(user_input) == 5:
    Dead = input("Please input best deadlift in Kg: ")
    try:
        user_input.append(float(Dead))
    except (TypeError, ValueError):
        print(f"{Dead} is not a valid input.")

In [149]:
#scaling user input
user_input = (np.array(user_input)).reshape(1,-1)
user_input_scaled = scaler.transform(user_input)



In [159]:
reg.predict(user_input_scaled)

array([77.80128138])

In [167]:
#testing to find best parameters

param_grid = {
    'copy_X' : [False],
    'tol' : [1e-10,1e-8,1e-7, 1e-5],
    'n_jobs' : [1,2],
    'positive' : [True]
}

from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(reg, param_grid, cv=3)

In [168]:
grid.fit(X_train_scaled, y_train)
grid.best_params_

{'copy_X': False, 'n_jobs': 1, 'positive': True, 'tol': 1e-10}

In [169]:
best_reg = grid.best_estimator_
best_reg.predict(user_input_scaled)

array([77.80128138])

In [172]:
best_reg.score(X_test_scaled, y_test)

0.8380006507948679