In [7]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [140]:
# Load the cleaned measurements and derive the wrist-to-thigh ratio as an extra column.
data = pd.read_csv("cleaned_data.csv")
data["WRIST_OVER_THIGH"] = data["WRIST"].div(data["THIGH"])

# Response is the BODYFAT column; candidate predictors are every column from
# AGE through the last column of the frame (label-based slice, inclusive).
y = data.loc[:, "BODYFAT"]
X = data.loc[:, "AGE":]

### Model based on Stepwise selection method 

In [124]:
def stepwise_model(columns_stepwise, y):
    """Fit a linear regression of ``y`` on a subset of the notebook-level ``X``.

    Parameters
    ----------
    columns_stepwise : list of column labels selected from the global ``X``.
    y : response values passed straight to ``LinearRegression.fit``.

    Returns
    -------
    The fitted ``LinearRegression`` estimator.
    """
    # X is read from the enclosing notebook namespace, not passed in.
    predictors = X.loc[:, columns_stepwise]
    # fit() returns the estimator itself, so it can be returned directly.
    return LinearRegression().fit(predictors, y)


In [141]:
# Predictors used for the stepwise-selection model.
columns_stepwise = ["ABDOMEN","WRIST","FOREARM","WEIGHT"]
# Fit once and reuse the estimator; the original refit the same model a second
# time just to read the intercept.
stepwise_fit = stepwise_model(columns_stepwise, y)
# Bare last expression so the notebook displays [coefficients, intercept].
[stepwise_fit.coef_, stepwise_fit.intercept_]


[array([ 0.91809023, -1.40679831,  0.43023399, -0.12513728]),
 -30.311872083456024]

### Model based on Lasso method

In [127]:
def lasso_model(columns_lasso, y):
    """Fit a linear regression of ``y`` on a subset of the notebook-level ``X``.

    Despite the name, this fits a plain ``LinearRegression`` (no L1 penalty)
    restricted to the columns given in ``columns_lasso``.

    Parameters
    ----------
    columns_lasso : list of column labels selected from the global ``X``.
    y : response values passed straight to ``LinearRegression.fit``.

    Returns
    -------
    The fitted ``LinearRegression`` estimator.
    """
    # X is read from the enclosing notebook namespace, not passed in.
    features = X.loc[:, columns_lasso]
    # fit() returns the estimator itself, so it can be returned directly.
    return LinearRegression().fit(features, y)


In [142]:
# Predictors used for the lasso-method model.
columns_lasso = ["WEIGHT","HEIGHT","ABDOMEN","THIGH"]
# Fit once with lasso_model and read both attributes from that single fit.
# The original took the intercept from stepwise_model(columns_lasso, y), a
# copy-paste slip that only gave the right value because the two helpers are
# currently identical, and it fitted the model twice.
lasso_fit = lasso_model(columns_lasso, y)
# Bare last expression so the notebook displays [coefficients, intercept].
[lasso_fit.coef_, lasso_fit.intercept_]


[array([-0.15392043, -0.07889177,  0.89130143,  0.1717058 ]),
 -40.66300643430044]

### Model Performance Measure

In [129]:
from sklearn.model_selection import cross_validate

In [145]:
# 6-fold cross-validation of the model built on columns_lasso.
lasso_reg = lasso_model(columns_lasso, y)
scores_lasso = cross_validate(
    lasso_reg,
    X.loc[:, columns_lasso],
    y,
    cv=6,
    scoring=('r2', 'neg_mean_squared_error'),
    return_train_score=True,
)
# The scorer is the *negated* MSE, so flip the sign to get per-fold test MSE.
lasso_test_mse = -scores_lasso['test_neg_mean_squared_error']
print(f'The mean square error on test set based on lasso method is {lasso_test_mse.mean()}.')

The mean square error on test set based on lasso method is 18.227794243147486.


In [148]:
# 6-fold cross-validation of the model built on columns_stepwise.
step_reg = stepwise_model(columns_stepwise, y)
scores_stepwise = cross_validate(
    step_reg,
    X.loc[:, columns_stepwise],
    y,
    cv=6,
    scoring=('r2', 'neg_mean_squared_error'),
    return_train_score=True,
)
# The scorer is the *negated* MSE, so flip the sign to get per-fold test MSE.
stepwise_test_mse = -scores_stepwise['test_neg_mean_squared_error']
print(f'The mean square error on test set based on stepwise regression is {stepwise_test_mse.mean()}.')

The mean square error on test set based on stepwise regression is 17.20866144114553.
