In [2]:
import warnings as w
w.filterwarnings('ignore')
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.model_selection import train_test_split

In [4]:
# Load the patient health records from CSV into the DataFrame 'df'
df = pd.read_csv(filepath_or_buffer="patient_health_data.csv")

# Report the dataset dimensions as a (rows, columns) tuple
print("Number of rows and columns:", (len(df.index), len(df.columns)))

# Preview the leading rows; as the cell's final expression the frame is
# rendered by the notebook rather than printed
print("First few rows of the dataset:")
df.head(n=5)

Number of rows and columns: (250, 12)
First few rows of the dataset:


Unnamed: 0,age,bmi,blood_pressure,cholesterol,glucose,insulin,heart_rate,activity_level,diet_quality,smoking_status,alcohol_intake,health_risk_score
0,58,24.865215,122.347094,165.730375,149.289441,22.306844,75.866391,1.180237,7.675409,No,0.824123,150.547752
1,71,19.103168,136.852028,260.610781,158.584646,13.869817,69.481114,7.634622,8.933057,No,0.85291,160.32035
2,48,22.316562,137.592457,177.342582,178.760166,22.849816,69.386962,7.917398,3.501119,Yes,4.740542,187.487398
3,34,22.196893,153.164775,234.594764,136.351714,15.140336,95.348387,3.19291,2.745585,No,2.226231,148.773138
4,62,29.837173,92.768973,276.106498,158.753516,17.228576,77.680975,7.044026,8.918348,No,3.944011,170.609655


In [5]:
# Encode smoking_status as a binary indicator: 1 for 'Yes', 0 for anything else.
# The dtype guard keeps this cell idempotent: once the column is numeric,
# re-running the cell leaves it untouched instead of comparing 1/0 against
# 'Yes' and silently turning every smoker into 0.
if not pd.api.types.is_numeric_dtype(df['smoking_status']):
    df['smoking_status'] = (df['smoking_status'] == 'Yes').astype(int)

In [6]:
# Step 1: Select the features and target variable for modeling.
# 'Unnamed: 0' is the row counter that was saved into the CSV (0, 1, 2, ... in
# the preview); it identifies a row rather than describing a patient, so it is
# excluded from the predictors together with the target itself.
# errors='ignore' keeps this cell working if the index column is already absent.
# NOTE(review): any scores recorded while this column was still a feature need
# to be regenerated by re-running the modeling cells.
y = df['health_risk_score']
X = df.drop(columns=['health_risk_score', 'Unnamed: 0'], errors='ignore')

# Step 2: Split the data into training and test sets with a test size of 25%.
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [7]:
# Step 3: Fit a plain Linear Regression on the training split (fit() returns
# the estimator itself, so construction and training are chained), then report
# R-squared on the held-out test split.
linear_model = LinearRegression().fit(X_train, y_train)
linear_r2 = linear_model.score(X=X_test, y=y_test)
print("Linear Regression R-squared:", linear_r2)

Linear Regression R-squared: 0.764362090675749


In [8]:
# Step 4: Sweep Lasso (L1-regularized) regression over a list of alpha values,
# fitting on the training split and printing the test-split R-squared for each.
# NOTE(review): these scores come from the test split, so they describe each
# setting but should not be used on their own to choose alpha.
lasso_alphas = [0.01, 0.1, 1.0, 10.0]
for alpha in lasso_alphas:
    # fit() returns the estimator, so the trained model is bound in one step
    lasso_model = Lasso(alpha=alpha).fit(X_train, y_train)
    lasso_r2 = lasso_model.score(X=X_test, y=y_test)
    print(f"Lasso Regression R-squared (alpha={alpha}):", lasso_r2)

Lasso Regression R-squared (alpha=0.01): 0.7645437646395713
Lasso Regression R-squared (alpha=0.1): 0.7660509914802164
Lasso Regression R-squared (alpha=1.0): 0.781976368357514
Lasso Regression R-squared (alpha=10.0): 0.7873364302158369


In [9]:
# Step 5: Sweep Ridge (L2-regularized) regression over a list of alpha values,
# fitting on the training split and printing the test-split R-squared for each.
# NOTE(review): these scores come from the test split, so they describe each
# setting but should not be used on their own to choose alpha.
ridge_alphas = [0.01, 0.1, 1.0, 10.0]
for alpha in ridge_alphas:
    # fit() returns the estimator, so the trained model is bound in one step
    ridge_model = Ridge(alpha=alpha).fit(X_train, y_train)
    ridge_r2 = ridge_model.score(X=X_test, y=y_test)
    print(f"Ridge Regression R-squared (alpha={alpha}):", ridge_r2)

Ridge Regression R-squared (alpha=0.01): 0.764363158939054
Ridge Regression R-squared (alpha=0.1): 0.7643727707489341
Ridge Regression R-squared (alpha=1.0): 0.7644686367656155
Ridge Regression R-squared (alpha=10.0): 0.7654030812954538
