In [None]:

# Load the dataset, separate features from the label, and hold out a test split.
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the raw table from the working directory.
df = pd.read_csv("heart.csv")

# Every column except 'target' is a feature; 'target' is the value to predict.
y = df.loc[:, 'target']
X = df.drop(columns='target')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [3]:
from sklearn.linear_model import LinearRegression
# Baseline: fit a linear regression on the training split (fit() returns the
# fitted estimator itself), then predict on the held-out rows.
# NOTE(review): 'target' may be a class label rather than a continuous value —
# confirm whether a classifier would be the more appropriate baseline.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [4]:
from sklearn.metrics import mean_squared_error, r2_score
# Score the linear-regression predictions against the held-out labels.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Emit both metrics, one per line.
print(f'Mean Squared Error: {mse}', f'R² Score: {r2}', sep='\n')

Mean Squared Error: 0.11627071992880013
R² Score: 0.5337894947682487


In [5]:
# Second experiment: a Random Forest on Min-Max normalized features, followed by
# cross-validation.
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Re-read the raw table so this experiment starts from unmodified data.
df = pd.read_csv("heart.csv")

# Separate the label column from the feature columns.
y = df.loc[:, 'target']
X = df.drop(columns='target')

# Rescale each feature column to the [0, 1] range.
# Note: the scaler is fit on every row here, i.e. before any train/test split.
scaler = MinMaxScaler()
X_scaled = scaler.fit(X).transform(X)

In [6]:
from sklearn.model_selection import train_test_split
# Split the raw features first, then learn the Min-Max scaling from the
# training rows only. Fitting the scaler on the full dataset before splitting
# lets the test rows' min/max influence the training features (data leakage).
# The seed and test_size are unchanged, so the same rows land in each split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# A separate scaler object is used so the one fitted on all rows is left intact.
train_scaler = MinMaxScaler().fit(X_train_raw)
X_train = train_scaler.transform(X_train_raw)
# Test values outside the training range may fall slightly outside [0, 1].
X_test = train_scaler.transform(X_test_raw)

In [7]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest so its bootstrap samples and feature choices — and therefore
# the metrics printed below — are the same on every Restart & Run All.
# 42 matches the seed used for the train/test split.
model_rf = RandomForestRegressor(random_state=42)
model_rf.fit(X_train, y_train)

# Predictions for the held-out rows, scored in the next cell.
y_pred_rf = model_rf.predict(X_test)

In [8]:
from sklearn.metrics import make_scorer, mean_squared_error, r2_score
# Score the Random Forest predictions against the held-out labels.
mse_rf = mean_squared_error(y_true=y_test, y_pred=y_pred_rf)
r2_rf = r2_score(y_true=y_test, y_pred=y_pred_rf)

# Emit both metrics, one per line.
print(
    f'Mean Squared Error (Random Forest): {mse_rf}',
    f'R² Score (Random Forest): {r2_rf}',
    sep='\n',
)

Mean Squared Error (Random Forest): 0.11866229508196723
R² Score (Random Forest): 0.5242


In [9]:
from sklearn.model_selection import KFold, cross_val_score, cross_validate
from sklearn.pipeline import make_pipeline

# 5-fold cross-validation of the normalized Random Forest.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
mse_scorer = make_scorer(mean_squared_error)
r2_scorer = make_scorer(r2_score)

# Put the scaler inside the pipeline so it is re-fit on each fold's training
# rows only; this keeps CV consistent with the normalized hold-out evaluation
# without leaking validation-fold statistics. The forest is seeded so the
# reported means are reproducible.
cv_model = make_pipeline(MinMaxScaler(), RandomForestRegressor(random_state=42))

# One pass computes both metrics from the same fitted model in each fold,
# instead of refitting a separate forest for each metric.
cv_results = cross_validate(
    cv_model, X, y, cv=kf, scoring={'mse': mse_scorer, 'r2': r2_scorer}
)
mse_scores = cv_results['test_mse']
r2_scores = cv_results['test_r2']

print(f'Mean MSE across 5 folds: {mse_scores.mean()}')
print(f'Mean R² across 5 folds: {r2_scores.mean()}')

Mean MSE across 5 folds: 0.13423908196721313
Mean R² across 5 folds: 0.44911176107600853
