In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import load_diabetes

def _fit_and_score(features_train, features_test, target_train, target_test):
    """Fit a LinearRegression on the training rows and score it on the test rows.

    Parameters
    ----------
    features_train, features_test :
        Feature tables used for fitting and for prediction, respectively.
        Both must contain the same columns.
    target_train, target_test :
        Target values aligned with the corresponding feature tables.

    Returns
    -------
    tuple
        ``(model, predictions, rmse, r2)`` where ``model`` is the fitted
        estimator, ``predictions`` are its outputs for ``features_test``,
        ``rmse`` is the square root of the mean squared error and ``r2`` is
        the R2 score, both computed against ``target_test``.
    """
    model = LinearRegression()
    model.fit(features_train, target_train)
    predictions = model.predict(features_test)
    rmse = np.sqrt(mean_squared_error(target_test, predictions))
    r2 = r2_score(target_test, predictions)
    return model, predictions, rmse, r2


# Load the diabetes dataset
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Convert to DataFrame for ease of manipulation
df = pd.DataFrame(X, columns=diabetes.feature_names)
df['target'] = y

# Split into training and testing sets (80/20 split); the fixed random_state
# keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df[['bmi', 's5']], df['target'], test_size=0.2, random_state=42
)

# Model 1: bmi and s5 only
model_1, y_pred_1, rmse_1, r2_1 = _fit_and_score(X_train, X_test, y_train, y_test)

# Model 2: same rows plus 'age'. The column is looked up through the split
# indices so both models are trained and evaluated on identical rows.
X_train_extended = X_train.assign(age=df.loc[X_train.index, 'age'])
X_test_extended = X_test.assign(age=df.loc[X_test.index, 'age'])

model_2, y_pred_2, rmse_2, r2_2 = _fit_and_score(
    X_train_extended, X_test_extended, y_train, y_test
)

print(f"Model 1 (bmi and s5) RMSE: {rmse_1}, R2: {r2_1}")
print(f"Model 2 (bmi, s5, and age) RMSE: {rmse_2}, R2: {r2_2}")

# Explanation:
# - If R2 increases and RMSE decreases after adding 'age', then 'age' is
#   improving the model's performance on the held-out data.
# - If the improvement is minimal (or the metrics get worse), 'age' is probably
#   not a significant predictor for this dataset once bmi and s5 are included.
# - In the run recorded with this notebook, adding 'age' slightly raised RMSE
#   and lowered R2, i.e. it did not improve held-out performance.
#
# NOTE: this explanation is kept as comments on purpose. A bare string literal
# as the last expression of a cell is echoed by Jupyter as the cell's output.


Model 1 (bmi and s5) RMSE: 53.8687009455092, R2: 0.4522925957397932
Model 2 (bmi, s5, and age) RMSE: 54.038191262423574, R2: 0.4488406045123452


" \nExplanation:\n- If the R2 score improves and RMSE decreases when adding 'age', it means 'age' is helping in improving the model's performance.\n- If the improvement is minimal, it might suggest that 'age' is not a significant predictor for this dataset. \n"