First Regression Model & Back to the Code

In [19]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import datasets

In [20]:
data = datasets.load_diabetes()
df = pd.DataFrame(data.data, columns=data.feature_names)
df['target'] = data.target

In [21]:
X = df.drop('target', axis=1)
y = df['target']

In [22]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [23]:
model = LinearRegression()
model.fit(X_train, y_train)

In [24]:
# Create an array with all features set to 0 (their mean value in the scaled dataset)
new_data = np.zeros((1, 10))  # 10 features in diabetes dataset
# Find the index of 'bmi' in your features
bmi_index = df.columns.get_loc('bmi')
new_data[0, bmi_index] = 0.05  # Set just the BMI value

predicted_value = model.predict(new_data)
print(f"Predicted target: {predicted_value[0]}")

Predicted target: 178.4670424656744




In [25]:
# Check the model's coefficients (y = mx + b)
print(f"Intercept (b0): {model.intercept_}")
print(f"Coefficient for BMI (b1): {model.coef_[0]}")

Intercept (b0): 151.34560453985995
Coefficient for BMI (b1): 37.90402135007501


In [26]:
from sklearn.metrics import r2_score
y_pred = model.predict(X_test)
print("R^2 Score:", r2_score(y_test, y_pred))

R^2 Score: 0.4526027629719196


In [27]:
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print(f"Training R²: {train_score:.3f}")
print(f"Test R²: {test_score:.3f}")

Training R²: 0.528
Test R²: 0.453


In [28]:
# Create a DataFrame to show feature names and their coefficients
coef_df = pd.DataFrame({
    'Feature': X.columns,
    'Coefficient': model.coef_
})
print(coef_df)

  Feature  Coefficient
0     age    37.904021
1     sex  -241.964362
2     bmi   542.428759
3      bp   347.703844
4      s1  -931.488846
5      s2   518.062277
6      s3   163.419983
7      s4   275.317902
8      s5   736.198859
9      s6    48.670657
