In [2]:
import numpy as np
from sklearn.datasets import load_diabetes  # Built-in dataset
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Fetch scikit-learn's bundled diabetes dataset; the goal is to predict the
# disease-progression score from a single input feature.
data = load_diabetes()

# Feature matrix: keep only column 2 (BMI) and reshape it to (n_samples, 1),
# since the estimator is fitted on a 2-D array.
X = data.data[:, 2].reshape(-1, 1)

# Target vector: the disease-progression measure.
y = data.target

print("Data loaded! X shape:", X.shape, "y shape:", y.shape)


Data loaded! X shape: (442, 1) y shape: (442,)


In [3]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the regressor and fit it on the training portion in one expression
# (fit() hands back the fitted estimator itself).
model = LinearRegression().fit(X_train, y_train)

# With a single feature, coef_ holds exactly one value: the slope of the line.
print("Model trained! Coefficient (slope):", model.coef_[0])
print("Intercept:", model.intercept_)

Model trained! Coefficient (slope): 998.5776891375593
Intercept: 152.00335421448167


In [4]:
# Predict on the held-out test set and score the predictions.
pred = model.predict(X_test)
# MSE is expressed in squared target units; take its square root (RMSE) to
# read the error on the same scale as y.
error = mean_squared_error(y_test, pred)
print("Error (MSE):", error)

# Test a new prediction: BMI=0.5 → Progression?
# NOTE: per the scikit-learn docs, the diabetes features are mean-centred and
# rescaled, so 0.5 is NOT a raw BMI value and lies far outside the range the
# model was fitted on. The number printed below is a pure extrapolation of the
# fitted line (intercept + 0.5 * slope) and should not be read as a realistic
# progression score; use a value taken from X to get an in-range prediction.
new_pred = model.predict([[0.5]])[0]
print("Predicted progression for BMI=0.5:", new_pred)

Error (MSE): 4061.8259284949268
Predicted progression for BMI=0.5: 651.2921987832614
