In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# Load the scikit-learn diabetes dataset and pull out the feature matrix
# and the regression target.
diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# Labelled tabular copy of the data (features plus a 'Target' column),
# used here only for the quick textual overview below.
df = pd.DataFrame(X, columns=diabetes.feature_names).assign(Target=y)

# Quick look at the data: leading rows, then per-column summary statistics.
print("First 5 rows of the dataset:", df.head(), sep="\n")
print("\nSummary statistics:", df.describe(), sep="\n")

# Hold out part of the data for evaluation. No test_size is passed, so
# scikit-learn's default split proportion applies; random_state pins the
# shuffle so the split is reproducible between runs.
split = train_test_split(X, y, random_state=11)
X_train, X_test, y_train, y_test = split

# Report how many rows/columns ended up in each partition.
for label, part in (("\nTrain set shape:", X_train), ("Test  set shape:", X_test)):
    print(label, part.shape)

# Fit an ordinary least-squares linear model on the training partition.
model = LinearRegression().fit(X_train, y_train)

# Show the learned parameters: the intercept, then one right-aligned
# "feature: weight" line per input column (weights to 4 decimals).
coef_lines = [
    f"{feature:>8}: {weight:.4f}"
    for feature, weight in zip(diabetes.feature_names, model.coef_)
]
print("\nIntercept:", model.intercept_)
print("\nCoefficients:")
for line in coef_lines:
    print(line)

# Predict targets for the held-out rows.
y_pred = model.predict(X_test)

# Side-by-side preview of the leading predictions and their true values.
head = slice(5)
print("\nFirst 5 predicted values:", y_pred[head])
print("First 5 actual values   :", y_test[head])

# Scatter the true test targets against the model's predictions.
plt.figure(figsize=(8, 8))
sns.scatterplot(x=y_test, y=y_pred)

# Reference diagonal (y = x): points on this line are perfect predictions.
# The bounds are computed once with NumPy reductions instead of calling the
# Python builtins min()/max() twice each on the array.
lo, hi = np.min(y_test), np.max(y_test)
plt.plot([lo, hi], [lo, hi], 'k--')

plt.xlabel("Expected Target")
plt.ylabel("Predicted Target")
plt.title("Expected vs Predicted (Test Set)")
plt.show()

# Evaluate the predictions on the held-out set: coefficient of
# determination (R^2) and mean squared error.
r2 = metrics.r2_score(y_test, y_pred)
mse = metrics.mean_squared_error(y_test, y_pred)

# The label previously contained mojibake ("RÂ²"); the superscript two is
# written as an escape so it survives any re-encoding of this file.
print("\nR\u00b2 score:", r2)
print("Mean Squared Error:", mse)