# Package

In [45]:
from sklearn.datasets import load_diabetes
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.utils import resample
from sklearn.metrics import mean_squared_error
from scipy.stats import t

# Load Data

In [2]:
diabetes = load_diabetes()
df = pd.DataFrame(data = diabetes.data, columns= diabetes.feature_names)
df["target"] = diabetes.target
df_bmi_target = df.copy()
df_bmi_target = df_bmi_target[["bmi", "target"]]

# Fit a Simple Linear Regression Model

In [19]:
# Put variables into 02 dimensions
X = df_bmi_target[["bmi"]].to_numpy()
Y = df_bmi_target[["target"]].to_numpy()

In [20]:
X.ndim

2

In [21]:
# Adjust regression
reg = LinearRegression()
reg.fit (X, Y)

In [22]:
print("When the body Mass index increase 1 kg/m², the risk to attend diabetes increase", reg.coef_, "units.")
print(reg.intercept_)

When the body Mass index increase 1 kg/m², the risk to attend diabetes increase [[949.43526038]] units.
[152.13348416]


# Calculate the Point Prediction

In [31]:
X0, Y0 = resample(X, Y, replace=True, n_samples=1, random_state=42)

In [32]:
X0.ndim

2

In [33]:
Y0_hat = reg.predict(X0)

In [34]:
Y0_hat

array([[178.98734167]])

# Calculate the Standard Error of Prediction

In [39]:
# Calculate the ingredients
MSE = mean_squared_error(Y0, Y0_hat)
n = len(X)
Sum_Xi_minus_Xmean_sqrd = np.sum((X-np.mean(X))**2)
X0_minus_Xmean_sqrd = (X0 - np.mean(X))**2

In [42]:
# Calcul
SE = np.sqrt(MSE * ((1/n) + (X0_minus_Xmean_sqrd / Sum_Xi_minus_Xmean_sqrd)))
SE

array([[6.80742528]])

# Choose the Confidence Level

To be more practical, our confidence level is : 95%.

# Find the t-Statistic

In [43]:
confidence_interval = 0.95
alpha = 0.05

In [50]:
t_score_95 = t.ppf(1-(alpha/2), df = n-2)
t_score_95 = round(t_score_95, 2)
print(t_score_95)

1.97


# Calculate the Prediction Interval

In [54]:
Lower_Bound = Y0_hat - t_score_95 * SE
Upper_Bound = Y0_hat + t_score_95 * SE
print("lower bound :", Lower_Bound)
print("upper bound :", Upper_Bound)

lower bound : [[165.57671386]]
upper bound : [[192.39796947]]


# Plot the Prediction Interval