In [138]:
from sklearn.datasets import load_diabetes
import numpy as np
import pandas as pd
import plotly.express as px

In [139]:
# Load the scikit-learn diabetes dataset bundle and print its built-in description.
data = load_diabetes()
print(data["DESCR"])

.. _diabetes_dataset:

Diabetes dataset
----------------

Ten baseline variables, age, sex, body mass index, average blood
pressure, and six blood serum measurements were obtained for each of n =
442 diabetes patients, as well as the response of interest, a
quantitative measure of disease progression one year after baseline.

**Data Set Characteristics:**

  :Number of Instances: 442

  :Number of Attributes: First 10 columns are numeric predictive values

  :Target: Column 11 is a quantitative measure of disease progression one year after baseline

  :Attribute Information:
      - age     age in years
      - sex
      - bmi     body mass index
      - bp      average blood pressure
      - s1      tc, total serum cholesterol
      - s2      ldl, low-density lipoproteins
      - s3      hdl, high-density lipoproteins
      - s4      tch, total cholesterol / HDL
      - s5      ltg, possibly log of serum triglycerides level
      - s6      glu, blood sugar level

Note: Each of these 1

In [140]:
# Pull the feature matrix and the regression target out of the loaded bundle.
x, y = data["data"], data["target"]

In [141]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the split repeatable.
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=0.2, random_state=45)

In [142]:
from sklearn.linear_model import LinearRegression
# Plain linear-regression baseline, fitted on the training split.
L = LinearRegression()
L.fit(X=xTrain, y=yTrain)

In [143]:
# Baseline predictions on the held-out split.
yPred = L.predict(X=xTest)

In [144]:
from sklearn.metrics import mean_squared_error, r2_score
# Score the linear baseline on the held-out split: RMSE is the square root of the MSE.
linear_rmse = np.sqrt(mean_squared_error(yTest, yPred))
linear_r2 = r2_score(yTest, yPred)
print(f"RMSE : {linear_rmse}")
print(f"R2 Score : {linear_r2}")

RMSE : 48.72713760953253
R2 Score : 0.5188113124539249


In [145]:
from sklearn.linear_model import Ridge
# Ridge regressor with a very small regularisation strength (alpha = 0.0001).
R = Ridge(alpha=1e-4)

In [146]:
# Fit the ridge model on the same training split used for the linear baseline.
R.fit(X=xTrain, y=yTrain)

In [147]:
# Ridge predictions on the held-out split.
yPred1 = R.predict(X=xTest)

In [148]:
# Score the ridge model on the held-out split with the same two metrics as the baseline.
ridge_rmse = np.sqrt(mean_squared_error(yTest, yPred1))
ridge_r2 = r2_score(yTest, yPred1)
print(f"RMSE : {ridge_rmse}")
print(f"R2 Score : {ridge_r2}")

RMSE : 48.718937001819555
R2 Score : 0.518973263588495


In [149]:
# Synthetic 1-D regression problem: a noisy quadratic, used below to show the
# effect of ridge regularisation on a high-degree polynomial fit.
# A seeded generator makes the sample (and every plot derived from it)
# identical on each Restart & Run All; the original unseeded draws changed per run.
rng = np.random.default_rng(42)
m = 100  # number of synthetic samples
x1 = 5 * rng.random((m, 1)) - 2  # uniform on [-2, 3), shape (m, 1)
x2 = 0.7 * x1**2 - 2 * x1 + 3 + rng.standard_normal((m, 1))  # quadratic + unit Gaussian noise
px.scatter(x=x1.ravel(), y=x2.ravel(), title="Synthetic noisy quadratic data")

In [150]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline


def getPredRidge(x1, x2, alpha, degree=16):
    """Fit a polynomial ridge regression and return its in-sample predictions.

    A two-step pipeline is built: polynomial feature expansion of ``x1``
    followed by a ``Ridge`` regressor. The pipeline is fitted on ``(x1, x2)``
    and then evaluated on the same ``x1`` it was trained on.

    Parameters
    ----------
    x1 : array-like
        Inputs handed to the pipeline as the feature matrix
        (presumably shaped ``(n_samples, n_features)`` -- confirm with caller).
    x2 : array-like
        Regression targets handed to ``Pipeline.fit``.
    alpha : float
        Regularisation strength forwarded to ``Ridge(alpha=...)``.
    degree : int, default 16
        Degree of the polynomial feature expansion. The default keeps the
        previously hard-coded value, so existing three-argument calls behave
        exactly as before.

    Returns
    -------
    The pipeline's predictions for ``x1`` (the training inputs).
    """
    model = Pipeline([
        ("poly_feats", PolynomialFeatures(degree=degree)),
        ("Ridge", Ridge(alpha=alpha)),
    ])
    model.fit(x1, x2)
    return model.predict(x1)

In [151]:
import plotly.graph_objects as go
# Overlay polynomial-ridge fits for several regularisation strengths on the raw scatter.
alphas = [0, 20, 200]

# The x-ordering does not depend on alpha, so compute it once instead of
# re-sorting inside the loop. Lines must be drawn left-to-right, hence the sort.
order = x1.argsort(axis=0).ravel()
x_sorted = x1.ravel()[order]

fig = px.scatter(x=x1.ravel(), y=x2.ravel())
for alpha in alphas:
    # Reorder the predictions with the same permutation used for the x-values.
    y_fit = getPredRidge(x1, x2, alpha).ravel()[order]
    fig.add_trace(go.Scatter(x=x_sorted, y=y_fit, mode="lines", name=f"Lambda {alpha}"))
fig.update_layout(title="Polynomial ridge fits for different regularisation strengths")
fig.show()

# End