# How are the coefficients affected?

In [None]:
from sklearn.datasets import load_diabetes

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [None]:
# Load the diabetes dataset through scikit-learn's loader; the bare `data`
# expression on the last line echoes the returned object as the cell output.
data = load_diabetes()
data

In [None]:
# Put the feature matrix in a DataFrame (one column per feature name) and
# append the regression target as a trailing 'Target' column.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(Target=data.target)
df.head()

In [None]:
# Display the (rows, columns) dimensions of the assembled frame.
df.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the samples for evaluation; the fixed random_state keeps
# the split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.1,
    random_state=2,
)

In [None]:
# Scatter the third feature column (index 2) against the target column,
# which is the last column of the frame.
X = df.iloc[:, 2].to_numpy()
y = df.iloc[:, -1].to_numpy()

plt.scatter(X, y)
plt.title("Diabetes Dataset Scatter Plot")
plt.xlabel("Feature 2")
plt.ylabel("Target")
plt.show()

In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score

In [None]:
# Fit one Ridge model per regularisation strength and record, for each,
# the learned coefficient vector and the R^2 score on the held-out split.
coef = []
r2 = []

for alpha in (0, 10, 100, 1000):
    reg = Ridge(alpha=alpha).fit(X_train, y_train)
    y_predict = reg.predict(X_test)

    coef.append(reg.coef_.tolist())
    r2.append(r2_score(y_test, y_predict))

In [None]:
# Draw one bar chart of the Ridge coefficients per alpha on a 2x2 grid.
# The alphas listed here must stay in the same order as the entries of
# `coef` and `r2` that were filled by the fitting loop.
plt.figure(figsize=(14, 9))

panels = zip((0, 10, 100, 1000), coef, r2)
for position, (alpha, weights, score) in enumerate(panels, start=1):
    plt.subplot(2, 2, position)
    plt.bar(data.feature_names, weights)
    # A single format string keeps the four titles consistently spelled.
    plt.title('Alpha = {}, R2_score={}'.format(alpha, round(score, 2)))

plt.show()

# 2. Higher Coefficients are affected more

In [None]:

# Refit Ridge over a wider range of alphas (0 plus powers of ten from 1e-4
# to 1e4), keeping only the coefficient vector of each fit.
coefs = []

for alpha in (0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000):
    reg = Ridge(alpha=alpha).fit(X_train, y_train)
    coefs.append(reg.coef_.tolist())

In [None]:
# Tabulate the coefficients: one row per alpha, one column per feature.
input_array = np.array(coefs)

coef_df = pd.DataFrame(input_array, columns=data.feature_names).assign(
    alpha=[0, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]
)
# set_index returns a new frame; it is only displayed here, coef_df itself
# keeps 'alpha' as an ordinary column.
coef_df.set_index('alpha')

In [None]:
alphas = [0, 0.0001, 0.0005, 0.001, 0.005, 0.1, 0.5, 1, 5, 10]

# Collect one coefficient vector per alpha.
coefs = []
for alpha in alphas:
    reg = Ridge(alpha=alpha).fit(X_train, y_train)
    coefs.append(reg.coef_.tolist())

# Transpose so each row traces a single feature's coefficient across alphas.
input_array = np.array(coefs).T

plt.figure(figsize=(15, 8))
# Thick black baseline at zero for visual reference.
plt.plot(alphas, np.zeros(len(alphas)), color='black', linewidth=5)
for feature_name, path in zip(data.feature_names, input_array):
    plt.plot(alphas, path, label=feature_name)
plt.legend()

# 3. Impact on Bias and Variance

In [None]:
# Synthetic 1-D regression problem: m points with X drawn uniformly from
# [-2, 3) and y a quadratic in X plus standard-normal noise.
m = 100
X = np.random.rand(m, 1) * 5 - 2
noise = np.random.randn(m, 1)
y = 0.7 * X ** 2 - 2 * X + 3 + noise

plt.scatter(X, y)
plt.show()

In [None]:
from sklearn.preprocessing import PolynomialFeatures

# Split the synthetic data 80/20. reshape(-1, 1) / ravel() give the same
# (n, 1) feature matrix and flat target vector as the previous hard-coded
# reshape(100, ...) calls, but keep working if the sample count changes.
X_train, X_test, y_train, y_test = train_test_split(
    X.reshape(-1, 1),
    y.ravel(),
    test_size=0.2,
    random_state=2,
)

# Expand the single feature into polynomial terms up to degree 15. The
# transformer is fitted on the training split only and then reused on the
# test split.
poly = PolynomialFeatures(degree=15)
X_train = poly.fit_transform(X_train)
X_test = poly.transform(X_test)

In [None]:
#! pip install mlxtend
from mlxtend.evaluate import bias_variance_decomp

# Sweep 100 evenly spaced alphas in [0, 30] and, for each one, decompose
# the MSE of a Ridge model into expected loss, bias and variance.
alphas = np.linspace(0, 30, 100)

loss, bias, variance = [], [], []

for alpha in alphas:
    decomposition = bias_variance_decomp(
        Ridge(alpha=alpha),
        X_train, y_train, X_test, y_test,
        loss='mse',
        random_seed=123,
    )
    # The result is unpacked in the order (expected loss, bias, variance).
    for series, value in zip((loss, bias, variance), decomposition):
        series.append(value)

In [None]:
# Overlay the three decomposition curves against alpha; the y-axis is
# clipped to [0, 5].
for series, name in ((loss, 'loss'), (bias, 'Bias'), (variance, 'Variance')):
    plt.plot(alphas, series, label=name)
plt.xlabel('Alpha')
plt.ylim(0, 5)
plt.legend();

# 4. Effect of Regularization on Loss Function

In [None]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression

# Generate a noisy single-feature regression problem and plot it.
X, y = make_regression(
    n_samples=100,
    n_features=1,
    n_informative=1,
    n_targets=1,
    noise=20,
    random_state=13,
)
plt.scatter(X, y)

# Ordinary least-squares fit; print the slope and intercept it finds.
reg = LinearRegression().fit(X, y)
print(reg.coef_)
print(reg.intercept_)

In [None]:
def cal_loss(m, alpha, intercept=-2.29):
    """Return the ridge-penalised squared-error loss of the line ``m*X + intercept``.

    Reads the module-level ``X`` and ``y`` arrays.

    Args:
        m: Slope of the candidate line.
        alpha: Weight of the ``m**2`` penalty term.
        intercept: Fixed intercept of the candidate line. The default
            reproduces the previously hard-coded constant (presumably the
            rounded intercept printed by the LinearRegression fit -- confirm).

    Returns:
        Sum of squared residuals plus ``alpha * m**2``.
    """
    residuals = y - m * X.ravel() - intercept
    return np.sum(residuals ** 2) + alpha * m * m

In [None]:
def predict(m, intercept=-2.29):
    """Return the predictions ``m*X + intercept`` for the module-level ``X``.

    Args:
        m: Slope of the line.
        intercept: Fixed intercept of the line. The default reproduces the
            previously hard-coded constant (presumably the rounded intercept
            printed by the LinearRegression fit -- confirm).

    Returns:
        Array with the same shape as ``X`` holding the predicted values.
    """
    return m * X + intercept

In [None]:
# Plot the regularised loss as a function of the slope m, one curve per
# alpha, to show how the penalty reshapes the loss curve.
m = np.linspace(-45, 100, 100)
plt.figure(figsize=(4, 6))
for j in [0, 10, 20, 30, 40, 50, 100]:
    loss = [cal_loss(slope, j) for slope in m]
    plt.plot(m, loss, label='alpha = {}'.format(j))
plt.legend()
# The x-axis is the slope being swept; alpha is the per-curve legend entry.
plt.xlabel('m')
plt.ylabel('Loss')
plt.show()
