## Importing Libraries

In [234]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes

## Data Preprocessing

In [235]:
# Load the diabetes dataset as a (features, targets) pair of arrays.
X, y = load_diabetes(return_X_y=True)

# Transpose so that each column of X holds one sample (features x samples).
X = X.T

# Reshape the targets into a single-column 2-D array.
y = y.reshape(-1, 1)

# Seed NumPy's global random generator with 0 so that the shuffle performed
# later in the notebook is reproducible.
np.random.seed(0)

### Printing Shapes

In [236]:
# Show the feature-matrix shape and the target shape, one per line.
print(X.shape, y.shape, sep="\n")

(10, 442)
(442, 1)


### Shuffle Function

In [237]:
def shuffle(X, y):
    """Shuffle the samples of ``X`` and ``y`` using one shared random order.

    Samples are indexed along the columns of ``X`` (axis 1) and along the
    first axis of ``y``; the same permutation is applied to both, so each
    sample stays paired with its target.

    Args:
        X: 2-D array whose columns are samples.
        y: Array with one entry per sample along its first axis.

    Returns:
        Tuple ``(X_shuffled, y_shuffled)``. Both are new arrays produced by
        integer-array indexing; the inputs are left unmodified.
    """
    order = np.arange(X.shape[1])
    # In-place permutation drawn from NumPy's global random state.
    np.random.shuffle(order)
    X_shuffled = X[:, order]
    y_shuffled = y[order]
    return X_shuffled, y_shuffled

In [238]:
# Randomise the sample order before splitting into train and test sets.
# NOTE: this rebinds X and y, so re-running the cell shuffles them again.
X, y = shuffle(X, y)
print(X.shape, y.shape, sep="\n")

(10, 442)
(442, 1)


### Splitting the data into training and test sets

In [239]:
def dataSplit(X,y,testDataFraction):
    """Split samples into a leading training part and a trailing test part.

    Samples are taken in their current order (no shuffling happens here):
    the first ``int(n - n * testDataFraction)`` samples form the training
    set and the remaining ones form the test set, where ``n = len(y)``.

    Args:
        X: 2-D array whose columns are samples.
        y: Array with one entry per sample along its first axis.
        testDataFraction: Fraction of the samples to reserve for testing;
            must lie in the closed interval [0, 1].

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.

    Raises:
        ValueError: If ``testDataFraction`` is outside [0, 1], or if the
            number of columns of ``X`` differs from ``len(y)``.
    """
    # A fraction outside [0, 1] would give a negative or oversized split
    # index, which slicing accepts silently and which yields a wrong split.
    if not 0 <= testDataFraction <= 1:
        raise ValueError(
            f"testDataFraction must be between 0 and 1, got {testDataFraction!r}"
        )

    sampleCount = len(y)
    # Features and targets must describe the same samples, otherwise the two
    # slices below would be silently misaligned.
    if X.shape[1] != sampleCount:
        raise ValueError(
            f"X has {X.shape[1]} samples (columns) but y has {sampleCount} entries"
        )

    trainingDataSize = int(sampleCount - sampleCount * testDataFraction)
    X_train, X_test = X[:, :trainingDataSize], X[:, trainingDataSize:]
    y_train, y_test = y[:trainingDataSize], y[trainingDataSize:]
    return X_train, y_train, X_test, y_test

In [240]:
# Reserve 25% of the shuffled samples for testing; the rest is training data.
X_train, y_train, X_test, y_test = dataSplit(X, y, testDataFraction=0.25)

### Adding dummy feature to training and test data

In [241]:
# Prepend a row of ones to each feature matrix so that the first weight acts
# as the intercept term. Samples are stored as columns, so the dummy feature
# is added as a row.
# np.vstack is used because np.row_stack is a deprecated alias of it.
# NOTE: this cell rebinds X_train / X_test, so re-running it adds another
# row of ones each time.
dummy = np.ones(X_train.shape[1])
X_train = np.vstack((dummy, X_train))
dummy = np.ones(X_test.shape[1])
X_test = np.vstack((dummy, X_test))

### Print Shape of the test and train data

In [242]:
# Report the shapes of the four split arrays, one per line.
print(
    f"X train {X_train.shape}",
    f"Y train {y_train.shape}",
    f"X test {X_test.shape}",
    f"Y test {y_test.shape}",
    sep="\n",
)

X train (11, 331)
Y train (331, 1)
X test (11, 111)
Y test (111, 1)


## Model 1 : Linear regression (closed form, setting the gradient of the loss to zero)

In [243]:
def weights(X, y):
    """Compute least-squares weights in closed form.

    Evaluates ``pinv(X X^T) X y``, i.e. the normal-equations solution with
    the pseudo-inverse used in place of a plain inverse.

    Args:
        X: 2-D array laid out as features x samples (samples are columns).
        y: Targets with one row per sample.

    Returns:
        The weight array, with one row per feature (row) of ``X``.
    """
    gram = X @ X.T
    gram_pinv = np.linalg.pinv(gram)
    return gram_pinv @ X @ y

### Predict Function

In [244]:
def predict(X, w):
    """Return the linear-model predictions ``X^T w``.

    Args:
        X: 2-D array laid out as features x samples (samples are columns).
        w: Weights with one row per feature (row) of ``X``.

    Returns:
        The predictions, with one row per sample (column) of ``X``.
    """
    samples_by_features = X.T
    return samples_by_features @ w

### Loss Function (Mean Squared Error)

In [245]:
def loss(y_hat, y):
    """Return the mean squared error between predictions and targets.

    Args:
        y_hat: Predicted values.
        y: True values. The subtraction follows NumPy broadcasting rules,
            so both arguments should have the same shape.

    Returns:
        The mean of the element-wise squared differences.
    """
    residual = y_hat - y
    squared_error = residual ** 2
    return np.mean(squared_error)

### Generate weights

In [246]:
# Fit the closed-form linear model on the training data ...
w = weights(X=X_train, y=y_train)
# ... and compute its predictions on the same training samples.
y_hat = predict(X=X_train, w=w)

### Print loss

In [247]:
# Compute the training MSE once and reuse it for both reported metrics
# (previously the loss was evaluated twice).
train_mse = loss(y_hat, y_train)
print(f"Mean Squared loss : {train_mse}")
print(f"Root Mean Squared loss : {np.sqrt(train_mse)}")

Mean Squared loss : 2930.5966982870527
Root Mean Squared loss : 54.13498589901962


## Ridge Regression

In [248]:
def ridge_weights(X, y, l):
    """Compute ridge-regression weights in closed form.

    Evaluates ``pinv(X X^T + l * I) X y``, where ``I`` is the identity matrix
    with one row per feature (row) of ``X``. Every row of ``X`` receives the
    same penalty.

    Args:
        X: 2-D array laid out as features x samples (samples are columns).
        y: Targets with one row per sample.
        l: Regularisation strength that multiplies the identity matrix.

    Returns:
        The weight array, with one row per feature (row) of ``X``.
    """
    feature_count = X.shape[0]
    penalised_gram = (X @ X.T) + l * np.eye(feature_count)
    return np.linalg.pinv(penalised_gram) @ X @ y

In [249]:
# Fit ridge regression on the training data with regularisation strength 0.3 ...
r_w = ridge_weights(X_train, y_train, l=0.3)
# ... and compute its predictions on the same training samples.
r_y_hat = predict(X=X_train, w=r_w)

### Print Ridge loss

In [250]:
# Compute the ridge training MSE once and reuse it for both reported metrics
# (previously the loss was evaluated twice).
ridge_train_mse = loss(r_y_hat, y_train)
print(f"Mean Squared loss : {ridge_train_mse}")
print(f"Root Mean Squared loss : {np.sqrt(ridge_train_mse)}")

Mean Squared loss : 3068.292718609828
Root Mean Squared loss : 55.39217199758309


## Loss on Test data

### Linear Regression

In [251]:
# Predict on the held-out test samples with the linear-regression weights.
# NOTE: this rebinds y_hat, which held the training predictions until now.
y_hat = predict(X_test, w)

# Compute the test MSE once and reuse it for both reported metrics
# (previously the loss was evaluated twice).
test_mse = loss(y_hat, y_test)
print(f"Mean Squared loss : {test_mse}")
print(f"Root Mean Squared loss : {np.sqrt(test_mse)}")

Mean Squared loss : 2719.699432760726
Root Mean Squared loss : 52.150737605145395


### Ridge Regression

In [252]:
# Predict on the held-out test samples with the ridge-regression weights.
# NOTE: this rebinds r_y_hat, which held the training predictions until now.
r_y_hat = predict(X_test, r_w)

# Compute the ridge test MSE once and reuse it for both reported metrics
# (previously the loss was evaluated twice).
ridge_test_mse = loss(r_y_hat, y_test)
print(f"Mean Squared loss : {ridge_test_mse}")
print(f"Root Mean Squared loss : {np.sqrt(ridge_test_mse)}")

Mean Squared loss : 2934.6703704183456
Root Mean Squared loss : 54.17259796629977
