In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes


In [50]:
# Load the dataset

X,y = load_diabetes(return_X_y=True)

In [51]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [52]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)

X_test = scaler.transform(X_test)

In [67]:
class PCA_Custom():

    def __init__(self, n_components):

        self.n_components = n_components
        self.variance = None
        self.components = None
        self.explained_variance_ratio = None
    
    def fit(self, X):

        # Center the data

        X_centered = X  - X.mean()

        # Calculate the Covariance Matrix

        cov_matrix = np.cov(X_centered, rowvar=False)

        # Find the eigenvalues and eigenvectors

        eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

        # Sort the Eigenvalues and Eigenvectors

        sort_indices = np.argsort(eigenvalues)[::-1]
        eigenvalues = eigenvalues[sort_indices]
        eigenvectors = eigenvectors[:, sort_indices]

        # Top components

        self.variance = eigenvalues[:self.n_components]
        self.components = eigenvectors[:, :self.n_components]

        total_variance = np.sum(self.variance)

        self.explained_variance_ratio = eigenvalues / total_variance
    
    def trasform(self, X):

        X_mean = X.mean()

        X_centered = X - X_mean

        return np.dot(X_centered, self.components)
    

    def fit_transform(self, X):

        self.fit(X)

        return self.trasform(X)
        

In [68]:
pca_c = PCA_Custom(n_components=6)

In [69]:
X_train = pca_c.fit_transform(X_train)

X_test = pca_c.trasform(X_test)

In [172]:
class Batch_Gradient_Descent():

    def __init__(self, learning_rate, epochs):


        self.coef_ = None
        self.lr = learning_rate
        self.epochs = epochs
    
    def fit(self, X_train, y_train):

        X_train = np.insert(X_train, 0, 1, axis=1)

        self.coef_ = np.random.randn(X_train.shape[1]) * 0.01

        for i in range(self.epochs):

            y_hat = np.dot(X_train, self.coef_)

            error = y_train - y_hat

            coef_slope = np.dot(error, X_train)
            self.coef_ = self.coef_ + (self.lr * coef_slope)

        print(f"Coef_: {self.coef_}")
    
    def predict(self, X_test):

        X_test = np.insert(X_test, 0, 1, axis=1)
        y_pred = np.dot(X_test, self.coef_)
        return y_pred

In [178]:
batch = Batch_Gradient_Descent(learning_rate=0.001, epochs=2000)

In [179]:
batch.fit(X_train, y_train)

Coef_: [153.73654391 -21.57690805  11.57717432 -17.8748975   24.86195804
  -1.24006554 -11.90611684]


In [180]:
y_pred = batch.predict(X_test)

In [181]:
from sklearn.metrics import r2_score

r2_score(y_test, y_pred)

0.44091505446613755

In [182]:
X_train.shape

(353, 6)