In [1]:
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
from time import time
import mlflow


In [None]:
#experiment tracking
import mlflow
import os
# This the dockerized method.
# We build two docker containers, one for python/jupyter and another for mlflow.
# The url `mlflow` is resolved into another container within the same composer.
mlflow.set_tracking_uri("http://localhost:8080")
# In the dockerized way, the user who runs this code will be `root`.
# The MLflow will also log the run user_id as `root`.
# To change that, we need to set this environ["LOGNAME"] to your name.
os.environ["LOGNAME"] = "HMH"
# mlflow.create_experiment(name="chaky-diabetes-example")  #create if you haven't create
mlflow.set_experiment(experiment_name="HMH-regularization-example")

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np
from time import time

diabetes = load_diabetes()
print("Features: ", diabetes.feature_names)
X = diabetes.data
y = diabetes.target
m = X.shape[0]  #number of samples
n = X.shape[1]  #number of features

#polynomial transformation
# X   = PolynomialFeatures(degree = 3, include_bias=False).fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)

# actually you can do like this too
# X = np.insert(X, 0, 1, axis=1)
intercept = np.ones((X_train.shape[0], 1))
X_train   = np.concatenate((intercept, X_train), axis=1)
intercept = np.ones((X_test.shape[0], 1))
X_test    = np.concatenate((intercept, X_test), axis=1)

Features:  ['age', 'sex', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']


In [12]:
X_train.shape

(309, 11)

In [31]:
from sklearn.model_selection import KFold

class LinearRegression(object):
    
    #in this class, we add cross validation as well for some spicy code....
    kfold = KFold(n_splits=5)
            
    def __init__(self, regularization=None, lr=0.001, method='batch', num_epochs=500, batch_size=50, cv=kfold, use_momentum=False, momentum=0.0, init_method= 'xavier'): #mdified init_method
        self.lr         = lr
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.method     = method
        self.cv         = cv
        self.use_momentum = use_momentum  # New flag to enable/disable momentum
        self.momentum = momentum  # Momentum value
        self.init_method = init_method
        
        self.regularization = regularization if regularization else NormalPenalty()
    def mse(self, y_true, y_pred):
        return ((y_pred - y_true) ** 2).sum() / y_true.shape[0]
    
    def r2(self, y_true, y_pred):
   
        ss_total = np.sum((y_true - np.mean(y_true)) ** 2)  # Total sum of squares
        ss_residual = np.sum((y_true - y_pred) ** 2)  # Sum of squared residuals
        return 1 - (ss_residual / ss_total)
    
    def xavier_init(self, input_dim):              #based on pseudocode, xavier initializing
        lower, upper = -(1.0 / np.sqrt(input_dim)), (1.0 / np.sqrt(input_dim))
        numbers = np.random.uniform(lower,upper, 1000)                 #gernerating 1000 random numbers between lower and upper
        scaled = lower + numbers * (upper - lower)
        
        return scaled[:input_dim]
    
    
# Fitting Polynomial Regression to the dataset
#     from sklearn.preprocessing import PolynomialFeatures
 
# poly = PolynomialFeatures(degree=4)
# X_poly = poly.fit_transform(X)
 
# poly.fit(X_train, y_train)
# lin2 = LinearRegression()
# lin2.fit(X_train, y_train)
    
    def fit(self, X_train, y_train):
            
        #create a list of kfold scores
        self.kfold_scores = list()
        
        #reset val loss
        self.val_loss_old = np.inf

        #kfold.split in the sklearn.....
        #5 splits
        for fold, (train_idx, val_idx) in enumerate(self.cv.split(X_train)):
            
            X_cross_train = X_train[train_idx]
            y_cross_train = y_train[train_idx]
            X_cross_val   = X_train[val_idx]
            y_cross_val   = y_train[val_idx]

            if self.init_method == 'xavier':   #condition check to use xavier
                self.theta = self.xavier_init(X_cross_train.shape[1]) 
            else:
                self.theta = np.zeros(X_cross_train.shape[1]) #else use zeros
                #self.theta = np.zeros(X_cross_train.shape[1], 1)
            
            #define X_cross_train as only a subset of the data
            #how big is this subset?  => mini-batch size ==> 50
            
            #one epoch will exhaust the WHOLE training set
            with mlflow.start_run(run_name=f"Fold-{fold}", nested=True):
                
                params = {"method": self.method, "lr": self.lr, "reg": type(self).__name__, "init_method": self.init_method, "momentum": self.momentum}
                mlflow.log_params(params=params)
                
                for epoch in range(self.num_epochs):
                    #val_loss_new =  None 
                
                    #with replacement or no replacement
                    #with replacement means just randomize
                    #with no replacement means 0:50, 51:100, 101:150, ......300:323
                    #shuffle your index
                    perm = np.random.permutation(X_cross_train.shape[0])
                            
                    X_cross_train = X_cross_train[perm]
                    y_cross_train = y_cross_train[perm]
                    
                    if self.method == 'sto':
                        for batch_idx in range(X_cross_train.shape[0]):
                            X_method_train = X_cross_train[batch_idx].reshape(1, -1) #(11,) ==> (1, 11) ==> (m, n)
                            y_method_train = y_cross_train[batch_idx] 
                            train_loss = self._train(X_method_train, y_method_train)
                    elif self.method == 'mini':
                        for batch_idx in range(0, X_cross_train.shape[0], self.batch_size):
                        #batch_idx = 0, 50, 100, 150
                            X_method_train = X_cross_train[batch_idx:batch_idx+self.batch_size, :]
                            y_method_train = y_cross_train[batch_idx:batch_idx+self.batch_size]
                            train_loss = self._train(X_method_train, y_method_train)
                    else:
                        X_method_train = X_cross_train
                        y_method_train = y_cross_train
                        train_loss = self._train(X_method_train, y_method_train)

                mlflow.log_metric(key="train_loss", value=train_loss, step=epoch)

                yhat_val = self.predict(X_cross_val)
                val_loss_new = self.mse(y_cross_val, yhat_val)
                mlflow.log_metric(key="val_loss", value=val_loss_new, step=epoch)
                    
                #early stopping
                if np.allclose(val_loss_new, self.val_loss_old):
                    break
                self.val_loss_old = val_loss_new

                y_pred = self.predict(X_cross_val)          #compute and print r2_score after training
                r2 = self.r2(y_cross_val, y_pred)
                # print(f"Fold {fold} - R² Score: {r2:.4f}")
            
            self.kfold_scores.append(val_loss_new)
            print(f"Fold {fold}: {val_loss_new}")
            print(f"Fold {fold} - R² Score: {r2:.4f}")
            
                    
    # def _train(self, X, y):
    #     yhat = self.predict(X)
    #     m    = X.shape[0]        
    #     grad = (1/m) * X.T @(yhat - y) + self.regularization.derivation(self.theta)
    #     self.theta = self.theta - self.lr * grad
    #     return self.mse(y, yhat)
    
    def _train(self, X, y):
        yhat = self.predict(X)
        m = X.shape[0]
        grad = (1/m) * X.T @ (yhat - y) + self.regularization.derivation(self.theta)
        
        if not hasattr(self, 'prev_step'): # Initialize momentum if not set
            self.prev_step = np.zeros_like(grad)

        step = self.lr * grad   # Update weights using momentum

        if self.use_momentum:
            self.theta -= step + self.momentum * self.prev_step
            self.prev_step = step  # Store the previous step for momentum
        else:
            self.theta -= step   #normal grad

        return self.mse(y, yhat)

    
    def predict(self, X):
        return X @ self.theta  #===>(m, n) @ (n, )
    
    def _coef(self):
        return self.theta[1:]  #remind that theta is (w0, w1, w2, w3, w4.....wn)
                               #w0 is the bias or the intercept
                               #w1....wn are the weights / coefficients / theta
    def _bias(self):
     return self.theta[0]
    
    def feature_importance_plot(self): #Plot the feature importance based on the coefficients (theta).
    
        # Extract the absolute values of the coefficients and sort them
        # abs_coeff = np.abs(self.theta)
        # sorted_idx = np.argsort(abs_coeff)[::-1]  # Sort in descending order
        feature_names = ["age", "sex", "bmi", "bp"]
        importance_values = [abs(self._coef()[0]), abs(self._coef()[1]), abs(self._coef()[2]), abs(self._coef()[3])]
        # Plot the coefficients
        plt.figure(figsize=(10, 6))
        plt.barh(feature_names, importance_values, color='green')
        #plt.yticks(range(len(sorted_idx)), np.array(feature_names)[sorted_idx])
        plt.xlabel('Feature Importance (Magnitude of Coefficient)')
        plt.title('Feature Importance Based on Coefficients')
        plt.gca().invert_yaxis()  # To show the largest importance at the top
        plt.show()

In [32]:
class LassoPenalty:
    
    def __init__(self, l):
        self.l = l # lambda value
        
    def __call__(self, theta): #__call__ allows us to call class as method
        return self.l * np.sum(np.abs(theta))
        
    def derivation(self, theta):
        return self.l * np.sign(theta)
    
class RidgePenalty:
    
    def __init__(self, l):
        self.l = l
        
    def __call__(self, theta): #__call__ allows us to call class as method
        return self.l * np.sum(np.square(theta))
        
    def derivation(self, theta):
        return self.l * 2 * theta
    
class ElasticPenalty:
    
    def __init__(self, l = 0.1, l_ratio = 0.5):
        self.l = l 
        self.l_ratio = l_ratio

    def __call__(self, theta):  #__call__ allows us to call class as method
        l1_contribution = self.l_ratio * self.l * np.sum(np.abs(theta))
        l2_contribution = (1 - self.l_ratio) * self.l * 0.5 * np.sum(np.square(theta))
        return (l1_contribution + l2_contribution)

    def derivation(self, theta):
        l1_derivation = self.l * self.l_ratio * np.sign(theta)
        l2_derivation = self.l * (1 - self.l_ratio) * theta
        return (l1_derivation + l2_derivation)
    
class PolynomialPenalty:
    def __init__(self, l, p=1):
        self.l, self.p = l, p
    def __call__(self, theta): return self.l * np.sum(np.abs(theta)**self.p)
    def derivation(self, theta): return self.l * self.p * np.sign(theta) * np.abs(theta)**(self.p-1)

class NormalPenalty:
    def __call__(self, theta): return 0
    def derivation(self, theta): return np.zeros_like(theta)


    
class Lasso(LinearRegression):
    
    def __init__(self, method, lr, l=0.1, init_method='xavier', momentum=0.0):
        self.regularization = LassoPenalty(l)
        super().__init__(self.regularization, lr, method, init_method=init_method, momentum=momentum)
        
class Ridge(LinearRegression):
    
    def __init__(self, method, lr, l=0.1, init_method='xavier', momentum=0.0):
        self.regularization = RidgePenalty(l)
        super().__init__(self.regularization, lr, method, init_method=init_method, momentum=momentum)

class Normal(LinearRegression):
    def __init__(self, method, lr, init_method='xavier', momentum=0.0):
        super().__init__(None, lr, method, init_method=init_method, momentum=momentum)
        
# class ElasticNet(LinearRegression):
    
#     def __init__(self, method, lr, l, l_ratio=0.5):
#         self.regularization = ElasticPenalty(l, l_ratio)
#         super().__init__(self.regularization, lr, method)


### Experiment

In [33]:
#helper function for looping classnames
import sys

def str_to_class(classname):
    return getattr(sys.modules[__name__], classname)

In [34]:
import mlflow
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures

# Define the hyperparameters to loop over
polynomials = ["True", "False"]
regs = ["Ridge", "Lasso", "Normal"]
grad_methods = ["stochastic", "mini-batch", "batch"]
init_methods = ["zeros", "xavier"]
momentum_options = [0.0, 0.9]
learning_rates = [0.01, 0.001, 0.0001]

POLY_DEGREE = 1 #PolynomialFeature_Transform_For_X_train_data
poly_transformer = PolynomialFeatures(degree=POLY_DEGREE, include_bias=False)
X_train_poly = poly_transformer.fit_transform(X_train)
X_test_poly = poly_transformer.transform(X_test)



# for poly in polynomial:
# Run experiments
for poly in polynomials: #outtest loop for Polynomial_regularization
    for reg in regs:
        for grad_method in grad_methods:
            for init_method in init_methods:
                for momentum in momentum_options:
                    for lr in learning_rates:
                    
                        # print("="*35)
                        # print(f"Using Polynomial Degree: {POLY_DEGREE}" if poly == "True" else "Polynomial: False")
                        # print(f"===== Regularization: {reg} =====")
                        # print(f"===== Grad_method: {grad_method} =====")
                        # print(f"===== Init_Method: {init_method} =====")
                        # print(f"===== Momentum: {momentum} =====")
                        # print(f"===== Learning Rate: {lr} =====")
                        # print("="*35)

                        type_of_regression = str_to_class(reg)
                        params = {"method": grad_method,"lr": lr,"init_method": init_methods,"momentum": momentum} # Set hyperparameters

                        if poly == "True":
                            X_train_used = X_train_poly
                            X_test_used = X_test_poly
                        else:
                            X_train_used == X_train
                            X_test_used == X_test


                        with mlflow.start_run(run_name=f"Reg-{reg}_LR-{lr}_Momentum-{momentum}_Init-{init_method}_Method-{grad_method}_poly--{poly}", nested=True):

                            print(f"===== Running {reg} with {reg}, lr={lr}, Grad_method={grad_method},init={init_method}, momentum={momentum}, Poly_Dregree={poly} =====")

                    # if reg == "Normal":
                    #     model = LinearRegression(**params)
                    # # elif reg == "Normal":
                    # #     model = LinearRegression(**params)
                    # else:
                    # # Get the correct regression type
                            mlflow.log_params(params=params)      
                            model = type_of_regression(**params)  

                    # Train the model
                            model.fit(X_train_used, y_train)

                    # Make predictions
                            yhat = model.predict(X_test_used)

                    # Compute metrics
                            mse = model.mse(y_test, yhat)
                            r2 = model.r2(y_test, yhat)

                            print(f"Test MSE: {mse:.4f}, R² Score: {r2:.4f}")

                        # Log metrics in MLflow
                            mlflow.log_params(params)
                            mlflow.log_metric("Test MSE", mse)
                            mlflow.log_metric("Test R²", r2)

                    # Log the model
                            signature = mlflow.models.infer_signature(X_train_used, model.predict(X_train_used))
                            mlflow.sklearn.log_model(model, artifact_path='model', signature=signature)

                            mlflow.end_run()


===== Running Ridge with Ridge, lr=0.01, Grad_method=stochastic,init=zeros, momentum=0.0, Poly_Dregree=True =====
Fold 0: 3842.849510270647
Fold 0 - R² Score: 0.3502
Fold 1: 4651.545383106083
Fold 1 - R² Score: 0.2201
Fold 2: 3107.4700456591822
Fold 2 - R² Score: 0.5107
Fold 3: 2491.8465949436095
Fold 3 - R² Score: 0.5097
Fold 4: 4215.07897044458
Fold 4 - R² Score: 0.3100
Test MSE: 3585.2936, R² Score: 0.3773
===== Running Ridge with Ridge, lr=0.001, Grad_method=stochastic,init=zeros, momentum=0.0, Poly_Dregree=True =====
Fold 0: 13863.07155973834
Fold 0 - R² Score: -1.3442
Fold 1: 15613.089020647043
Fold 1 - R² Score: -1.6176
Fold 2: 10582.780564110797
Fold 2 - R² Score: -0.6664
Fold 3: 9156.573167843597
Fold 3 - R² Score: -0.8015
Fold 4: 13454.44739498861
Fold 4 - R² Score: -1.2025
Test MSE: 12162.4915, R² Score: -1.1123
===== Running Ridge with Ridge, lr=0.0001, Grad_method=stochastic,init=zeros, momentum=0.0, Poly_Dregree=True =====
Fold 0: 28028.135591356615
Fold 0 - R² Score: -3.

KeyboardInterrupt: 

In [None]:
X_train.shape

(309, 11)

In [None]:
X_train_used.shape


(309, 11)

In [None]:
X_train_poly.shape

(309, 11)

In [None]:
import matplotlib as plt

In [None]:
model.feature_importance_plot()

NameError: name 'plt' is not defined