**Practical-4**

**Aim: Linear Regression with regularization (without using sklearn or an equivalent library), and Simple and Multiple Linear Regression with and without regularization using sklearn.  
Apply them to the datasets used in Experiment 3.  
Compare the outcomes of Experiments 3 and 4 and derive conclusions.**

**Batch Gradient Descent, Stochastic Gradient Descent and Mini-Batch Gradient Descent with Regularization**

In [1]:
import numpy as np
from sklearn.metrics import mean_squared_error

In [7]:
# Show the menu of available regression problems, one entry per line.
print(
    "Choose which Linear Regression you want to go for:",
    "1. Simple Linear Regression",
    "2. Multiple Linear Regression",
    sep="\n",
)

def switch_example(value):
    """Generate a synthetic regression dataset for the selected menu option.

    Args:
        value: Menu choice. ``1`` builds a single-feature problem,
            ``2`` builds a three-feature problem.

    Returns:
        For choice 1 or 2, a tuple ``(X, y, true_theta)``: ``X`` holds 100
        rows of features computed as ``2 * np.random.rand(...)``, ``y`` is a
        (100, 1) column of targets with ``np.random.randn`` noise added, and
        ``true_theta`` is the generating parameter column (intercept first).
        For any other value, an error-message string is returned instead.
    """
    n_samples = 100

    if value == 1:
        # Simple Linear Regression: y = 4 + 3X + noise
        features = 2 * np.random.rand(n_samples, 1)
        targets = 4 + 3 * features + np.random.randn(n_samples, 1)
        theta_true = np.array([4, 3]).reshape(-1, 1)
        return (features, targets, theta_true)

    if value == 2:
        # Multiple Linear Regression: y = 4 + 3x1 + 3x2 + 3x3 + noise
        features = 2 * np.random.rand(n_samples, 3)
        x1, x2, x3 = features[:, 0], features[:, 1], features[:, 2]
        targets = 4 + 3*x1 + 3*x2 + 3*x3 + np.random.randn(n_samples)
        targets = targets.reshape(-1, 1)  # flat vector -> column vector
        theta_true = np.array([4, 3, 3, 3]).reshape(-1, 1)
        return (features, targets, theta_true)

    return "Invalid choice. Please select 1 or 2."

# Get user input
# int() raises ValueError if the typed text is not an integer.
choice = int(input("Enter 1 or 2: "))

# switch_example returns a (X, y, true_theta) tuple for a valid choice and an
# error-message string otherwise; the isinstance check below tells them apart.
result = switch_example(choice)

if isinstance(result, tuple):
    # Unpack into notebook-level names used by the following cells.
    X, y, true_theta = result
    print("\nSelected Option:", choice)
    print("X shape:", X.shape)
    print("y shape:", y.shape)
    print("True Theta:\n", true_theta)
else:
    # Invalid choice: X, y and true_theta are NOT assigned on this path.
    print(result)

Choose which Linear Regression you want to go for:
1. Simple Linear Regression
2. Multiple Linear Regression


Enter 1 or 2:  1



Selected Option: 1
X shape: (100, 1)
y shape: (100, 1)
True Theta:
 [[4]
 [3]]


In [8]:
# Number of samples in the generated dataset (rows of y).
m = len(y)

In [9]:
# --- Split into train (80%) and test (20%) ---
# The split is positional: the first 80% of rows form the training set and the
# remaining rows form the test set. No shuffling is done here.
split_idx = int(0.8 * m)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

In [10]:
# Add bias term: prepend a column of ones so theta[0] acts as the intercept.
# The column count is n_features + 1 (2 for option 1, 4 for option 2).
X_train_b = np.c_[np.ones((len(X_train), 1)), X_train]  # (n_train, n_features + 1)
X_test_b = np.c_[np.ones((len(X_test), 1)), X_test]    # (n_test, n_features + 1)

In [11]:
from sklearn.linear_model import Lasso
class GDRegressor:
    """Linear regression with an L1 (Lasso) penalty.

    Two back ends share one interface:

    * ``use_sklearn=False``: batch gradient descent on the mean-squared-error
      loss plus an L1 subgradient term. ``fit`` and ``predict`` expect a design
      matrix whose FIRST column is the bias column of ones; that bias weight
      is not regularized.
    * ``use_sklearn=True``: delegates to ``sklearn.linear_model.Lasso``, which
      fits its own intercept, so the matrix must not carry a bias column.

    Note on scale: the custom path minimizes ``MSE + l1_lambda * ||w||_1``,
    while scikit-learn's Lasso documents its objective with a ``1 / (2 * n)``
    factor on the squared error, so the same ``l1_lambda`` value does not mean
    the same regularization strength in both back ends.

    Args:
        learning_rate: Step size for the custom gradient descent.
        epochs: Number of full-batch gradient steps for the custom path.
        l1_lambda: L1 penalty strength (passed as ``alpha`` to Lasso).
        use_sklearn: Select the scikit-learn back end when True.
    """

    def __init__(self, learning_rate=0.1, epochs=100, l1_lambda=0.01, use_sklearn=False):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.l1_lambda = l1_lambda
        self.use_sklearn = use_sklearn
        self.theta = None   # fitted parameters, intercept first
        self.model = None   # underlying Lasso estimator (sklearn path only)

    def fit(self, X_train, y_train):
        """Fit the model and print the learned parameters.

        Args:
            X_train: Feature matrix. Must include a leading column of ones
                for the custom path, and must not include it for sklearn.
            y_train: Target column of shape (n_samples, 1).
        """
        if self.use_sklearn:
            # Use scikit-learn's Lasso
            self.model = Lasso(alpha=self.l1_lambda, max_iter=10000)
            self.model.fit(X_train, y_train.ravel())
            self.theta = np.r_[self.model.intercept_, self.model.coef_]  # combine bias + weights
            print("\n[scikit-learn] Theta:", self.theta)

        else:
            # Initialize theta
            self.theta = np.random.randn(X_train.shape[1], 1)

            for _ in range(self.epochs):
                residuals = X_train.dot(self.theta) - y_train

                # Gradient of MSE loss
                g = (2 / len(y_train)) * X_train.T.dot(residuals)

                # L1 regularization term (subgradient)
                l1_penalty = self.l1_lambda * np.sign(self.theta)
                # Do not regularize the bias term: penalizing the intercept
                # shifts every prediction toward zero. This matches the other
                # regressors in this notebook and the sklearn path, where the
                # intercept is fitted separately from the coefficients.
                l1_penalty[:1] = 0

                # Update theta
                self.theta = self.theta - self.learning_rate * (g + l1_penalty)

            print("\nPredicted Theta:\n", self.theta)

    def predict(self, X_test):
        """Return predictions for ``X_test`` (same column layout as in ``fit``).

        The custom path returns a column of shape (n_samples, 1); the sklearn
        path returns whatever ``Lasso.predict`` returns.
        """
        if self.use_sklearn:
            return self.model.predict(X_test)
        else:
            return X_test.dot(self.theta)
      


**With Regularization: using Sklearn as well as without using Sklearn**

In [12]:
# With your gradient descent
# The custom path needs the bias column, so it is fed X_train_b / X_test_b.
gd_model = GDRegressor(learning_rate=0.1, epochs=200, l1_lambda=0.01, use_sklearn=False)
gd_model.fit(X_train_b, y_train)   
y_pred_gd = gd_model.predict(X_test_b)

# With scikit-learn's Lasso
# Lasso fits its own intercept, so it is fed the raw features without the
# bias column.
sk_model = GDRegressor(l1_lambda=0.01, use_sklearn=True)
sk_model.fit(X_train, y_train)     
y_pred_sk = sk_model.predict(X_test)


# Test-set mean squared error for each back end.
mse_gd = mean_squared_error(y_test, y_pred_gd)
mse_sk = mean_squared_error(y_test, y_pred_sk)

print("MSE (Custom GD with L1):", mse_gd)
print("MSE (scikit-learn Lasso):", mse_sk)


Predicted Theta:
 [[3.6676112 ]
 [3.22877455]]

[scikit-learn] Theta: [3.70182767 3.19940434]
MSE (Custom GD with L1): 0.899613940591393
MSE (scikit-learn Lasso): 0.8916619491384082


In [13]:
# Show the generating parameters so they can be compared with the fitted ones.
print("True theta:\n", true_theta)

True theta:
 [[4]
 [3]]


In [14]:
from sklearn.linear_model import SGDRegressor as SkSGD
from sklearn.metrics import mean_squared_error
class SGDRegressor:
    """Ridge (L2) regularized linear regression trained with stochastic updates.

    With ``use_sklearn=False`` the model is fitted by a hand-written SGD loop
    that expects the bias column of ones as the first column of ``X`` and
    leaves that bias weight unregularized. With ``use_sklearn=True`` the work
    is delegated to scikit-learn's SGD regressor (imported as ``SkSGD``),
    which fits its own intercept.

    Args:
        learning_rate: Step size (``eta0`` for the sklearn back end).
        epochs: Passes over the data (``max_iter`` for the sklearn back end).
        lambda_reg: L2 penalty strength (``alpha`` for the sklearn back end).
        use_sklearn: Select the scikit-learn back end when True.
    """

    def __init__(self, learning_rate=0.1, epochs=100, lambda_reg=0.1, use_sklearn=False):
        self.use_sklearn = use_sklearn
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.lambda_reg = lambda_reg
        self.model = None   # sklearn estimator, set only by the sklearn path
        self.theta = None   # fitted parameter column, intercept first

    def fit(self, X_train, y_train):
        """Fit on the training data and print the learned parameters."""
        if self.use_sklearn:
            self._fit_sklearn(X_train, y_train)
        else:
            self._fit_custom(X_train, y_train)

    def _fit_sklearn(self, X_train, y_train):
        """Fit scikit-learn's SGD regressor with an L2 penalty."""
        sgd_options = dict(
            penalty="l2",
            alpha=self.lambda_reg,          # regularization strength
            max_iter=self.epochs,
            eta0=self.learning_rate,
            learning_rate="invscaling",
            fit_intercept=True,
            random_state=42,
        )
        self.model = SkSGD(**sgd_options)
        self.model.fit(X_train, y_train.ravel())

        # Stack intercept and coefficients into one theta-style column.
        stacked = np.r_[self.model.intercept_, self.model.coef_]
        self.theta = stacked.reshape(-1, 1)
        print("\n[scikit-learn Ridge/SGD] Theta:\n", self.theta)

    def _fit_custom(self, X_train, y_train):
        """Run the hand-written SGD loop: one random row per update."""
        n_samples = X_train.shape[0]
        self.theta = np.random.randn(X_train.shape[1], 1)

        # epochs * n_samples single-row updates; rows are drawn with
        # replacement, so an epoch is not an exact pass over the data.
        for _ in range(self.epochs * n_samples):
            pick = np.random.randint(0, n_samples)
            x_i = X_train[pick:pick + 1]
            y_i = y_train[pick:pick + 1]

            # Gradient of the squared error on the sampled row.
            grad = 2 * x_i.T.dot(x_i.dot(self.theta) - y_i)

            # Ridge term; row 0 is zeroed so the bias is not regularized.
            shrink = 2 * self.lambda_reg * self.theta
            shrink[0] = 0

            self.theta = self.theta - self.learning_rate * (grad + shrink)

        print("\n[Custom SGD + Ridge] Theta:\n", self.theta)

    def predict(self, X_test):
        """Predict targets; ``X_test`` must use the same column layout as ``fit``."""
        if not self.use_sklearn:
            return X_test.dot(self.theta)
        return self.model.predict(X_test)

    def mse(self, y_true, y_pred):
        """Return the mean squared error between ``y_true`` and ``y_pred``."""
        return mean_squared_error(y_true, y_pred)


In [15]:
# Custom Ridge-SGD
sgd_custom = SGDRegressor(learning_rate=0.01, epochs=30, lambda_reg=0.1, use_sklearn=False)
sgd_custom.fit(X_train_b, y_train)   
y_pred_custom = sgd_custom.predict(X_test_b)
print("MSE (Custom SGD):", sgd_custom.mse(y_test, y_pred_custom))

# Scikit-learn Ridge-SGD
sgd_sklearn = SGDRegressor(learning_rate=0.01, epochs=100, lambda_reg=0.1, use_sklearn=True)
sgd_sklearn.fit(X_train, y_train)   
y_pred_sklearn = sgd_sklearn.predict(X_test)
print("MSE (sklearn SGD):", sgd_sklearn.mse(y_test, y_pred_sklearn))


[Custom SGD + Ridge] Theta:
 [[4.33314783]
 [2.33455116]]
MSE (Custom SGD): 0.7779158115963563

[scikit-learn Ridge/SGD] Theta:
 [[3.69632031]
 [3.05906335]]
MSE (sklearn SGD): 0.7913645447724662


In [16]:
# Show the generating parameters so they can be compared with the fitted ones.
print("True theta:\n", true_theta)

True theta:
 [[4]
 [3]]


In [17]:
import numpy as np
from sklearn.linear_model import ElasticNet

class MBGDRegressor:
    """Elastic Net regularized linear regression trained on mini-batches.

    With ``use_sklearn=False`` the model is fitted by a hand-written
    mini-batch gradient descent loop that expects the bias column of ones as
    the first column of ``X`` and leaves that bias weight unregularized.
    With ``use_sklearn=True`` the work is delegated to scikit-learn's
    ``ElasticNet``, which fits its own intercept.

    Args:
        learning_rate: Step size of the custom loop.
        epochs: Passes over the shuffled data (the sklearn back end gets
            ``max_iter = epochs * 10``).
        batch_size: Rows per mini-batch in the custom loop.
        lambda_reg: Overall penalty strength (``alpha`` for ElasticNet).
        alpha: L1/L2 mixing weight (``l1_ratio`` for ElasticNet).
        use_sklearn: Select the scikit-learn back end when True.
    """

    def __init__(self, learning_rate=0.1, epochs=100, batch_size=32, lambda_reg=0.1, alpha=0.5,use_sklearn=False):
        self.use_sklearn = use_sklearn
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.lambda_reg = lambda_reg
        self.alpha = alpha
        self.model = None   # sklearn estimator, set only by the sklearn path
        self.theta = None   # fitted parameter column, intercept first

    def fit(self, X_train, y_train):
        """Fit on the training data and print the learned parameters."""
        if self.use_sklearn:
            self._fit_sklearn(X_train, y_train)
        else:
            self._fit_custom(X_train, y_train)

    def _fit_sklearn(self, X_train, y_train):
        """Fit scikit-learn's ElasticNet on the raw features."""
        self.model = ElasticNet(
            alpha=self.lambda_reg,
            l1_ratio=self.alpha,     # balance L1/L2
            max_iter=self.epochs * 10,
            fit_intercept=True,
            random_state=42,
        )
        self.model.fit(X_train, y_train.ravel())

        # Stack intercept and coefficients into one theta-style column.
        stacked = np.r_[self.model.intercept_, self.model.coef_]
        self.theta = stacked.reshape(-1, 1)
        print("\n[scikit-learn ElasticNet] Theta:\n", self.theta)

    def _elastic_net_penalty(self):
        """Return the Elastic Net (sub)gradient for the current theta.

        Row 0 is zeroed so that the bias weight is not regularized.
        """
        l1_part = self.alpha * self.lambda_reg * np.sign(self.theta)
        l2_part = (1 - self.alpha) * 2 * self.lambda_reg * self.theta
        combined = l1_part + l2_part
        combined[0] = 0
        return combined

    def _fit_custom(self, X_train, y_train):
        """Run the hand-written mini-batch gradient descent loop."""
        n_samples, n_features = X_train.shape
        self.theta = np.random.randn(n_features, 1)

        for _ in range(self.epochs):
            # Reshuffle the already-shuffled data. Indexing creates new
            # arrays, so the caller's arrays are left untouched.
            order = np.arange(n_samples)
            np.random.shuffle(order)
            X_train, y_train = X_train[order], y_train[order]

            for lo in range(0, n_samples, self.batch_size):
                rows = slice(lo, lo + self.batch_size)
                X_batch, y_batch = X_train[rows], y_train[rows]

                # Gradient of the MSE loss on this batch (the last batch
                # may be shorter, hence len(y_batch)).
                residual = X_batch.dot(self.theta) - y_batch
                grad = (2 / len(y_batch)) * X_batch.T.dot(residual)

                grad = grad + self._elastic_net_penalty()
                self.theta = self.theta - self.learning_rate * grad

        print("\n[Custom MBGD + ElasticNet] Theta:\n", self.theta)

    def predict(self, X_test):
        """Predict targets; ``X_test`` must use the same column layout as ``fit``."""
        if not self.use_sklearn:
            return X_test.dot(self.theta)
        return self.model.predict(X_test)

    def mse(self, y_true, y_pred):
        """Return the mean squared error between ``y_true`` and ``y_pred``."""
        return mean_squared_error(y_true, y_pred)


In [18]:
# Custom MBGD with ElasticNet
mbgd_custom = MBGDRegressor(learning_rate=0.05, epochs=200, batch_size=16, lambda_reg=0.1, alpha=0.5, use_sklearn=False)
mbgd_custom.fit(X_train_b, y_train)   
y_pred_custom = mbgd_custom.predict(X_test_b)
print("MSE (Custom MBGD):", mbgd_custom.mse(y_test, y_pred_custom))

# Scikit-learn ElasticNet
mbgd_sklearn = MBGDRegressor(lambda_reg=0.1, alpha=0.5, use_sklearn=True)
mbgd_sklearn.fit(X_train, y_train)   
y_pred_sklearn = mbgd_sklearn.predict(X_test)
print("MSE (sklearn MBGD):", mbgd_sklearn.mse(y_test, y_pred_sklearn))


[Custom MBGD + ElasticNet] Theta:
 [[4.15486262]
 [2.80536454]]
MSE (Custom MBGD): 0.8420683147075902

[scikit-learn ElasticNet] Theta:
 [[4.18677669]
 [2.7069881 ]]
MSE (sklearn MBGD): 0.798525323786021


**Batch Gradient Descent, Stochastic Gradient Descent and Mini-Batch Gradient Descent without Regularization using Sklearn**

In [2]:
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# --- Dataset creation ---
def switch_example(value):
    """Generate a synthetic regression dataset for the selected menu option.

    NOTE: this repeats the ``switch_example`` defined in an earlier cell of
    this notebook; running this cell rebinds the name.

    Args:
        value: ``1`` for a single-feature problem (y = 4 + 3x + noise),
            ``2`` for a three-feature problem (y = 4 + 3x1 + 3x2 + 3x3 + noise).

    Returns:
        ``(X, y, true_theta)`` for choice 1 or 2, where ``X`` has 100 rows,
        ``y`` has shape (100, 1) and ``true_theta`` is the generating
        parameter column (intercept first). Any other value yields an
        error-message string instead of a tuple.
    """
    n_samples = 100

    if value == 1:
        features = 2 * np.random.rand(n_samples, 1)
        targets = 4 + 3 * features + np.random.randn(n_samples, 1)
        theta_true = np.array([4, 3]).reshape(-1, 1)
        return features, targets, theta_true

    if value == 2:
        features = 2 * np.random.rand(n_samples, 3)
        col0, col1, col2 = features[:, 0], features[:, 1], features[:, 2]
        flat_targets = 4 + 3*col0 + 3*col1 + 3*col2 + np.random.randn(n_samples)
        theta_true = np.array([4, 3, 3, 3]).reshape(-1, 1)
        # Targets are built as a flat vector and returned as a column.
        return features, flat_targets.reshape(-1, 1), theta_true

    return "Invalid choice. Please select 1 or 2."

# --- User input ---
# Read the menu choice; int() raises ValueError if the text is not an integer.
choice = int(input("Enter 1 or 2: "))
result = switch_example(choice)

if isinstance(result, tuple):
    # Valid choice: unpack into the names used by the rest of this cell.
    X, y, true_theta = result
    print("\nSelected Option:", choice)
    print("X shape:", X.shape)
    print("y shape:", y.shape)
    print("True Theta:\n", true_theta)
else:
    print(result)
    # Stop here: the code below needs X and y, which are not assigned on this
    # path. `exit()` is the interactive-shell helper and is not meant for use
    # in programs; under IPython/Jupyter it asks the kernel to shut down
    # rather than simply halting the cell. Raising SystemExit halts execution
    # the same way in a script and in a notebook cell.
    raise SystemExit

# -----------------------------
# Split into training and testing
# -----------------------------
# Hold out 20% of the rows for testing; random_state=42 fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Add bias term for Batch GD
# A leading column of ones lets theta[0] act as the intercept. The sklearn
# estimators further down are given the raw X_train / X_test instead.
X_train_b = np.c_[np.ones((X_train.shape[0], 1)), X_train]
X_test_b = np.c_[np.ones((X_test.shape[0], 1)), X_test]

# --------------------------------------
# 1. Batch Gradient Descent
# --------------------------------------
def batch_gradient_descent(X, y, lr=0.01, n_iterations=1000):
    """Fit linear-regression weights with full-batch gradient descent.

    Args:
        X: Design matrix of shape (m, n); include a column of ones if an
            intercept is wanted.
        y: Target column of shape (m, 1).
        lr: Learning rate (step size).
        n_iterations: Number of gradient steps to take.

    Returns:
        The (n, 1) weight column after ``n_iterations`` updates, starting
        from a random ``np.random.randn`` initialization.
    """
    n_rows, n_cols = X.shape
    weights = np.random.randn(n_cols, 1)

    for _step in range(n_iterations):
        residual = X.dot(weights) - y
        # Gradient of the mean squared error over the whole batch.
        mse_grad = 2/n_rows * X.T.dot(residual)
        weights -= lr * mse_grad

    return weights

# Fit on the bias-augmented training matrix with the default lr and iteration
# count, then score on the bias-augmented test matrix.
theta_bgd = batch_gradient_descent(X_train_b, y_train)
y_pred_bgd = X_test_b.dot(theta_bgd)
print("\nBatch GD Theta:\n", theta_bgd)
print("MSE - BGD (Test):", mean_squared_error(y_test, y_pred_bgd))

# ----------------------------------------------
# 2. Stochastic Gradient Descent using sklearn
# ----------------------------------------------
# SGDRegressor here is scikit-learn's estimator (imported at the top of this
# cell), not the custom class of the same name defined in an earlier cell.
# penalty=None turns regularization off; the learning rate is held constant
# at eta0.
sgd_reg = SGDRegressor(max_iter=1000, learning_rate='constant', eta0=0.01,
                       penalty=None, random_state=42)
# The estimator is fitted on the raw features (no bias column) with a flat
# target vector; the intercept is read back from intercept_.
sgd_reg.fit(X_train, y_train.ravel())
theta_sgd = np.r_[sgd_reg.intercept_, sgd_reg.coef_]
y_pred_sgd = sgd_reg.predict(X_test)
print("\nStochastic GD Theta:\n", theta_sgd.reshape(-1,1))
print("MSE - SGD (Test):", mean_squared_error(y_test, y_pred_sgd))

# -------------------------------------------------
# 3. Mini-Batch Gradient Descent using partial_fit
# -------------------------------------------------
# Mini-batch settings: rows per batch and number of passes over the data.
batch_size = 20
n_epochs = 50
y_train_flat = y_train.ravel()

# NOTE(review): only partial_fit is called on this estimator below, so
# max_iter and warm_start presumably have no effect here - confirm against
# the SGDRegressor documentation.
mbgd_reg = SGDRegressor(max_iter=1, tol=None, learning_rate='constant', eta0=0.01,
                        penalty=None, random_state=42, warm_start=True)

m_train = X_train.shape[0]

for epoch in range(n_epochs):
    # Fresh random ordering each epoch; this draws from the global NumPy RNG,
    # which is not seeded in this cell, so results vary between runs.
    indices = np.random.permutation(m_train)
    X_shuffled = X_train[indices]
    y_shuffled = y_train_flat[indices]
    
    # Feed consecutive slices of the shuffled data; the last slice may be
    # shorter than batch_size.
    for start in range(0, m_train, batch_size):
        end = start + batch_size
        X_batch = X_shuffled[start:end]
        y_batch = y_shuffled[start:end]
        mbgd_reg.partial_fit(X_batch, y_batch)

# Stack intercept and coefficients so the result lines up with true_theta.
theta_mbgd = np.r_[mbgd_reg.intercept_, mbgd_reg.coef_]
y_pred_mbgd = mbgd_reg.predict(X_test)
print("\nMini-Batch GD Theta:\n", theta_mbgd.reshape(-1,1))
print("MSE - MBGD (Test):", mean_squared_error(y_test, y_pred_mbgd))


Enter 1 or 2:  2



Selected Option: 2
X shape: (100, 3)
y shape: (100, 1)
True Theta:
 [[4]
 [3]
 [3]
 [3]]

Batch GD Theta:
 [[3.62554599]
 [3.23413919]
 [3.26765292]
 [3.02172852]]
MSE - BGD (Test): 1.170324872471954

Stochastic GD Theta:
 [[3.4040081 ]
 [3.29356037]
 [3.37127471]
 [3.13050345]]
MSE - SGD (Test): 1.2335027523648003

Mini-Batch GD Theta:
 [[3.6331844 ]
 [3.26085041]
 [3.270325  ]
 [3.03619596]]
MSE - MBGD (Test): 1.170125208493768


![alt text](Table-1-Using-Sklearn.png "Using Sklearn")

![alt text](Table-1-Graph-using-sklearn.png "Simple Linear Regression and Multiple Linear Regression with MSE values for each types of GD")

![alt text](Table-2-without-sklearn.png "Table 2 Without Sklearn")

![alt text](Table-2-Graph-without-using-sklearn.png "Simple Linear Regression and Multiple Linear Regression with MSE and type of GD")