In [53]:
import numpy as np

# Linear Regression Model
* __init__ is initializing the class and setting the parameters [m,b] b  - bias term
* _prepare_featues is modifying the input array for the 2D multiplication. Adding the column of 1s in front of the column of Xs for the multiplication with self.theta [m,b]
* hypothesis is returning the our predicted values.
* initialize parameters is setting the self.theta i.e. our parameters [m,b]
* in generate_synthetic_data we are creating our own data with some noise in it.
* the cost function is a way of computing error between predicted and original y column
* in linear regression we often use cost function to understand the differences.
* it is defined as sum of square of diff btw predicted and original y values divided by twice the number of samples.
* for a good linear regression model we tend to minimize the cost function and our cost function depends on 2 things m,b.
* we can minimize it by using gradient descent algorithim. this algo subsequntly minimizes the parameters (here m and b) until the cost function is reached to its minimum value.
* all the parameters are minimized by formula t = t - (alpha)*(partial derivative of J(m,b) wrt t) where t can be m or b and alpha will be figured out by ourselves (for now).

In [54]:
class LinearRegression:
    def __init__(self):
        # theta will store our parameters: [theta_0 (bias), theta_1 (weight for x1)] [m,b]
        self.theta = None

    def _prepare_features(self, X):
        """
        Prepares the feature matrix X for calculations.
        Adds a column of ones for the bias term (theta_0).
        For a single feature X (1D array), it converts it to a 2D array:
        [[1, x1],
         [1, x2],
         ...]

        for many rows of data we have to make the X array into 2d matrix for multiplying it with [m,b]
        """
        if X.ndim == 1: # If X is a 1D array (e.g., np.array([1, 2, 3]))
            X = X.reshape(-1, 1) # Reshape to a column vector: [[1], [2], [3]]

        # Add a column of ones to the left of X for the bias term (theta_0)
        # np.c_ concatenates arrays column-wise
        # This transforms X into a design matrix where the first column is all ones.
        '''this thing makes the X_b matrix as :
        [[1,x1],
        [1,x2],
        ....]'''
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        return X_b



    def initialize_parameters(self, num_features):
        """
        num_features tells how many variables is there in your eqn
        Initializes theta with zeros.
        num_features is the number of 'x' variables (excluding the bias term).
        So, theta will have size (num_features + 1) because it includes theta_0.
        +1 is adding the y-intersect to the self.theta
        """
        # theta will be [theta_0, theta_1] for single variable LR
        self.theta = np.zeros(num_features + 1)
    
    def cost_function(self, targets, predicted):
        m = len(predicted)
        sum_of_squared_errors = np.sum((predicted - targets)**2)
        return sum_of_squared_errors / (2 * m)
    
    def fit(self, X, y, alpha, iterations):
        # implementing everything from scratch
        X_b = self._prepare_features(X)
        m = len(y)
        num_features = 1 if X.ndim == 1 else X.shape[1]
        self.initialize_parameters(num_features=num_features)
        cost_hist = []

        for i in range(iterations):
            # the predicted error
            # NumPy correctly treats self.theta as a column vector of 
            # shape (2, 1) for the purpose of the dot product.
            predicted = X_b.dot(self.theta)
            errors = predicted - y

            # derivative is just sum of errors in case of b and sum of 
            # errors multiplied by each sample in case of m
            derivatives = (1/m) * X_b.T.dot(errors)

            # updating self.theta
            self.theta = self.theta - (alpha)*(derivatives)
            curr_cost = self.cost_function(y, predicted)
            cost_hist.append(curr_cost)
        print(self.theta)
        return cost_hist
    
    def predict(self, x):
        x = np.array(x)
        X_mod = self._prepare_features(x)
        predictions = X_mod.dot(self.theta)
        if x.ndim == 0:
            return predictions[0]
        else:
            return predictions


In [55]:
def generate_synthetic_data(num_samples=100,num_features=1, bias=2.0, weights= None, noise_std=1.0, random_seed=None):
    """
    Generates synthetic linear data: y = bias + weight * x + noise.

    Args:
        num_samples (int): Number of data points to generate.
        bias (float): The true intercept (theta_0).
        weight (float): The true slope (theta_1).
        noise_std (float): Standard deviation of the random noise.
        random_seed (int, optional): An integer seed for reproducibility. Defaults to None.

    Returns:
        tuple: (X, y) where X is a NumPy array of features and y is a NumPy array of target values.
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    # Generate random x values between 0 and 10
    X = np.random.rand(num_samples, num_features) * 10
    # This is generating an array of weights for our regression 
    if weights is None:
        weights = np.random.rand(num_features) * 2
    # Generate noise from a normal distribution
    noise = np.random.randn(num_samples) * noise_std

    # Generate y values based on the linear equation and add noise
    y = bias + X.dot(weights) + noise

    return X, y

def MSE(predicted, target):
    total_squared_error = 0
    for i in range(len(predicted)):
        error = (predicted[i] - target[i])**2
        total_squared_error += error
    return total_squared_error/len(predicted)

In [56]:
class FeatureScaler:
    def __init__(self, dataset):
        self.mean = np.mean(dataset, axis=0)
        self.std = np.std(dataset, axis=0)
        self.range = np.max(dataset, axis=0) - np.min(dataset, axis=0)
    
    def mean_normalization(self, dataset):
        self.range[self.range==0] = 1
        normalized = (dataset-self.mean)/self.range
        return normalized
    
    def Z_score(self, dataset):
        self.std[self.std == 0] = 1
        normalized = (dataset-self.mean)/self.std
        return normalized

In [57]:
# --- Generate some synthetic data ---
features, targets = generate_synthetic_data(num_samples=50, num_features=3, random_seed=42)

print("--- Generated Data ---")
print("First 5 features (X):", features[:5])
print("First 5 targets (y):", targets[:5])
print("Shape of X:", features.shape) # Should be (50,)
print("Shape of y:", targets.shape) # Should be (50,)
print("-" * 20)

# --- Instantiate the Linear Regression model ---
model = LinearRegression()

# --- Initialize parameters (theta) ---
# For single variable linear regression, we have 1 feature (the 'x' variable).
# So, we need 1 parameter for 'x' (theta_1) + 1 parameter for the bias term (theta_0).
# Total parameters = 1 + 1 = 2.

print("\n--- Model Initialization ---")
print("Initial theta (model parameters):", model.theta) # Should be [0. 0.]
print("-" * 20)

--- Generated Data ---
First 5 features (X): [[3.74540119 9.50714306 7.31993942]
 [5.98658484 1.5601864  1.5599452 ]
 [0.58083612 8.66176146 6.01115012]
 [7.08072578 0.20584494 9.69909852]
 [8.32442641 2.12339111 1.81824967]]
First 5 targets (y): [15.81980293 14.06701995  9.71442967 16.62189525 17.89052623]
Shape of X: (50, 3)
Shape of y: (50,)
--------------------

--- Model Initialization ---
Initial theta (model parameters): None
--------------------


for alpha = 0.01 after 996 iterations the minimum cost function is 0.4127 where b = 2.00397958 and m = 2.99253883

In [59]:
featureScaler = FeatureScaler(dataset=features)
mean_nor = featureScaler.mean_normalization(dataset=features)
mean_nor[:5]

array([[-0.04889595,  0.47466268,  0.24411636],
       [ 0.1794782 , -0.36080586, -0.35404014],
       [-0.37136167,  0.38578718,  0.10820289],
       [ 0.29096997, -0.50318863,  0.49118423],
       [ 0.4177017 , -0.3015958 , -0.32721607]])

In [None]:
model.fit(X=features, y=targets, alpha=0.0691, iterations=200)

[-1.21589269e+126 -5.54259903e+126 -7.13430695e+126 -6.66957547e+126]


[np.float64(106.11010684566384),
 np.float64(1844.2039143783443),
 np.float64(34185.86610961393),
 np.float64(634310.5102452328),
 np.float64(11769765.88465767),
 np.float64(218390706.11495525),
 np.float64(4052289838.0752215),
 np.float64(75191171193.77663),
 np.float64(1395189498312.8916),
 np.float64(25888062458811.37),
 np.float64(480358961045777.5),
 np.float64(8913171150761270.0),
 np.float64(1.6538594344072637e+17),
 np.float64(3.0687742695755346e+18),
 np.float64(5.694181332275074e+19),
 np.float64(1.056568460127069e+21),
 np.float64(1.9604871109527183e+22),
 np.float64(3.637728985161541e+23),
 np.float64(6.749889910295648e+24),
 np.float64(1.2524576181171381e+26),
 np.float64(2.3239639550075984e+27),
 np.float64(4.3121686403199615e+28),
 np.float64(8.001328223052459e+29),
 np.float64(1.4846648791607904e+31),
 np.float64(2.75482987569859e+32),
 np.float64(5.1116502791736105e+33),
 np.float64(9.484784816321798e+34),
 np.float64(1.7599236665007596e+36),
 np.float64(3.265578894925