In [196]:
import numpy as np

# Linear Regression Model
* __init__ is initializing the class and setting the parameters [m,b] b  - bias term
* _prepare_featues is modifying the input array for the 2D multiplication. Adding the column of 1s in front of the column of Xs for the multiplication with self.theta [m,b]
* hypothesis is returning the our predicted values.
* initialize parameters is setting the self.theta i.e. our parameters [m,b]
* in generate_synthetic_data we are creating our own data with some noise in it.
* the cost function is a way of computing error between predicted and original y column
* in linear regression we often use cost function to understand the differences.
* it is defined as sum of square of diff btw predicted and original y values divided by twice the number of samples.
* for a good linear regression model we tend to minimize the cost function and our cost function depends on 2 things m,b.
* we can minimize it by using gradient descent algorithim. this algo subsequntly minimizes the parameters (here m and b) until the cost function is reached to its minimum value.
* all the parameters are minimized by formula t = t - (alpha)*(partial derivative of J(m,b) wrt t) where t can be m or b and alpha will be figured out by ourselves (for now).

In [197]:
class LinearRegression:
    def __init__(self):
        # theta will store our parameters: [theta_0 (bias), theta_1 (weight for x1)] [m,b]
        self.theta = None

    def _prepare_features(self, X):
        """
        Prepares the feature matrix X for calculations.
        Adds a column of ones for the bias term (theta_0).
        For a single feature X (1D array), it converts it to a 2D array:
        [[1, x1],
         [1, x2],
         ...]

        for many rows of data we have to make the X array into 2d matrix for multiplying it with [m,b]
        """
        if X.ndim == 1: # If X is a 1D array (e.g., np.array([1, 2, 3]))
            X = X.reshape(-1, 1) # Reshape to a column vector: [[1], [2], [3]]

        # Add a column of ones to the left of X for the bias term (theta_0)
        # np.c_ concatenates arrays column-wise
        # This transforms X into a design matrix where the first column is all ones.
        '''this thing makes the X_b matrix as :
        [[1,x1],
        [1,x2],
        ....]'''
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        return X_b



    def initialize_parameters(self, num_features):
        """
        num_features tells how many variables is there in your eqn
        Initializes theta with zeros.
        num_features is the number of 'x' variables (excluding the bias term).
        So, theta will have size (num_features + 1) because it includes theta_0.
        +1 is adding the y-intersect to the self.theta
        """
        # theta will be [theta_0, theta_1] for single variable LR
        self.theta = np.zeros(num_features + 1)
    
    def cost_function(self, targets, predicted):
        m = len(predicted)
        sum_of_squared_errors = np.sum((predicted - targets)**2)
        return sum_of_squared_errors / (2 * m)
    
    def fit(self, X, y, alpha, iterations):
        # implementing everything from scratch
        X_b = self._prepare_features(X)
        m = len(y)
        num_features = 1 if X.ndim == 1 else X.shape[1]
        self.initialize_parameters(num_features)
        cost_hist = []

        for i in range(iterations):
            # the predicted error
            # NumPy correctly treats self.theta as a column vector of 
            predicted = X_b.dot(self.theta)
            errors = predicted - y

            # derivative is just sum of errors in case of b and sum of 
            # errors multiplied by each sample in case of m
            derivatives = (1/m) * X_b.T.dot(errors)

            # updating self.theta
            self.theta = self.theta - (alpha)*(derivatives)
            curr_cost = self.cost_function(y, predicted)
            cost_hist.append(curr_cost)
        print(self.theta)
        return cost_hist
    
    def predict(self, x):
        x = np.array(x)
        X_mod = self._prepare_features(x)
        predictions = X_mod.dot(self.theta)
        if x.ndim == 0:
            return predictions[0]
        else:
            return predictions
    
    def fit_with_normal_equation(self,X,y):
        # normal equation gives the value of parameters by using matrix multiplication
        # the eqn is parameters = inv(Xt.X).Xt.y
        X = np.array(X)
        y = np.array(y)
        X_modi = self._prepare_features(X)
        
        try:
            dot_Xt_X = X_modi.T.dot(X_modi)
            
            inv_part = np.linalg.inv(dot_Xt_X)
            
            pre_result = X_modi.T.dot(y)
            
            self.theta = inv_part.dot(pre_result)

        except:
            print("the matrix is invertible")
            self.theta = None
    
    def predict_normal_equation(self, X):
        X = np.array(X)
        X_b = self._prepare_features(X)
        return X_b.dot(self.theta)


In [198]:
def generate_synthetic_data(num_samples=100,num_features=1, bias=2.0, weights= None, noise_std=1.0, random_seed=None):
    """
    Generates synthetic linear data: y = bias + weight * x + noise.

    Args:
        num_samples (int): Number of data points to generate.
        bias (float): The true intercept (theta_0).
        weight (float): The true slope (theta_1).
        noise_std (float): Standard deviation of the random noise.
        random_seed (int, optional): An integer seed for reproducibility. Defaults to None.

    Returns:
        tuple: (X, y) where X is a NumPy array of features and y is a NumPy array of target values.
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    # Generate random x values between 0 and 10
    X = np.random.rand(num_samples, num_features) * 10
    # This is generating an array of weights for our regression 
    if weights is None:
        weights = np.random.rand(num_features) * 2
    # Generate noise from a normal distribution
    noise = np.random.randn(num_samples) * noise_std

    # Generate y values based on the linear equation and add noise
    y = bias + X.dot(weights) + noise

    return X, y

def MSE(predicted, target):
    total_squared_error = 0
    for i in range(len(predicted)):
        error = (predicted[i] - target[i])**2
        total_squared_error += error
    return total_squared_error/len(predicted)

In [199]:
class FeatureScaler:
    def __init__(self, dataset):
        self.mean = np.mean(dataset, axis=0)
        self.std = np.std(dataset, axis=0)
        self.range = np.max(dataset, axis=0) - np.min(dataset, axis=0)
    
    def mean_normalization(self, dataset):
        self.range[self.range==0] = 1
        normalized = (dataset-self.mean)/self.range
        return normalized
    
    def Z_score(self, dataset):
        self.std[self.std == 0] = 1
        normalized = (dataset-self.mean)/self.std
        return normalized

In [200]:
# --- Generate some synthetic data ---
features, targets = generate_synthetic_data(num_samples=50, num_features=3, random_seed=42)

print("First 5 features (X):", features[:5])
print("First 5 targets (y):", targets[:5])

# --- Instantiate the Linear Regression model ---
model = LinearRegression()

First 5 features (X): [[3.74540119 9.50714306 7.31993942]
 [5.98658484 1.5601864  1.5599452 ]
 [0.58083612 8.66176146 6.01115012]
 [7.08072578 0.20584494 9.69909852]
 [8.32442641 2.12339111 1.81824967]]
First 5 targets (y): [15.81980293 14.06701995  9.71442967 16.62189525 17.89052623]


for alpha = 0.01 after 996 iterations the minimum cost function is 0.4127 where b = 2.00397958 and m = 2.99253883

In [201]:
featureScaler = FeatureScaler(dataset=features)
mean_nor = featureScaler.mean_normalization(dataset=features)
mean_nor[:5]

array([[-0.04889595,  0.47466268,  0.24411636],
       [ 0.1794782 , -0.36080586, -0.35404014],
       [-0.37136167,  0.38578718,  0.10820289],
       [ 0.29096997, -0.50318863,  0.49118423],
       [ 0.4177017 , -0.3015958 , -0.32721607]])

In [202]:
model.fit(X=features, y=targets, alpha=0.01, iterations=2000)

[2.01339645 1.73160163 0.58900131 0.28691794]


[np.float64(106.11010684566384),
 np.float64(11.187562409788084),
 np.float64(5.320628335801655),
 np.float64(4.365665807288334),
 np.float64(3.777010377247039),
 np.float64(3.2931648420641992),
 np.float64(2.885983938851502),
 np.float64(2.542299863129527),
 np.float64(2.251744281873811),
 np.float64(2.005739114130239),
 np.float64(1.7971529223133162),
 np.float64(1.6200458646657256),
 np.float64(1.4694622979258696),
 np.float64(1.3412608377439883),
 np.float64(1.2319748573802307),
 np.float64(1.1386978029488963),
 np.float64(1.0589887692164484),
 np.float64(0.9907946374539652),
 np.float64(0.9323857697964787),
 np.float64(0.8823028151033675),
 np.float64(0.8393126351156903),
 np.float64(0.8023717274050339),
 np.float64(0.7705958198075469),
 np.float64(0.7432345531010816),
 np.float64(0.7196503653757012),
 np.float64(0.6993008515398736),
 np.float64(0.6817240016875763),
 np.float64(0.6665258282640325),
 np.float64(0.6533699786613234),
 np.float64(0.6419690007254976),
 np.float64(0.632

In [203]:
model.predict([[3.74540119, 9.50714306, 7.31993942], [5.98658484, 1.5601864, 1.5599452 ]])

array([16.19888088, 13.74630461])

In [204]:
model.fit_with_normal_equation(features, targets)

In [205]:
model.predict_normal_equation([[3.74540119, 9.50714306, 7.31993942],[5.98658484, 1.5601864,  1.5599452 ]])

array([16.13856651, 13.8440069 ])