In [225]:
import numpy as np

# Linear Regression Model
* __init__ is initializing the class and setting the parameters [m,b] b  - bias term
* _prepare_featues is modifying the input array for the 2D multiplication. Adding the column of 1s in front of the column of Xs for the multiplication with self.theta [m,b]
* hypothesis is returning the our predicted values.
* initialize parameters is setting the self.theta i.e. our parameters [m,b]
* in generate_synthetic_data we are creating our own data with some noise in it.
* the cost function is a way of computing error between predicted and original y column
* in linear regression we often use cost function to understand the differences.
* it is defined as sum of square of diff btw predicted and original y values divided by twice the number of samples.
* for a good linear regression model we tend to minimize the cost function and our cost function depends on 2 things m,b.
* we can minimize it by using gradient descent algorithim. this algo subsequntly minimizes the parameters (here m and b) until the cost function is reached to its minimum value.
* all the parameters are minimized by formula t = t - (alpha)*(partial derivative of J(m,b) wrt t) where t can be m or b and alpha will be figured out by ourselves (for now).

In [226]:
class LinearRegression:
    def __init__(self):
        # theta will store our parameters: [theta_0 (bias), theta_1 (weight for x1)] [m,b]
        self.theta = None

    def _prepare_features(self, X):
        """
        Prepares the feature matrix X for calculations.
        Adds a column of ones for the bias term (theta_0).
        For a single feature X (1D array), it converts it to a 2D array:
        [[1, x1],
         [1, x2],
         ...]

        for many rows of data we have to make the X array into 2d matrix for multiplying it with [m,b]
        """
        if X.ndim == 1: # If X is a 1D array (e.g., np.array([1, 2, 3]))
            X = X.reshape(-1, 1) # Reshape to a column vector: [[1], [2], [3]]

        # Add a column of ones to the left of X for the bias term (theta_0)
        # np.c_ concatenates arrays column-wise
        # This transforms X into a design matrix where the first column is all ones.
        '''this thing makes the X_b matrix as :
        [[1,x1],
        [1,x2],
        ....]'''
        X_b = np.c_[np.ones((X.shape[0], 1)), X]
        return X_b

    def hypothesis(self, X):
        """
        Calculates the predicted output y_hat using the current theta values.
        h_theta(x) = theta_0 * 1 + theta_1 * x_1

        Args:
            X (np.array): Feature array (can be 1D for single feature).

        Returns:
            np.array: Predicted y values.
        """
        if self.theta is None:
            raise ValueError("Model parameters (theta) not initialized. Call initialize_parameters() first.")

        # Prepare X by adding a bias term (column of ones)
        X_prepared = self._prepare_features(X)

        # Perform dot product: (Number of samples x Number of features) . (Number of features x 1)
        # This gives (Number of samples x 1) -> predicted y for each sample
        # Remember: dot product for matrix A (m,n) and vector B (n,) is A.dot(B) which gives (m,)
        return X_prepared.dot(self.theta)

    def initialize_parameters(self, num_features):
        """
        num_features tells how many variables is there in your eqn
        Initializes theta with zeros.
        num_features is the number of 'x' variables (excluding the bias term).
        So, theta will have size (num_features + 1) because it includes theta_0.
        +1 is adding the y-intersect to the self.theta
        """
        # theta will be [theta_0, theta_1] for single variable LR
        self.theta = np.zeros(num_features + 1)
    
    def cost_function(self, targets, predicted):
        m = len(predicted)
        sum_of_squared_errors = np.sum((predicted - targets)**2)
        return sum_of_squared_errors / (2 * m)
    
    def fit(self, X, y, alpha, iterations):
        # implementing everything from scratch
        X_b = self._prepare_features(X)
        m = len(y)
        num_features = 1 if X.ndim == 1 else X.shape[1]
        self.initialize_parameters(num_features=num_features)
        cost_hist = []

        for i in range(iterations):
            # the predicted error
            # NumPy correctly treats self.theta as a column vector of 
            # shape (2, 1) for the purpose of the dot product.
            predicted = X_b.dot(self.theta)
            errors = predicted - y

            # derivative is just sum of errors in case of b and sum of 
            # errors multiplied by each sample in case of m
            derivatives = (1/m) * X_b.T.dot(errors)

            # updating self.theta
            self.theta = self.theta - (alpha)*(derivatives)
            curr_cost = self.cost_function(y, self.hypothesis(X))
            cost_hist.append(curr_cost)
        print(self.theta)
        return cost_hist
    
    def predict(self, x):
        x = np.array(x)
        X_mod = self._prepare_features(x)
        predictions = X_mod.dot(self.theta)
        if x.ndim == 0:
            return predictions[0]
        else:
            return predictions


In [227]:
def generate_synthetic_data(num_samples=100, bias=2.0, weight=3.0, noise_std=1.0, random_seed=None):
    """
    Generates synthetic linear data: y = bias + weight * x + noise.

    Args:
        num_samples (int): Number of data points to generate.
        bias (float): The true intercept (theta_0).
        weight (float): The true slope (theta_1).
        noise_std (float): Standard deviation of the random noise.
        random_seed (int, optional): An integer seed for reproducibility. Defaults to None.

    Returns:
        tuple: (X, y) where X is a NumPy array of features and y is a NumPy array of target values.
    """
    if random_seed is not None:
        np.random.seed(random_seed)

    # Generate random x values between 0 and 10
    X = np.random.rand(num_samples) * 10

    # Generate noise from a normal distribution
    noise = np.random.randn(num_samples) * noise_std

    # Generate y values based on the linear equation and add noise
    y = bias + weight * X + noise

    return X, y

def MSE(predicted, target):
    total_squared_error = 0
    for i in range(len(predicted)):
        error = (predicted[i] - target[i])**2
        total_squared_error += error
    return total_squared_error/len(predicted)

In [228]:
# --- Generate some synthetic data ---
features, targets = generate_synthetic_data(num_samples=50, random_seed=42)

print("--- Generated Data ---")
print("First 5 features (X):", features[:5])
print("First 5 targets (y):", targets[:5])
print("Shape of X:", features.shape) # Should be (50,)
print("Shape of y:", targets.shape) # Should be (50,)
print("-" * 20)

# --- Instantiate the Linear Regression model ---
model = LinearRegression()

# --- Initialize parameters (theta) ---
# For single variable linear regression, we have 1 feature (the 'x' variable).
# So, we need 1 parameter for 'x' (theta_1) + 1 parameter for the bias term (theta_0).
# Total parameters = 1 + 1 = 2.
model.initialize_parameters(num_features=1)

print("\n--- Model Initialization ---")
print("Initial theta (model parameters):", model.theta) # Should be [0. 0.]
print("-" * 20)

# --- Get predictions with initial (zero) theta ---
print("\n--- Initial Predictions ---")
initial_predictions = model.hypothesis(features)
print("First 5 initial predictions (should be close to 0):", initial_predictions[:5])
print("Shape of predictions:", initial_predictions.shape) # Should be (50,)
print("-" * 20)

# You'll notice the initial predictions are all close to zero because theta is initialized to zeros.
# This is expected before the model has been trained!
print(MSE(targets, initial_predictions))

--- Generated Data ---
First 5 features (X): [3.74540119 9.50714306 7.31993942 5.98658484 1.5601864 ]
First 5 targets (y): [13.97467015 30.69279747 23.84416997 19.65865083  5.20203722]
Shape of X: (50,)
Shape of y: (50,)
--------------------

--- Model Initialization ---
Initial theta (model parameters): [0. 0.]
--------------------

--- Initial Predictions ---
First 5 initial predictions (should be close to 0): [0. 0. 0. 0. 0.]
Shape of predictions: (50,)
--------------------
309.720348361245


In [229]:
print(model.cost_function(targets=targets, predicted=initial_predictions))
print(features[:5])
print(targets[:5])

154.8601741806225
[3.74540119 9.50714306 7.31993942 5.98658484 1.5601864 ]
[13.97467015 30.69279747 23.84416997 19.65865083  5.20203722]


for alpha = 0.01 after 996 iterations the minimum cost function is 0.4127 where b = 2.00397958 and m = 2.99253883

In [230]:
model.fit(X=features, y=targets, alpha=0.0691, iterations=900)

[2.09667159 2.97754661]


[np.float64(151.3633364156314),
 np.float64(147.945765823507),
 np.float64(144.6056618282749),
 np.float64(141.3412648991136),
 np.float64(138.15085560911407),
 np.float64(135.03275371583743),
 np.float64(131.98531726316037),
 np.float64(129.00694170390722),
 np.float64(126.09605904277804),
 np.float64(123.25113699909943),
 np.float64(120.47067818892785),
 np.float64(117.75321932605605),
 np.float64(115.09733044147542),
 np.float64(112.50161412086437),
 np.float64(109.96470475967838),
 np.float64(107.48526783543073),
 np.float64(105.06199919676163),
 np.float64(102.69362436890269),
 np.float64(100.37889787515294),
 np.float64(98.11660257399377),
 np.float64(95.905549011476),
 np.float64(93.74457478852412),
 np.float64(91.63254394280776),
 np.float64(89.56834634484265),
 np.float64(87.5508971079886),
 np.float64(85.57913601202074),
 np.float64(83.65202693995747),
 np.float64(81.76855732783808),
 np.float64(79.92773762714762),
 np.float64(78.1286007795948),
 np.float64(76.3702017039565),

In [236]:
model.predict([1.02])

array([5.13376913])