In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [16]:
class MultipleLR():
    """Ordinary least-squares multiple linear regression.

    Models ``y = intercept_ + X @ coef_`` and estimates the parameters from a
    design matrix that has a leading column of ones for the bias term.

    After ``fit``:
        coef_      : ndarray of shape (n_features, 1) with the feature weights.
        intercept_ : scalar bias term.
    Both are ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters:
            X_train : array-like of shape (n_samples, n_features); must be 2-D.
            y_train : array-like with n_samples values; any shape is flattened
                      to a column vector of shape (n_samples, 1).

        Returns:
            None. The result is stored in ``intercept_`` and ``coef_``.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float).reshape(-1, 1)  # force (m, 1)

        # Prepend the bias column of ones -> (m, n + 1)
        X_train_bias = np.insert(X_train, 0, 1.0, axis=1)

        # Solve the least-squares problem directly instead of inverting
        # X^T X: explicit inversion fails on collinear features and loses
        # precision when the matrix is ill-conditioned. lstsq returns the
        # minimum-norm solution in the rank-deficient case.
        betas, *_ = np.linalg.lstsq(X_train_bias, y_train, rcond=None)

        self.intercept_ = betas[0, 0]
        self.coef_ = betas[1:]

    def predict(self, X_test):
        """Predict targets for ``X_test``.

        Parameters:
            X_test : array-like of shape (n_samples, n_features).

        Returns:
            ndarray of shape (n_samples, 1) with the predicted values.
        """
        X_test = np.asarray(X_test, dtype=float)
        y_pred = self.intercept_ + np.dot(X_test, self.coef_)
        return y_pred

In [17]:
def generate_lr_data(
    n_samples=200,
    n_features=1,
    noise_std=1.0,
    weight_range=(-5, 5),
    bias_range=(-3, 3),
    x_range=(-10, 10),
    add_outliers=False,
    outlier_ratio=0.05,
    outlier_strength=15,
    seed=42
):
    """
    Build a synthetic linear-regression dataset of the form
        y = X @ w + b + noise

    Features, weights and bias are drawn uniformly from ``x_range``,
    ``weight_range`` and ``bias_range``; the noise is Gaussian with standard
    deviation ``noise_std``. With ``add_outliers=True``, a fraction
    ``outlier_ratio`` of the targets is additionally shifted by Gaussian
    offsets with standard deviation ``outlier_strength``.

    Returns:
        X : (n_samples, n_features)
        y : (n_samples, 1)
        true_w : (n_features, 1)
        true_b : float
    """
    rng = np.random.default_rng(seed)

    # The draw order below (features, weights, bias, noise) determines the
    # values produced for a given seed.
    X = rng.uniform(low=x_range[0], high=x_range[1], size=(n_samples, n_features))
    true_w = rng.uniform(low=weight_range[0], high=weight_range[1], size=(n_features, 1))
    true_b = rng.uniform(low=bias_range[0], high=bias_range[1])

    # Noise-free targets first, then the Gaussian perturbation on top.
    y = X @ true_w + true_b
    y = y + rng.normal(loc=0, scale=noise_std, size=(n_samples, 1))

    if not add_outliers:
        return X, y, true_w, true_b

    # Shift a random subset of targets up or down by a large random amount.
    n_outliers = int(n_samples * outlier_ratio)
    picked = rng.choice(n_samples, size=n_outliers, replace=False)
    shifts = rng.normal(loc=0, scale=outlier_strength, size=(n_outliers, 1))
    y[picked] = y[picked] + shifts

    return X, y, true_w, true_b


def train_test_split_manual(X, Y, test_size=0.2, random_state=42):
    """Randomly split features and targets into train and test subsets.

    Parameters:
        X            : array-like whose first axis indexes the samples.
        Y            : array-like of targets; reshaped to (n_samples, 1).
        test_size    : fraction of samples placed in the test subset; the
                       count is truncated to an integer.
        random_state : seed for the generator that shuffles the row order.

    Returns:
        (X_train, X_test, Y_train, Y_test) as numpy arrays.
    """
    features = np.array(X)
    targets = np.array(Y).reshape(-1, 1)
    total = features.shape[0]

    # Shuffle the row order once; the first `n_test` shuffled rows form the
    # test subset and the remainder the training subset.
    order = np.arange(total)
    np.random.default_rng(random_state).shuffle(order)

    n_test = int(total * test_size)
    test_rows, train_rows = order[:n_test], order[n_test:]

    return (
        features[train_rows],
        features[test_rows],
        targets[train_rows],
        targets[test_rows],
    )



In [18]:
# Synthetic two-feature regression problem with noisy targets.
X, y, w_true, b_true = generate_lr_data(
    n_samples=150,
    n_features=2,
    noise_std=2.0,
    seed=7,
)

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split_manual(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# w, b = train_lr_gd(X_train, y_train, lr=0.01, epochs=3000)

# Ground-truth parameters and dataset dimensions.
print("True weight:", w_true.ravel())
print("True bias:", b_true)
print("X shape:", X.shape, "y shape:", y.shape)



True weight: [ 1.41229111 -3.15554358]
True bias: 1.556966791684835
X shape: (150, 2) y shape: (150, 1)


In [19]:
# Fit on the training split, then predict the held-out rows.
mlr = MultipleLR()
mlr.fit(X_train=X_train, y_train=y_train)
y_pred = mlr.predict(X_test=X_test)
print(y_pred)

[[  2.85671019]
 [ 21.96446505]
 [-24.90601783]
 [-32.28295235]
 [-18.74126166]
 [ 13.88567625]
 [ 35.21118605]
 [  8.6488759 ]
 [-26.65038907]
 [ 24.41956161]
 [ 18.16125181]
 [ 11.47626195]
 [-21.92976131]
 [ -5.38750906]
 [ 16.28640085]
 [ -8.53215424]
 [-27.6621763 ]
 [-28.96977171]
 [ -4.52569496]
 [ -7.61495937]
 [-30.39379762]
 [ -7.15098076]
 [  9.80195253]
 [ 10.13217749]
 [-20.41255441]
 [ -4.7441203 ]
 [  3.39101186]
 [ 18.8826027 ]
 [  5.55497181]
 [ -6.60512867]]


In [20]:
# Show the fitted parameters (label typo "Intercetp" corrected).
print(f"Coefficients: {mlr.coef_}\nIntercept: {mlr.intercept_}")

Coefficients: [[ 1.34674232]
 [-3.17471082]]
Intercetp: 1.4387974429185022
