In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import urllib.request

# Report the library versions this notebook ran against, one line each.
# Note: per the import cell, `plt` is bound to the top-level `matplotlib`
# package here (not `matplotlib.pyplot`).
for label, module in (("Pandas", pd), ("Numpy", np), ("Matlplotlib", plt)):
    print(f"{label} Version {module.__version__}")

Pandas Version 2.2.3
Numpy Version 1.26.4
Matlplotlib Version 3.10.0


In [36]:
# Download the Boston housing data, parse it into a feature matrix and a
# target vector, and standardise the features.
url = "https://lib.stat.cmu.edu/datasets/boston"
N_HEADER_LINES = 22   # leading lines skipped before the numeric records (presumably the file's descriptive header)
N_FEATURES = 13       # expected number of feature columns per record

# The context manager closes the HTTP response even if reading/decoding fails.
with urllib.request.urlopen(url) as response:
    raw = response.read().decode('utf-8')

lines = raw.split("\n")[N_HEADER_LINES:]

# Each record is spread over two consecutive physical lines. Blank lines
# (e.g. the empty string left by a trailing newline) are dropped first so
# they can never be paired with real data.
data_rows = [ln.split() for ln in lines if ln.strip()]

X_list = []
y_list = []
for line1, line2 in zip(data_rows[0::2], data_rows[1::2]):
    # Features: every value on the first line plus the first two values of
    # the second line. The target is the last value of the second line.
    features = [float(v) for v in line1] + [float(v) for v in line2[:2]]
    target = float(line2[-1])

    X_list.append(features)
    y_list.append(target)

X = np.array(X_list)
y = np.array(y_list)

# Fail loudly if the file layout is not what the parsing above assumes.
assert X.ndim == 2 and X.shape[1] == N_FEATURES, f"unexpected feature matrix shape {X.shape}"
assert X.shape[0] == y.shape[0], "features and targets have different lengths"

# Standardise each column to zero mean / unit variance.
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)

# A constant column has std == 0; divide it by 1 instead so it becomes all
# zeros rather than NaN/inf.
X_scaled = (X - X_mean) / np.where(X_std > 0, X_std, 1.0)

print("X shape:", X.shape)   # (506, 13)
print("y shape:", y.shape)   # (506,)
print(X)
print("First sample:", X[0], y[0])
print("Last sample:", X[-1], y[-1])

X shape: (506, 13)
y shape: (506,)
[[6.3200e-03 1.8000e+01 2.3100e+00 ... 1.5300e+01 3.9690e+02 4.9800e+00]
 [2.7310e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9690e+02 9.1400e+00]
 [2.7290e-02 0.0000e+00 7.0700e+00 ... 1.7800e+01 3.9283e+02 4.0300e+00]
 ...
 [6.0760e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 5.6400e+00]
 [1.0959e-01 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9345e+02 6.4800e+00]
 [4.7410e-02 0.0000e+00 1.1930e+01 ... 2.1000e+01 3.9690e+02 7.8800e+00]]
First sample: [6.320e-03 1.800e+01 2.310e+00 0.000e+00 5.380e-01 6.575e+00 6.520e+01
 4.090e+00 1.000e+00 2.960e+02 1.530e+01 3.969e+02 4.980e+00] 24.0
Last sample: [4.741e-02 0.000e+00 1.193e+01 0.000e+00 5.730e-01 6.030e+00 8.080e+01
 2.505e+00 1.000e+00 2.730e+02 2.100e+01 3.969e+02 7.880e+00] 11.9


In [37]:
class LinearRegression:
    """Linear regression trained with full-batch gradient descent.

    The model is ``y_hat = X @ w + b`` and the minimised cost is the halved
    mean squared error ``J = 1 / (2m) * sum((y_hat - Y) ** 2)``.

    Parameters
    ----------
    lr : float, default 0.01
        Step size applied to every gradient update.
    epochs : int, default 1000
        Number of gradient steps performed by ``fit``.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector of shape (n_features,); ``None`` until ``fit`` is called.
    b : float or None
        Intercept; ``None`` until ``fit`` is called.
    cost_history : list
        Cost recorded every 100 epochs during the most recent ``fit`` call.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        self.w = None
        self.b = None
        self.cost_history = []

    @staticmethod
    def _as_target_vector(Y):
        """Return ``Y`` as a flat float array of shape (m,).

        Flattening matters: a column vector of shape (m, 1) would otherwise
        broadcast against predictions of shape (m,) into an (m, m) matrix.
        """
        return np.asarray(Y, dtype=float).reshape(-1)

    def predict(self, X):
        """Return predictions ``X @ w + b`` for samples ``X`` of shape (m, n_features)."""
        return np.dot(X, self.w) + self.b

    def compute_cost(self, X, Y):
        """Return the halved mean squared error of the current parameters on (X, Y)."""
        Y = self._as_target_vector(Y)
        m = len(Y)
        y_pred = self.predict(X)
        cost = (1/(2*m)) * np.sum((y_pred - Y) ** 2)
        return cost

    def fit(self, X, Y):
        """Fit weights and intercept to (X, Y) by gradient descent.

        Parameters
        ----------
        X : array-like of shape (m, n_features)
            Training features.
        Y : array-like of shape (m,) or (m, 1)
            Training targets.

        Raises
        ------
        ValueError
            If ``X`` and ``Y`` contain a different number of samples.

        Notes
        -----
        Every call restarts from zero-initialised parameters and an empty
        ``cost_history``. The cost is printed and recorded every 100 epochs.
        """
        X = np.asarray(X, dtype=float)
        Y = self._as_target_vector(Y)
        m, n = X.shape
        if Y.shape[0] != m:
            raise ValueError(
                f"X has {m} samples but Y has {Y.shape[0]} targets"
            )

        self.w = np.zeros(n)
        self.b = 0.0
        # Reset so that re-running fit (e.g. re-executing a notebook cell)
        # does not append to the curve of a previous run.
        self.cost_history = []

        for i in range(self.epochs):
            # Residuals are needed by both gradients; compute them once.
            error = self.predict(X) - Y

            dw = (1/m) * np.dot(X.T, error)
            db = (1/m) * np.sum(error)

            self.w -= self.lr * dw
            self.b -= self.lr * db

            if i % 100 == 0:
                # Cost is evaluated after this epoch's parameter update.
                cost = self.compute_cost(X, Y)
                self.cost_history.append(cost)
                print(f"Epoch {i}, Cost = {cost:.4f}")

    def get_params(self):
        """Return the learned parameters as a ``(w, b)`` tuple."""
        return self.w, self.b

In [44]:
# Build the regressor with a step size of 0.1 and 1000 gradient-descent epochs.
model = LinearRegression(lr=0.1, epochs=1000)

In [45]:
# Train on the standardised features; fit() prints the cost every 100 epochs.
model.fit(X=X_scaled, Y=y)

Epoch 0, Cost = 231.2302
Epoch 100, Cost = 11.0785
Epoch 200, Cost = 10.9764
Epoch 300, Cost = 10.9553
Epoch 400, Cost = 10.9496
Epoch 500, Cost = 10.9480
Epoch 600, Cost = 10.9476
Epoch 700, Cost = 10.9475
Epoch 800, Cost = 10.9474
Epoch 900, Cost = 10.9474


In [46]:
# Display the learned parameters as a (weights, bias) tuple; left as a bare
# last expression so the notebook renders the value.
model.get_params()

(array([-0.92788055,  1.08110107,  0.13944929,  0.68194728, -2.05646608,
         2.67449347,  0.01924331, -3.10414993,  2.65855739, -2.07262027,
        -2.06047144,  0.84924264, -3.74348596]),
 22.532806324110666)