In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the housing data from a CSV file located next to the notebook.
training_set = pd.read_csv("housepricing.csv")

# Feature matrix: one row per record, one column per selected numeric attribute,
# in exactly the order listed here.
X = training_set[
    [
        "bedrooms", "bathrooms",
        "sqft_living", "sqft_lot",
        "floors", "waterfront", "view", "condition",
        "sqft_above", "sqft_basement",
        "yr_built", "yr_renovated",
    ]
].to_numpy()

# Regression target: the "price" column as a 1-D array.
y = training_set["price"].to_numpy()


In [12]:
def normalize(X):
    """Standardize each column of X to zero mean and unit standard deviation.

    Parameters
    ----------
    X : array
        Data whose statistics are taken along axis 0 (per column for a 2-D
        array).

    Returns
    -------
    X_norm : array
        ``(X - mu) / sigma`` computed column-wise.
    mu : array
        Per-column mean of X.
    sigma : array
        Per-column scale that was actually used for the division: the
        population standard deviation, except that columns with zero
        standard deviation report 1 so the same ``(x - mu) / sigma``
        transform can be re-applied safely to other data.
    """
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)

    # A constant column has sigma == 0, and dividing by it would fill that
    # column with NaN/inf. Adding the boolean mask turns those zeros into 1
    # (non-zero entries get + 0 and are unchanged), so constant columns are
    # simply centered to 0.
    sigma = sigma + (sigma == 0)

    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma


In [13]:
# Standardize the feature matrix. mu and sigma are kept alongside X_norm:
# the model below is fitted on X_norm, so any other input would need the
# same (x - mu) / sigma scaling before being passed to it.
X_norm, mu, sigma = normalize(X)

In [14]:
def compute_cost(X, y, w, b):
    """Return the halved mean squared error of the linear model ``X @ w + b``.

    Parameters
    ----------
    X : array
        Feature matrix, multiplied on the right by ``w``.
    y : array
        Target values; its length is used as the sample count.
    w : array
        Weight vector.
    b : scalar
        Bias added to every prediction.

    Returns
    -------
    float
        ``(1 / (2 * m)) * sum((X @ w + b - y) ** 2)`` with ``m = len(y)``.
    """
    n_samples = len(y)
    residuals = (X @ w + b) - y
    return (1 / (2 * n_samples)) * np.sum(np.square(residuals))

In [15]:
def compute_gradient(X, y, w, b):
    """Return the gradient of the halved-MSE cost with respect to ``w`` and ``b``.

    Parameters
    ----------
    X : array
        Feature matrix, multiplied on the right by ``w``.
    y : array
        Target values; its length is used as the sample count.
    w : array
        Current weight vector.
    b : scalar
        Current bias.

    Returns
    -------
    dj_dw : array
        ``(1 / m) * X.T @ (X @ w + b - y)``.
    dj_db : scalar
        ``(1 / m) * sum(X @ w + b - y)``.
    """
    scale = 1 / len(y)
    residuals = (X @ w + b) - y

    grad_w = scale * (X.T @ residuals)
    grad_b = scale * np.sum(residuals)
    return grad_w, grad_b


In [16]:
def gradient_descent(X, y, alpha, iterations):
    """Fit linear-regression parameters with full-batch gradient descent.

    Weights start as a zero vector with one entry per column of ``X`` and the
    bias starts at 0. Each iteration takes one step of size ``alpha`` against
    the gradient returned by ``compute_gradient``. On every 100th iteration
    (0, 100, 200, ...) the cost from ``compute_cost`` is printed, evaluated
    after that iteration's update.

    Parameters
    ----------
    X : array
        2-D feature matrix (``X.shape[1]`` determines the number of weights).
    y : array
        Target values.
    alpha : float
        Learning rate.
    iterations : int
        Number of update steps to perform.

    Returns
    -------
    w : array
        Learned weight vector.
    b : scalar
        Learned bias.
    """
    w = np.zeros(X.shape[1])
    b = 0

    for i in range(iterations):
        grad_w, grad_b = compute_gradient(X, y, w, b)
        w -= alpha * grad_w
        b -= alpha * grad_b

        # Only report progress on multiples of 100; skip the rest.
        if i % 100 != 0:
            continue
        cost = compute_cost(X, y, w, b)
        print(f"Iter {i} | Cost {cost:.2f}")

    return w, b


In [17]:
# Hyperparameters for the run: learning rate and number of update steps.
alpha, iterations = 0.01, 5000

# Fit on the standardized features; gradient_descent prints the cost as it goes.
w, b = gradient_descent(X_norm, y, alpha=alpha, iterations=iterations)

# Show the learned weights and bias.
print("Pesos:", w)
print("Bias:", b)

Iter 0 | Cost 306313302934.34
Iter 100 | Cost 146744619250.79
Iter 200 | Cost 127861829184.56
Iter 300 | Cost 125013612066.95
Iter 400 | Cost 124498073438.24
Iter 500 | Cost 124373577551.93
Iter 600 | Cost 124333636789.56
Iter 700 | Cost 124318414576.81
Iter 800 | Cost 124312150600.65
Iter 900 | Cost 124309490445.30
Iter 1000 | Cost 124308343933.82
Iter 1100 | Cost 124307845164.83
Iter 1200 | Cost 124307626436.61
Iter 1300 | Cost 124307529708.82
Iter 1400 | Cost 124307486519.31
Iter 1500 | Cost 124307467011.11
Iter 1600 | Cost 124307458074.45
Iter 1700 | Cost 124307453909.42
Iter 1800 | Cost 124307451927.39
Iter 1900 | Cost 124307450960.68
Iter 2000 | Cost 124307450475.75
Iter 2100 | Cost 124307450224.94
Iter 2200 | Cost 124307450091.03
Iter 2300 | Cost 124307450017.28
Iter 2400 | Cost 124307449975.47
Iter 2500 | Cost 124307449951.16
Iter 2600 | Cost 124307449936.72
Iter 2700 | Cost 124307449928.00
Iter 2800 | Cost 124307449922.65
Iter 2900 | Cost 124307449919.34
Iter 3000 | Cost 12430

In [None]:
def predict(X, w, b):
    """Return the linear model's output ``X @ w + b``.

    Parameters
    ----------
    X : array
        Feature matrix (or a single feature vector) multiplied by ``w``.
    w : array
        Weight vector.
    b : scalar
        Bias added to every prediction.
    """
    weighted_sum = X @ w
    return weighted_sum + b

In [None]:
# Predictions on X_norm, i.e. the same standardized features the model was
# fitted on, so any metric computed from y_pred is a training-set metric.
y_pred = predict(X_norm, w, b)

In [20]:
def rmse(y_true, y_pred):
    """Return the root mean squared error between predictions and targets.

    Parameters
    ----------
    y_true : array
        Observed target values.
    y_pred : array
        Predicted values, subtractable element-wise from ``y_true``.

    Returns
    -------
    float
        ``sqrt(mean((y_pred - y_true) ** 2))``.
    """
    residuals = y_pred - y_true
    mean_squared = np.mean(np.square(residuals))
    return np.sqrt(mean_squared)

# Report the error between the targets y and the predictions y_pred,
# in the same units as y.
print("RMSE:", rmse(y, y_pred))

RMSE: 498612.97599191323
