In [4]:
# Multivariable Linear Regression using Gradient Descent in Core Python

def read_csv(filename):
    with open(filename, 'r') as f:
        lines = f.readlines()
    headers = lines[0].strip().split(',')
    data = []
    for line in lines[1:]:
        parts = line.strip().split(',')
        if '' in parts:
            continue  # skip rows with missing values
        try:
            row = [float(parts[i]) for i in range(len(parts)) if headers[i] != 'ocean_proximity']
            data.append(row)
        except ValueError:
            continue
    return headers, data

def normalize_features(data):
    # Transpose to get columns
    cols = list(zip(*data))
    means = [sum(col)/len(col) for col in cols[:-1]]  # exclude target
    stds = []
    for i, col in enumerate(cols[:-1]):
        mean = means[i]
        std = (sum((x - mean) ** 2 for x in col) / len(col)) ** 0.5
        stds.append(std)
    norm_data = []
    for row in data:
        norm_row = [(row[i] - means[i]) / stds[i] if stds[i] != 0 else 0 for i in range(len(row)-1)]
        norm_row.insert(0, 1.0)  # bias term
        norm_row.append(row[-1])  # target
        norm_data.append(norm_row)
    return norm_data, means, stds

def gradient_descent(data, alpha=0.01, epochs=1000):
    m = len(data)
    n = len(data[0]) - 1  # number of features including bias
    theta = [0.0] * n
    for epoch in range(epochs):
        gradients = [0.0] * n
        cost = 0.0
        for row in data:
            x = row[:-1]
            y = row[-1]
            prediction = sum(theta[i] * x[i] for i in range(n))
            error = prediction - y
            for i in range(n):
                gradients[i] += error * x[i]
            cost += error ** 2
        for i in range(n):
            theta[i] -= (alpha / m) * gradients[i]
        if epoch % 100 == 0:
            print(f"Epoch {epoch}, Cost: {cost / (2 * m)}")
    return theta, cost / (2 * m)

def main():
    headers, data = read_csv("housing.csv")
    processed_data, means, stds = normalize_features(data)
    theta, final_cost = gradient_descent(processed_data)
    print("Trained parameters (theta):", theta)
    print("Final cost:", final_cost)

if __name__ == "__main__":
    main()


Epoch 0, Cost: 28058813258.967407
Epoch 100, Cost: 6423143894.967642
Epoch 200, Cost: 3415891716.608749
Epoch 300, Cost: 2935810521.9377365
Epoch 400, Cost: 2812114569.348062
Epoch 500, Cost: 2746928344.2381597
Epoch 600, Cost: 2697554198.0897646
Epoch 700, Cost: 2656723271.8183813
Epoch 800, Cost: 2622351769.5012493
Epoch 900, Cost: 2593280461.114898
Trained parameters (theta): [206855.4825604294, -43359.465322032, -48633.53602124988, 20382.5867714799, -4813.181463459487, 22149.379808792553, -32437.14629891768, 22518.890781989725, 78364.03535757643]
Final cost: 2568861340.592311
