Linear regression using

Boston Housing Data

dataset docs: https://www.kaggle.com/code/prasadperera/the-boston-housing-dataset/data

In [3]:
# import packages
import pandas as pd
import numpy as np
from pathlib import Path
import math

from sklearn.datasets import fetch_california_housing

import math, copy


In [4]:
housing = fetch_california_housing()

housing

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

In [5]:
# np.random.seed(1)
np.random.randint(10)

8

In [30]:
# helpers - data clean

def load_data():
    housing  = fetch_california_housing()
    
    df = pd.DataFrame(data= np.c_[housing['data'], housing['target']],
                     columns= housing['feature_names'] + ['target'])
    return df
    

def train_test_split(df):
    n = len(df)
    
    # train test split (2/3 train, 1/3 test)
    n_train = round(2/3*n)

    train_df = df[:n_train]
    test_df = df[n_train:]
    
    return train_df, test_df



def df_to_input(df):
    m = len(df)
    X = df['AveBedrms'].values.reshape(1, m)
    Y = df['target'].values.reshape(1, m)
    
    return X, Y


def initial_rand(X):
    
    np.random.seed(1)
    
    m = X.shape[0]
    n = X.shape[1]
    
    w = np.random.randn(n).reshape(n,1) * 0.01
    b = np.random.randint(0,100) * 0.01 
    
    return w, b

def initial_zeros(X):
    
    np.random.seed(1)
    
    # m = number of training examples
    m = X.shape[0]
    
    # n = number of features
    n = X.shape[1]
    
    w = np.zeros(n).reshape(n,1)
    b = 0
    
    return w, b
    

In [31]:
# run

df = load_data()

train_df, test_df = train_test_split(df) 


In [32]:
# # show data
# m = number of training examples
m = train_df.values.shape[0]
# n = number of features
n = len(train_df.drop(columns='target').columns)



In [33]:
# X should be of the dimensions m, n

X = train_df.drop(columns='target').values.reshape(m,n)
Y = train_df['target'].values.reshape(m,1)

m, n, X.shape, Y.shape

(13760, 8, (13760, 8), (13760, 1))

In [35]:
w, b = initial_rand(X)

w, b = initial_zeros(X)
w,b

(array([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]]),
 0)

In [39]:
def forward_prop(X, w, b):
    n = X.shape[0]
    # reshape step important for later functions
    Y_hat = np.dot(X, w) + b
    return Y_hat


def calculate_cost(X, Y, w, b):
    m = X.shape[0]
    Y_hat = forward_prop(X, w, b)
    cost = np.sum((Y_hat - Y)**2 ) / (2*m)
    return cost


def calculate_grads(X, Y, w, b):
    m, n = X.shape
    Y_hat = forward_prop(X, w, b)
    db = np.mean(Y_hat - Y)
    dw = np.sum(((Y_hat - Y) * X.T), axis=1) / m
    return db, dw

def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters): 
    """
    Performs batch gradient descent to learn theta. Updates theta by taking 
    num_iters gradient steps with learning rate alpha
    
    Args:
      X (ndarray (m,n))   : Data, m examples with n features
      y (ndarray (m,))    : target values
      w_in (ndarray (n,)) : initial model parameters  
      b_in (scalar)       : initial model parameter
      cost_function       : function to compute cost
      gradient_function   : function to compute the gradient
      alpha (float)       : Learning rate
      num_iters (int)     : number of iterations to run gradient descent
      
    Returns:
      w (ndarray (n,)) : Updated values of parameters 
      b (scalar)       : Updated value of parameter 
      """
    
    # An array to store cost J and w's at each iteration primarily for graphing later
    J_history = []
    w = copy.deepcopy(w_in)  #avoid modifying global w within function
    b = b_in
    
    for i in range(num_iters):

        # Calculate the gradient and update the parameters
        dj_db,dj_dw = gradient_function(X, y, w, b)   ##None
            
        # Update Parameters using w, b, alpha and gradient
        w = w - alpha * dj_dw               ##None
        b = b - alpha * dj_db               ##None
      
        # Save cost J at each iteration
        if i<100000:      # prevent resource exhaustion 
            J_history.append( cost_function(X, y, w, b))

        # Print cost every at intervals 10 times or as many iterations if < 10
        if i% math.ceil(num_iters / 10) == 0:
            print(f"Iteration {i:4d}: Cost {J_history[-1]:8.2f}   ")

        
    return w, b, J_history #return final w,b and J history for graphing

In [40]:

# initialize parameters
initial_w = np.zeros_like(w)
initial_b = 0.

# some gradient descent settings
iterations = 1000
alpha = 5.0e-7

# run gradient descent 
w_final, b_final, J_hist = gradient_descent(X, Y, initial_w, initial_b,
                                                    calculate_cost, calculate_grads, 
                                                    alpha, iterations)
print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")
m,_ = X_train.shape
for i in range(m):
    print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

ValueError: operands could not be broadcast together with shapes (13760,1) (8,13760) 