In [None]:
import numpy as np

**Load The Data**

In [None]:
from sklearn.datasets import fetch_california_housing

# `data_home` must be passed by keyword: current scikit-learn releases declare
# every parameter of this loader keyword-only and reject a positional path.
cal_data = fetch_california_housing(data_home="~/data/sklearn_datasets/")

# Print the dataset description
print(cal_data['DESCR'])

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block
        - HouseAge      median house age in block
        - AveRooms      average number of rooms
        - AveBedrms     average number of bedrooms
        - Population    block population
        - AveOccup      average house occupancy
        - Latitude      house block latitude
        - Longitude     house block longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
http://lib.stat.cmu.edu/datasets/

The target variable is the median house value for California districts.

This dataset was derived from the 1990 U.S. census, using one row per census
block group. A block group is the smallest geographical unit for which the U.S.
Census Bur

**Question 1: Rescale The Data** using the MinMaxScaler

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Pull the feature matrix and the target vector out of the loaded dataset.
X, y = cal_data['data'], cal_data['target']

# Min-max scaling rescales each feature column to the [0, 1] range so that all
# features are on a comparable scale for the gradient-descent cells below.
# NOTE(review): the scaler is fitted on the full dataset, i.e. before the
# train/test split done later in the notebook -- confirm this is intended.
scaler = MinMaxScaler()
X = scaler.fit(X).transform(X)

# Show the rescaled feature matrix.
print(X)


[[0.53966842 0.78431373 0.0435123  ... 0.00149943 0.5674814  0.21115538]
 [0.53802706 0.39215686 0.03822395 ... 0.00114074 0.565356   0.21215139]
 [0.46602805 1.         0.05275646 ... 0.00169796 0.5642933  0.21015936]
 ...
 [0.08276438 0.31372549 0.03090386 ... 0.0013144  0.73219979 0.31175299]
 [0.09429525 0.33333333 0.03178269 ... 0.0011515  0.73219979 0.30179283]
 [0.13025338 0.29411765 0.03125246 ... 0.00154886 0.72582359 0.30976096]]


**Question 2: Train a Model using the sklearn *Linear Regression Model***

In [None]:
from sklearn.model_selection import train_test_split

# Hold out half of the samples for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=42,
)

In [None]:
# Prepend a column of ones to the training features so that the first weight
# of a linear model fitted on X_train_aug plays the role of the intercept.
ones_column = np.ones((X_train.shape[0], 1))
X_train_aug = np.hstack((ones_column, X_train))
X_train_aug

array([[1.        , 0.23399677, 0.35294118, ..., 0.00177227, 0.16578108,
        0.65139442],
       [1.        , 0.45701439, 0.2745098 , ..., 0.00227299, 0.50478215,
        0.25796813],
       [1.        , 0.31133364, 0.2745098 , ..., 0.00150367, 0.54941552,
        0.24003984],
       ...,
       [1.        , 0.16789424, 0.68627451, ..., 0.00212431, 0.15834219,
        0.59462151],
       [1.        , 0.35994676, 0.2745098 , ..., 0.00200105, 0.53560043,
        0.23804781],
       [1.        , 0.14314285, 1.        , ..., 0.00113982, 0.55579171,
        0.19223108]])

In [None]:
def solve_normal_eq(X, y):
    """Solve ordinary least squares through the normal equations.

    Finds the weight vector ``w`` that satisfies ``(X^T X) w = X^T y``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Design matrix. Include a column of ones if an intercept is wanted.
    y : array-like of shape (n_samples,) or (n_samples, n_targets)
        Target values.

    Returns
    -------
    numpy.ndarray
        Least-squares weights, one per column of ``X``.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional.
    numpy.linalg.LinAlgError
        If ``X^T X`` is singular.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim != 2:
        raise ValueError(f'X must be 2-dimensional, got {X.ndim} dimension(s)')

    gram = X.T @ X      # X^T X
    moment = X.T @ y    # X^T y
    # Solve the linear system directly instead of forming the explicit
    # inverse: it is cheaper and loses less floating-point precision.
    return np.linalg.solve(gram, moment)

# Closed-form fit on the bias-augmented training set, then list every weight.
w = solve_normal_eq(X_train_aug, y_train)
for idx, weight in enumerate(w):
    print(f'w{idx}={weight:.5f}')

w0=3.67668
w1=6.40956
w2=0.49202
w3=-16.84340
w4=26.04256
w5=-0.01272
w6=-3.75773
w7=-3.98365
w8=-4.39237


In [None]:
from sklearn.linear_model import LinearRegression

# Reference fit with scikit-learn on the non-augmented training features; the
# intercept is reported separately from the per-feature coefficients.
reg = LinearRegression()
reg.fit(X_train, y_train)
print(f'intercept={reg.intercept_:.5f}')
for idx, coef in enumerate(reg.coef_, start=1):
    print(f'w{idx}={coef:.5f}')

intercept=3.67668
w1=6.40956
w2=0.49202
w3=-16.84340
w4=26.04256
w5=-0.01272
w6=-3.75773
w7=-3.98365
w8=-4.39237


**Question 3: Train a model using Gradient Descent**

In [None]:
def gradfn(weights, X, y):
    """Return the averaged least-squares gradient ``X^T (X w - y) / n``.

    Parameters
    ----------
    weights : current weight vector, one entry per column of ``X``.
    X : two-dimensional design matrix with ``n`` rows.
    y : targets, one per row of ``X``.

    Returns
    -------
    The gradient, with one entry per weight.
    """
    # Unpacking into two names keeps the requirement that X is 2-D.
    n_samples, _ = np.shape(X)
    residual = np.matmul(X, weights) - y
    return (np.transpose(X) @ residual) / float(n_samples)

In [None]:
def solve_via_gradient_descent(X, y, print_every=100000,
                               niter=500000, eta=0.005, seed=None):
    """Fit linear-regression weights with full-batch gradient descent.

    Starts from weights drawn uniformly from [0, 1) and repeatedly applies
    ``w <- w - eta * gradfn(w, X, y)``, where ``gradfn`` is the gradient
    helper defined in this notebook.

    Parameters
    ----------
    X : two-dimensional design matrix (n_samples, n_features).
    y : targets, one per row of ``X``.
    print_every : int or None
        Print the weights and gradient every ``print_every`` iterations.
        ``0`` or ``None`` disables progress output.
    niter : int
        Number of gradient steps to take.
    eta : float
        Learning rate (step size).
    seed : int or None
        Seed for the random initial weights. ``None`` (the default) draws
        from NumPy's global random state, as the function always did.

    Returns
    -------
    numpy.ndarray
        The weights after ``niter`` updates.
    """
    _, n_features = np.shape(X)
    # A dedicated generator makes the run reproducible when a seed is given;
    # otherwise fall back to the module-level (global-state) functions.
    rng = np.random if seed is None else np.random.RandomState(seed)
    w = rng.rand(n_features)
    for k in range(niter):
        dw = gradfn(w, X, y)
        w = w - eta * dw  # step against the gradient
        # Guard against print_every == 0, which would otherwise divide by zero.
        if print_every and k % print_every == 0:
            print(f'Weight after {k} iteration: {str(w)};  gradient: {str(dw)}')
    return w

In [None]:
# Full-batch gradient descent on the bias-augmented training set, followed by
# a listing of the final weights.
w = solve_via_gradient_descent(X=X_train_aug, y=y_train)
print('\n')
for idx, weight in enumerate(w):
    print(f'w{idx}={weight:.5f}')

Weight after 0 iteration: [0.28593199 0.32217752 0.64348802 0.11748373 0.75667619 0.90120427
 0.41051436 0.35405729 0.79151023];  gradient: [-0.81349931 -0.29219549 -0.43831274 -0.02957566 -0.01772962 -0.03187743
 -0.00123206 -0.24890675 -0.35936762]
Weight after 100000 iteration: [ 2.58790223  5.81562695  0.69846997  0.20716241  1.0426977   0.82349457
  0.12638357 -2.78701848 -2.94285935];  gradient: [-2.83511886e-03  6.84785862e-04  3.67010232e-04 -7.36856376e-05
 -8.51122403e-04  3.99368218e-04  5.49951662e-04  2.82961241e-03
  3.24670720e-03]
Weight after 200000 iteration: [ 3.48645183  5.59183928  0.57931544  0.30534012  1.48488267  0.57760498
 -0.130446   -3.68348758 -3.96857822];  gradient: [-0.00105737  0.00026783  0.00014477 -0.00026082 -0.00088862  0.00051409
  0.00048185  0.00105822  0.00120536]
Weight after 300000 iteration: [ 3.82804546  5.50445443  0.53133379  0.43474412  1.91576267  0.34424877
 -0.35868491 -4.02750122 -4.35872634];  gradient: [-4.14743592e-04  1.07489116

**Question 4: Practice with SGD**

In [None]:
def solve_via_SGD(X, y, print_every=100000,
                  niter=500000, eta=0.005,
                  batch_size=50, seed=None):
    """Fit linear-regression weights with mini-batch stochastic gradient descent.

    Starts from weights drawn uniformly from [0, 1). Each iteration samples
    ``batch_size`` row indices uniformly *with replacement* and applies
    ``w <- w - eta * gradfn(w, X[idx], y[idx])``, where ``gradfn`` is the
    gradient helper defined in this notebook.

    Parameters
    ----------
    X : numpy.ndarray of shape (n_samples, n_features)
        Design matrix; must support integer-array row indexing.
    y : numpy.ndarray of shape (n_samples,)
        Targets, one per row of ``X``.
    print_every : int or None
        Print the weights and gradient every ``print_every`` iterations.
        ``0`` or ``None`` disables progress output.
    niter : int
        Number of mini-batch updates.
    eta : float
        Learning rate (step size).
    batch_size : int
        Number of rows sampled per update (default 50, the value that was
        previously hard-coded).
    seed : int or None
        Seed for the initial weights and the batch sampling. ``None`` (the
        default) draws from NumPy's global random state, as before.

    Returns
    -------
    numpy.ndarray
        The weights after ``niter`` updates.
    """
    n_samples, n_features = np.shape(X)
    # A dedicated generator makes the run reproducible when a seed is given;
    # otherwise fall back to the module-level (global-state) functions.
    rng = np.random if seed is None else np.random.RandomState(seed)
    w = rng.rand(n_features)

    for k in range(niter):
        batch_idx = rng.randint(n_samples, size=batch_size)
        dw = gradfn(w, X[batch_idx], y[batch_idx])
        w = w - eta * dw  # step against the mini-batch gradient
        # Guard against print_every == 0, which would otherwise divide by zero.
        if print_every and k % print_every == 0:
            print(f'Weight after {k} iteration: {str(w)};  gradient: {str(dw)}')
    return w

In [None]:
# Mini-batch SGD on the bias-augmented training set, followed by a listing of
# the final weights.
w = solve_via_SGD(X=X_train_aug, y=y_train)
print('\n')
for idx, weight in enumerate(w):
    print(f'w{idx}={weight:.5f}')

Weight after 0 iteration: [0.83823537 0.49993941 0.35298516 0.16820792 0.56802536 0.76005882
 0.02240434 0.61770512 0.45756018];  gradient: [-0.2737769  -0.20007406 -0.15651264 -0.01427951 -0.00777909 -0.001258
 -0.00045355 -0.06973812 -0.11577289]
Weight after 100000 iteration: [ 2.67228083  5.80042823  0.69698032  0.25186023  0.85533917  0.70167966
 -0.23152486 -2.86190933 -3.02216698];  gradient: [-0.02750412 -0.00062002  0.03414963  0.00091419 -0.0010591  -0.00606405
  0.00011825  0.02791567 -0.04556933]
Weight after 200000 iteration: [ 3.51721617  5.57315246  0.57717772  0.37649997  1.31764623  0.49630763
 -0.46996039 -3.71148873 -4.00251252];  gradient: [ 0.07007649  0.0163326   0.05724375  0.00245852  0.00131511  0.00404617
  0.00021624 -0.00574155  0.04620675]
Weight after 300000 iteration: [ 3.83786886  5.50450956  0.52687155  0.51137489  1.75478139  0.29482595
 -0.67380537 -4.03406515 -4.36930382];  gradient: [ 0.06270831 -0.00020807  0.04050348  0.00173154  0.00115768  0.005

**Question 5: Calculate MAE, MSE, RMSE**

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Score the scikit-learn model (`reg`) on the held-out test split.
y_pred = reg.predict(X_test)

# scikit-learn metrics take (y_true, y_pred) in that order. MAE and MSE are
# symmetric, so the printed values do not change, but the conventional order
# keeps the cell correct if an asymmetric metric is added later.
print(f'MAE = {mean_absolute_error(y_test, y_pred):.3f}')
MSE = mean_squared_error(y_test, y_pred)
print(f'MSE = {MSE:.3f}')
print(f'RMSE= {np.sqrt(MSE):.3f}')

MAE = 0.532
MSE = 0.531
RMSE= 0.729
