<a href="https://colab.research.google.com/github/MSGanga/MSGanga/blob/main/Task6_Assignment4_Boston_Housing_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Variables in order:**


|Columns|Description|
|-|-|
|CRIM|per capita crime rate by town|
|ZN| proportion of residential land zoned for lots over 25,000 sq.ft.|
|INDUS|proportion of non-retail business acres per town|
|CHAS| Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)|
|NOX|nitric oxides concentration (parts per 10 million)|
|RM|average number of rooms per dwelling|
|AGE|proportion of owner-occupied units built prior to 1940|
|DIS|weighted distances to five Boston employment centres|
|RAD|index of accessibility to radial highways|
|TAX|full-value property-tax rate per \$10,000|
|PTRATIO|pupil-teacher ratio by town|
|B|1000(Bk - 0.63)^2 where Bk is the proportion of blacks by town|
|LSTAT|% lower status of the population|
|MEDV|Median value of owner-occupied homes in \$1000's|

In [3]:
import numpy as np

# Training features: read columns 1..13 of the training CSV as floats,
# skipping the header row (column 0 is not loaded here).
phi = np.loadtxt(
    '/content/housing_train.csv',
    dtype=float,
    delimiter=',',
    skiprows=1,
    usecols=list(range(1, 14)),
)
phi.shape

(400, 13)

In [4]:
# Training targets: read column 14 of the training CSV, kept two-dimensional
# (ndmin=2) so it stays a column rather than collapsing to a flat vector.
y = np.loadtxt(
    '/content/housing_train.csv',
    dtype=float,
    delimiter=',',
    skiprows=1,
    usecols=(14,),
    ndmin=2,
)
y.shape

(400, 1)

In [5]:
# Test features: same column selection (1..13) as the training features,
# read from the test CSV with its header row skipped.
phi_test = np.loadtxt(
    '/content/housing_test.csv.txt',
    dtype=float,
    delimiter=',',
    skiprows=1,
    usecols=list(range(1, 14)),
)
phi_test.shape

(105, 13)

In [6]:
# Add a column of 1s to the right of phi and phi_test so the last weight acts
# as the intercept. Row counts are taken from the arrays themselves rather
# than hard-coded, so the cell works for any number of samples.
# NOTE: this reassigns phi / phi_test, so re-running the cell appends one more
# column each time — reload the data first if the cell must be re-run.
phi_test = np.concatenate((phi_test, np.ones((phi_test.shape[0], 1))), axis=1)
phi = np.concatenate((phi, np.ones((phi.shape[0], 1))), axis=1)

In [7]:
# Min-max scaling for phi and phi_test (feature engineering).
# Only the first 13 columns (the loaded features) are scaled. The minimum and
# range come from the training data and are reused for the test data, so both
# sets go through the same transformation.
n_scaled = 13
col_min = phi[:, :n_scaled].min(axis=0)
col_max = phi[:, :n_scaled].max(axis=0)

# A constant training column has zero range; dividing by it would produce
# NaN/inf. Use 1 instead so such a column simply maps to 0.
col_range = np.where(col_max > col_min, col_max - col_min, 1.0)

phi[:, :n_scaled] = (phi[:, :n_scaled] - col_min) / col_range
phi_test[:, :n_scaled] = (phi_test[:, :n_scaled] - col_min) / col_range

In [8]:
# Log scaling on y: the weights are fitted against log(y), which is why the
# test predictions are passed through np.exp before being saved.
# NOTE: y is overwritten, so running this cell a second time applies the log
# twice — reload y first if the cell must be re-run.
y = np.log(y)

In [9]:
# Function to calculate the gradient of the regularised error function
def delta_w(p, phi, w, targets=None, reg_strength=None):
    """Gradient of ``||phi @ w - targets||^2 + reg_strength * sum(|w|^p)`` w.r.t. ``w``.

    Parameters
    ----------
    p : float
        Exponent of the penalty term; must satisfy ``1 < p <= 2``.
    phi : ndarray
        Design matrix, one row per sample.
    w : ndarray
        Current weights, shaped so that ``phi @ w`` lines up with ``targets``.
    targets : ndarray, optional
        Regression targets. When omitted, the notebook-level ``y`` is used.
    reg_strength : float, optional
        Weight of the penalty term. When omitted, the notebook-level ``lam``
        is used.

    Returns
    -------
    ndarray
        The gradient evaluated at ``w``.

    Raises
    ------
    ValueError
        If ``p`` is outside the interval ``(1, 2]``.
    """
    if not 1 < p <= 2:
        raise ValueError("p must satisfy 1 < p <= 2, got %r" % (p,))

    # Fall back to the notebook-level names so existing three-argument calls
    # keep working unchanged.
    if targets is None:
        targets = y
    if reg_strength is None:
        reg_strength = lam

    # Squared-error part: 2 * phi^T (phi w - targets). Grouping it this way
    # avoids forming phi^T phi on every call.
    data_grad = 2 * np.dot(np.transpose(phi), np.dot(phi, w) - targets)

    # Penalty part: d/dw |w|^p = p * |w|^(p-1) * sign(w). The sign factor is
    # needed for p == 2 as well; without it negative weights are pushed the
    # wrong way (the gradient of w^2 is 2w, not 2|w|).
    penalty_grad = (reg_strength * p *
                    np.power(np.absolute(w), (p - 1)) * np.sign(w))

    return data_grad + penalty_grad

In [10]:
# Output files to produce, keyed by file name; each value is the penalty
# exponent p used when fitting the weights written to that file.
filenames = {
    'output.csv': 2.0,
    'output_p1.csv': 1.75,
    'output_p2.csv': 1.5,
    'output_p3.csv': 1.3,
}

In [11]:
# Settings shared by every run (hoisted out of the loop: they do not depend
# on p, and delta_w reads lam as a notebook-level name).

# Hyperparameter lambda value
lam = 0.2

# Step size for gradient descent
t = 0.00012

# Stop once consecutive weight vectors differ by less than this (L2 norm)
tolerance = 10 ** -10

# Safety cap so a run that never meets the tolerance cannot loop forever
max_iterations = 10 ** 7

# Load the test ids once; they are the same for every output file
id_test = np.loadtxt('/content/housing_test.csv.txt', dtype='int', delimiter=',',
                     skiprows=1, usecols=0, ndmin=2)

# For each (file name, p) pair: fit the weights by gradient descent, predict
# on the test set and write the predictions to the file.
for (fname, p) in filenames.items():
    # Set initial w to zeros, one weight per column of phi
    w = np.zeros((phi.shape[1], 1))

    # First gradient step
    w_new = w - t * delta_w(p, phi, w)

    i = 0
    # Repeat steps until the change between consecutive w is below tolerance
    while np.linalg.norm(w_new - w) > tolerance:
        if i >= max_iterations:
            raise RuntimeError(
                "gradient descent for p=%r did not converge within %d iterations"
                % (p, max_iterations))
        w = w_new
        w_new = w - t * delta_w(p, phi, w)
        i = i + 1

    # A diverged run ends the loop with NaN/inf weights (the norm comparison
    # is False for NaN); refuse to write meaningless predictions.
    if not np.all(np.isfinite(w_new)):
        raise FloatingPointError(
            "gradient descent for p=%r produced non-finite weights" % (p,))

    # Predict on the test data; exp undoes the log applied to y before fitting
    y_test = np.exp(np.dot(phi_test, w_new))

    # Save the ids and predictions
    np.savetxt(fname, np.concatenate((id_test, y_test), axis=1),
               delimiter=',', fmt=['%d', '%f'], header='ID,MEDV', comments='')