### NumPy is required (pandas is used only for the optional inspection cells), so the code begins by importing both

In [1]:
import numpy as np
import pandas as pd

### Import phi and y from the train dataset using NumPy's loadtxt function

In [2]:
# Parse the training CSV once instead of twice: columns 1-13 are the
# features (phi), column 14 is the target (y); column 0 is skipped.
# ndmin=2 keeps the result 2-D even when the file holds a single data row.
train = np.loadtxt('data/train.csv', delimiter=',', skiprows=1,
                   usecols=tuple(range(1, 15)), ndmin=2)

# Import phi from train data set (first 13 loaded columns)
phi = train[:, :13]

# Import y from train data set (last loaded column, kept as a 2-D column)
y = train[:, 13:]

### Import phi_test from the test dataset using the loadtxt function

In [3]:
# Import phi_test from test data set (feature columns 1-13; column 0 is
# read separately as the row id when predictions are written).
# ndmin=2 keeps phi_test 2-D even for a single-row test file, so that
# len(phi_test) counts rows and the bias column can be concatenated.
phi_test = np.loadtxt('data/test.csv', delimiter=',', skiprows=1,
                      usecols=tuple(range(1, 14)), ndmin=2)

In [4]:
## Checking
# pd.DataFrame(phi)
# pd.DataFrame(phi_test)
# pd.DataFrame(y)

### Concatenate column of 1s to right of phi and phi_test

In [5]:
# Append a constant column of 1s as the last column of each design matrix
# so that the final weight acts as the intercept term.
bias_train = np.ones((phi.shape[0], 1))
phi = np.concatenate((phi, bias_train), axis=1)

bias_test = np.ones((phi_test.shape[0], 1))
phi_test = np.concatenate((phi_test, bias_test), axis=1)

In [6]:
## Checking
# pd.DataFrame(phi)
# pd.DataFrame(phi_test)

### Apply min max scaling on each column of phi and phi_test

In [7]:
# Min-max scale every feature column, i.e. all columns except the trailing
# column of 1s. The minimum and range come from the training data only and
# are reused for phi_test so both matrices share one scale.
n_features = phi.shape[1] - 1
col_min = phi[:, :n_features].min(axis=0)
col_max = phi[:, :n_features].max(axis=0)
col_range = col_max - col_min

# A feature that is constant in the training data has zero range; dividing
# by it would produce NaN/inf and poison the fit. Dividing by 1 instead
# maps such a column to 0 in phi.
col_range[col_range == 0] = 1.0

phi[:, :n_features] = (phi[:, :n_features] - col_min) / col_range
phi_test[:, :n_features] = (phi_test[:, :n_features] - col_min) / col_range

### Apply log scaling on y

In [8]:
# Fit against log(y) rather than y; the prediction step applies np.exp to
# undo this transform before the results are written out.
y = np.log(y)

In [9]:
## Checking
# pd.DataFrame(phi)
# pd.DataFrame(phi_test)
# pd.DataFrame(y)

### Define a function to calculate the gradient of the error function with respect to w, based on phi, w and the p-norm penalty

In [10]:
def delta_w(p, phi, w, targets=None, reg=None):
    """Return the gradient of the p-norm penalised squared error w.r.t. ``w``.

    The value computed is::

        2 * phi.T @ (phi @ w - targets) + reg * p * |w| ** (p - 1) * sign(w)

    which corresponds to the error function
    ``||phi @ w - targets||^2 + reg * sum(|w_i| ** p)``.

    The ``sign(w)`` factor is applied for every ``p``, including ``p == 2``:
    without it the penalty gradient would be ``2 * reg * |w|`` and would push
    negative weights away from zero instead of towards it.

    Args:
        p: Exponent of the penalty term; must satisfy ``1 < p <= 2``.
        phi: Design matrix, one row per sample.
        w: Weight vector (a column vector in this notebook).
        targets: Target values. When omitted, the module-level ``y`` is used.
        reg: Penalty strength. When omitted, the module-level ``lambd`` is
            used.

    Returns:
        The gradient array; for column-vector ``w`` and ``targets`` it has the
        same shape as ``w``.

    Raises:
        ValueError: If ``p`` is not in the interval ``(1, 2]``.
    """
    if not 1 < p <= 2:
        raise ValueError('p must satisfy 1 < p <= 2, got %r' % (p,))

    # Fall back to the notebook-level globals so existing three-argument
    # calls keep working unchanged.
    if targets is None:
        targets = y
    if reg is None:
        reg = lambd

    # phi.T @ (phi @ w - targets) avoids rebuilding phi.T @ phi on every call.
    residual = np.dot(phi, w) - targets
    data_grad = 2 * np.dot(np.transpose(phi), residual)
    penalty_grad = reg * p * np.power(np.absolute(w), p - 1) * np.sign(w)
    return data_grad + penalty_grad

### Make a dictionary containing filenames as keys and p as values

In [11]:
# Output file name -> exponent p of the penalty term used for that run.
# One model is fitted and one prediction file is written per entry.
filenames = {
    'output.csv': 2.0,
    'output_p1.csv': 1.75,
    'output_p2.csv': 1.5,
    'output_p3.csv': 1.3,
}

### For each item in this dictionary, fit w by gradient descent and write the test predictions to the corresponding file

In [12]:
# Hyper-parameters are the same for every p, so they are set once.
# lambda value (delta_w reads this module-level name, so it must stay `lambd`)
lambd = 0.2
# Maximum step size
t = 0.00012

# The ids do not depend on p either: load them from the test file once
# instead of re-parsing the file for every output.
id_test = np.loadtxt('data/test.csv', dtype='int', delimiter=',',
                     skiprows=1, usecols=0, ndmin=2)

for (fname, p) in filenames.items():

    # Start from all-zero weights, one per column of phi
    w = np.zeros((phi.shape[1], 1))

    # First gradient-descent step
    w_new = w - t * delta_w(p, phi, w)

    # i counts the additional steps taken after the first one
    i = 0
    # Repeat steps until the change between consecutive ws is below threshold
    while np.linalg.norm(w_new - w) > 10 ** -10:
        w = w_new
        w_new = w - t * delta_w(p, phi, w)
        i = i + 1

    # A NaN norm also ends the loop above (NaN > x is False), so check
    # explicitly rather than silently writing NaN predictions to disk.
    if not np.isfinite(w_new).all():
        raise FloatingPointError(
            'gradient descent diverged for p=%s (step size t=%s)' % (p, t))

    # Calculate y for test data using phi test and applying inverse log
    y_test = np.exp(np.dot(phi_test, w_new))

    # Save the ids and y according to filename from dictionary
    np.savetxt(fname, np.concatenate((id_test, y_test), axis=1),
               delimiter=',', fmt=['%d', '%f'], header='ID,MEDV', comments='')