In [None]:
pwd

In [2]:
# used for manipulating directory paths
import os
# Scientific and vector computation for python
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Plotting library
from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D  # needed to plot 3-D surfaces
# tells matplotlib to embed plots within the notebook
%matplotlib inline

In [3]:
# Load data
data = np.genfromtxt(os.path.join( 'house_prices_data_training_data.csv'), delimiter=',')

X = data[:, 0:17]
y = data[:, 17]

Xtr = data[1:10801, 0:17]
ytr = data[1:10801, 17]

Xcv = data[10801:14401, 0:17]
ycv = data[10801:14401, 17]

Xte = data[14401:18001, 0:17]
yte = data[14401:18001, 17]


m=y.size

m = y.size
 

print(Xtr)

[[ 3.00000e+00  1.00000e+00  1.18000e+03 ... -1.22257e+02  1.34000e+03
   5.65000e+03]
 [ 3.00000e+00  2.25000e+00  2.57000e+03 ... -1.22319e+02  1.69000e+03
   7.63900e+03]
 [ 2.00000e+00  1.00000e+00  7.70000e+02 ... -1.22233e+02  2.72000e+03
   8.06200e+03]
 ...
 [ 2.00000e+00  1.75000e+00  1.80000e+03 ... -1.22060e+02  1.62000e+03
   1.12384e+05]
 [ 4.00000e+00  2.50000e+00  2.40000e+03 ... -1.22339e+02  1.71000e+03
   7.90900e+03]
 [ 4.00000e+00  2.00000e+00  2.37000e+03 ... -1.22279e+02  2.11000e+03
   1.93340e+04]]


In [4]:
def  featureNormalize(X):
    
    # You need to set these values correctly
    X_norm = X.copy()
    mu = np.zeros(X.shape[1])
    sigma = np.zeros(X.shape[1])
    

    # =========================== YOUR CODE HERE =====================
    for i in range (17):
        mu = np.mean(X[:,i],axis=0)
        sigma = np.std(X[:,i],axis = 0)
    X_norm = (X-mu)/sigma
    # ================================================================
    return X_norm, mu, sigma

In [5]:
def  featureNormalize_2(X):
    #fuction to add bias feature
    
    X_norm = X.copy()
    mu = np.zeros(0)
    sigma = np.zeros(0)
    
    for i in range(0,X.shape[1]):
        mu=np.concatenate([mu,[np.mean(X[:,i])]], axis=0)
        sigma=np.concatenate([sigma,[np.std(X[:,i])]], axis=0)
    
    X_norm=(X-mu)/sigma
    
    return X_norm, mu, sigma

In [6]:
def computeCostMulti(X, y, theta):
    
    m = y.shape[0]
    
    J = 0
    
    H = np.dot(X,theta)
    
    J = (1/(2*m)) * np.sum(np.square(H-y))
    
    return J
    
    

In [7]:
def gradientDescentMulti(X, y, theta, alpha, num_iters):
    
    m = y.shape[0]
    
    theta = theta.copy()
    
    J_history = []
    
    for i in range(num_iters):
        
        theta= theta - (alpha/m)*(np.dot(Xtr,theta.T)-ytr).dot(Xtr)
        
        J_history.append(computeCostMulti(Xtr, ytr, theta))
    
    return theta, J_history
        
    

In [8]:
alpha = 0.03
num_iters = 1000

theta = np.zeros(18)

X_norm, mu, sigma =featureNormalize(X)
X = np.concatenate([np.ones((m, 1)), X_norm], axis=1)

print(computeCostMulti(X, y, theta)) #cost function pre-gradient descent

theta, J_history = gradientDescentMulti(X, y, theta, alpha, num_iters) #gradient descent

print(computeCostMulti(X, y, theta)) #cost function post-gradient descent


#test order of polynomials from 1 to 4

#initialize variables for different orders

#d=2
X_2 = np.concatenate((X, np.square(X)), axis = 1)

#d=3
X_3 = np.concatenate((X_2, np.power(X,3)), axis = 1)

#d=4
X_4 = np.concatenate((X_3, np.power(X,4)), axis = 1)

#initialize variables for training, validation and testing (different orders)

#d=2
Xtr_2 = X_2[1:10801, 0:17]
Xcv_2 = X_2[10801:14401, 0:17]
Xte_2 = X_2[14401:18001, 0:17]

#d=3
Xtr_3 = X_3[1:10801, 0:17]
Xcv_3 = X_3[10801:14401, 0:17]
Xte_3 = X_3[14401:18001, 0:17]

#d=4
Xtr_4 = X_4[1:10801, 0:17]
Xcv_4 = X_4[10801:14401, 0:17]
Xte_4 = X_4[14401:18001, 0:17]
            

ytr = data[1:10801, 17]

ycv = data[10801:14401, 17]

yte = data[14401:18001, 17]




#normalize, cost function and gradient descent of first order 

Xtr_norm, mu_tr, sigma_tr =featureNormalize_2(Xtr)
Xcv_norm, mu_cv, sigma_cv =featureNormalize_2(Xcv)
Xte_norm, mu_te, sigma_te =featureNormalize2(Xte)
Xtr = np.concatenate([np.ones((10800, 1)), Xtr_norm], axis=1)
Xcv = np.concatenate([np.ones((3600, 1)), Xcv_norm], axis=1)
Xte = np.concatenate([np.ones((3599, 1)), Xte_norm], axis=1)

theta = np.zeros(18)

print(computeCostMulti(Xtr, ytr, theta))
theta, J_history = gradientDescentMulti(Xtr, ytr, theta, alpha, num_iters)
print(computeCostMulti(Xtr, ytr, theta), "first training error")

print(computeCostMulti(Xcv, ycv, theta),"first cross validation error")



#normalize, cost function and gradient descent of second order 

Xtr_2_norm, mu_tr_2, sigma_tr_2 =featureNormalize_2(Xtr_2)
Xcv_2_norm, mu_cv_2, sigma_cv_2 =featureNormalize_2(Xcv_2)
Xte_2_norm, mu_te_2, sigma_te_2 =featureNormalize_2(Xte_2)
Xtr_2 = np.concatenate([np.ones((10800, 1)), Xtr_2_norm], axis=1)
Xcv_2 = np.concatenate([np.ones((3600, 1)), Xcv_2_norm], axis=1)
Xte_2 = np.concatenate([np.ones((3600, 1)), Xte_2_norm], axis=1)

theta_2 = np.zeros(35)

print(computeCostMulti(Xtr_2, ytr, theta_2))
theta_2, J_history_2 = gradientDescentMulti(Xtr_2, ytr, theta_2, alpha, num_iters)
print(computeCostMulti(Xtr_2, ytr, theta_2), "second training error")

print(computeCostMulti(Xcv_2, ycv, theta_2),"second cross validation error")



#normalize, cost function and gradient descent of third order 

Xtr_3_norm, mu_tr_3, sigma_tr_3 =featureNormalize_2(Xtr_3)
Xcv_3_norm, mu_cv_3, sigma_cv_3 =featureNormalize_2(Xcv_3)
Xte_3_norm, mu_te_3, sigma_te_3 =featureNormalize_2(Xte_3)
Xtr_3 = np.concatenate([np.ones((10800, 1)), Xtr_3_norm], axis=1)
Xcv_3 = np.concatenate([np.ones((3600, 1)), Xcv_3_norm], axis=1)
Xte_3 = np.concatenate([np.ones((3600, 1)), Xte_3_norm], axis=1)

theta_3 = np.zeros(52)

print(computeCostMulti(Xtr_3, ytr, theta_3))
theta_3, J_history_3 = gradientDescentMulti(Xtr_3, ytr, theta_3, alpha, num_iters)
print(computeCostMulti(Xtr_3, ytr, theta_3), "third training error")

print(computeCostMulti(Xcv_3, ycv, theta_3),"third cross validation error")




#normalize, cost function and gradient descent of third order 

Xtr_4_norm, mu_tr_4, sigma_tr_4 =featureNormalize_2(Xtr_4)
Xcv_4_norm, mu_cv_4, sigma_cv_4 =featureNormalize_2(Xcv_4)
Xte_4_norm, mu_te_4, sigma_te_4 =featureNormalize_2(Xte_4)
Xtr_4 = np.concatenate([np.ones((10800, 1)), Xtr_4_norm], axis=1)
Xcv_4 = np.concatenate([np.ones((3600, 1)), Xcv_4_norm], axis=1)
Xte_4 = np.concatenate([np.ones((3600, 1)), Xte_4_norm], axis=1)

theta_4 = np.zeros(69)

print(computeCostMulti(Xtr_4, ytr, theta_4))
theta_4, J_history_4 = gradientDescentMulti(Xtr_4, ytr, theta_4, alpha, num_iters)
print(computeCostMulti(Xtr_4, ytr, theta_4), "third training error")

print(computeCostMulti(Xcv_4, ycv, theta_4),"third cross validation error")





    

nan


ValueError: shapes (10800,17) and (18,) not aligned: 17 (dim 1) != 18 (dim 0)

In [None]:
#computinng test errors for the  four degrees


#d =1
print(computeCostMulti(Xte, yte, theta),"first testing error")

#d =2
print(computeCostMulti(Xte_2, yte, theta_2),"second testing error")

#d =3
print(computeCostMulti(Xte_3, yte, theta_3),"third testing error")

#d =4
print(computeCostMulti(Xte_4, yte, theta_4),"fourth testing error")






In [None]:
def gradientDescentMultiRegularized(X, y, theta, alpha, num_iters,lambda_):
    
    #regularization
    
     m = y.shape[0]
        
    theta = theta.copy()
    
    h = np.dot(X,theta)
    J_history = []
    for i in range(num_iters):
        
        theta = theta - ((alpha/m)* np.sum(np.dot((h-y),X))) + (np.dot(lambda_, theta))
        
    J_history.append(computeCostMultiRegularized(X, y, theta,lambda_))
    
    return theta, J_history

In [None]:
#regularization cont_

# Load data
data = np.genfromtxt(os.path.join( 'house_prices_data_training_data.csv'), delimiter=',')

X = data[:, 0:17]
y = data[:, 17]

Xtr = data[1:10801, 0:17]
ytr = data[1:10801, 17]

Xcv = data[10801:14401, 0:17]
ycv = data[10801:14401, 17]

Xte = data[14401:18001, 0:17]
yte = data[14401:18001, 17]

m = y.size

alpha = 0.03

num_iters = 1000


#test order of polynomials from 1 to 4

#initialize variables for different orders

#d=2
X_2 = np.concatenate((X, np.square(X)), axis = 1)

#d=3
X_3 = np.concatenate((X_2, np.power(X,3)), axis = 1)

#d=4
X_4 = np.concatenate((X_3, np.power(X,4)), axis = 1)

#initialize variables for training, validation and testing (different orders)

#d=2
Xtr_2 = X_2[1:10801, 0:17]
Xcv_2 = X_2[10801:14401, 0:17]
Xte_2 = X_2[14401:18000, 0:17]

#d=3
Xtr_3 = X_3[1:10801, 0:17]
Xcv_3 = X_3[10801:14401, 0:17]
Xte_3 = X_3[14401:18000, 0:17]

#d=4
Xtr_4 = X_4[1:10801, 0:17]
Xcv_4 = X_4[10801:14401, 0:17]
Xte_4 = X_4[14401:18000, 0:17]



#normalize first order

X_norm, mu, sigma =featureNormalize(X)

X = np.concatenate([np.ones((m, 1)), X_norm], axis=1)

Xtr=X[1:10800,0:17]
Xcv=X[10801:14400,0:17]
Xte=X[14401:18000,0:17]


theta = np.zeros(18)
lambda_=0.1
print(computeCostMultiRegularized(Xtr, ytr, theta,lambda_))
theta, J_history = gradientDescentMultiRegularized(Xtr, ytr, theta, alpha, num_iters,lambda_)
print(computeCostMultiRegularized(Xtr, ytr, theta,lambda_),"first regularized training error")

print(computeCostMulti(Xcv, ycv, theta),"first rregularized cross validationn error")


#normalize second order

Xtr_2_norm, mu_tr_2, sigma_tr_2 =featureNormalize_2(Xtr_2)
Xcv_2_norm, mu_cv_2, sigma_cv_2 =featureNormalize_2(Xcv_2)
Xte_2_norm, mu_te_2, sigma_te_2 =featureNormalize_2(Xte_2)
Xtr_2 = np.concatenate([np.ones((10800, 1)), Xtr_2_norm], axis=1)
Xcv_2 = np.concatenate([np.ones((3600, 1)), Xcv_2_norm], axis=1)
Xte_2 = np.concatenate([np.ones((3600, 1)), Xte_2_norm], axis=1)



theta = np.zeros(35)
lambda_=0.1
print(computeCostMultiRegularized(Xtr_2, ytr, theta_2,lambda_))
theta_2, J_history_2 = gradientDescentMultiRegularized(Xt_2, ytr, theta_2, alpha, num_iters,lambda_)
print(computeCostMultiRegularized(Xtr_2, ytr, theta_2,lambda_),"second regularized training error")

print(computeCostMulti(Xcv_2, ycv, theta_2),"second regularized cross validationn error")




#normalize third order

Xtr_3_norm, mu_tr_3, sigma_tr_3 =featureNormalize_2(Xtr_3)
Xcv_3_norm, mu_cv_3, sigma_cv_3 =featureNormalize_2(Xcv_3)
Xte_3_norm, mu_te_3, sigma_te_3 =featureNormalize_2(Xte_3)
Xtr_3 = np.concatenate([np.ones((10800, 1)), Xtr_3_norm], axis=1)
Xcv_3 = np.concatenate([np.ones((3600, 1)), Xcv_3_norm], axis=1)
Xte_3 = np.concatenate([np.ones((3600, 1)), Xte_3_norm], axis=1)



theta = np.zeros(52)
lambda_=0.1
print(computeCostMultiRegularized(Xtr_3, ytr, theta_3,lambda_))
theta_3, J_history_3 = gradientDescentMultiRegularized(Xt_3, ytr, theta_3, alpha, num_iters,lambda_)
print(computeCostMultiRegularized(Xtr_3, ytr, theta_3,lambda_),"second regularized training error")

print(computeCostMulti(Xcv_3, ycv, theta_3),"third regularized cross validationn error")



#normalize third order

Xtr_4_norm, mu_tr_4, sigma_tr_4 =featureNormalize_2(Xtr_4)
Xcv_4_norm, mu_cv_4, sigma_cv_4 =featureNormalize_2(Xcv_4)
Xte_4_norm, mu_te_4, sigma_te_4 =featureNormalize_2(Xte_4)
Xtr_4 = np.concatenate([np.ones((10800, 1)), Xtr_4_norm], axis=1)
Xcv_4 = np.concatenate([np.ones((3600, 1)), Xcv_4_norm], axis=1)
Xte_4 = np.concatenate([np.ones((3600, 1)), Xte_4_norm], axis=1)



theta = np.zeros(69)
lambda_=0.1
print(computeCostMultiRegularized(Xtr_4, ytr, theta_4,lambda_))
theta_4, J_history_4 = gradientDescentMultiRegularized(Xt_4, ytr, theta_4, alpha, num_iters,lambda_)
print(computeCostMultiRegularized(Xtr_4, ytr, theta_4,lambda_),"second regularized training error")

print(computeCostMulti(Xcv_4, ycv, theta_4),"third regularized cross validationn error")














In [None]:
#regularization testing error for four degrees

#d=1
print(computeCostMultiRegularized(Xte, yte, theta,lambda_),"first regularized testing error")

#d=2
print(computeCostMultiRegularized(Xte_2, yte, theta_2,lambda_),"second regularized testing error")

#d=3
print(computeCostMultiRegularized(Xte_3, yte, theta_3,lambda_),"third regularized testing error")

#d=4
print(computeCostMultiRegularized(Xte_4, yte, theta_4,lambda_),"fourth regularized testing error")


