In [95]:
import pandas as pd
import numpy as np
import random

In [96]:
def train_test_split(data, train_fraction=0.6, seed=30):
    """Split a data frame into train/test features and targets.

    The last column of ``data`` is used as the target; every preceding
    column is a feature. ``int(train_fraction * n_rows)`` distinct rows are
    sampled for training and all remaining rows form the test set.

    Rows are selected strictly by *position*. The sampled values are
    positions (they are drawn from ``range(n_rows)``), so the test set is
    built from a positional boolean mask rather than by comparing them with
    index labels. This keeps the two sets disjoint and exhaustive for any
    index (non zero-based, non-integer, duplicated, ...). For a default
    ``RangeIndex`` the result is the same as label-based selection.

    Parameters
    ----------
    data : pandas.DataFrame
        Table whose last column is the target.
    train_fraction : float, default 0.6
        Fraction of rows used for training (truncated towards zero).
    seed : int, default 30
        Seed of the private random generator used for sampling.

    Returns
    -------
    features_train : numpy.ndarray
        Training features, in sampled (shuffled) order.
    y : pandas.Series
        Training targets, in the same order as ``features_train``.
    features_test : numpy.ndarray
        Test features, in their original row order.
    y_test : pandas.Series
        Test targets, in the same order as ``features_test``.

    Raises
    ------
    ValueError
        Propagated from ``random.sample`` when the requested sample size is
        negative or larger than the number of rows.
    """
    X = data.iloc[:, :-1]
    Y = data.iloc[:, -1]
    n_rows = data.shape[0]

    # A private generator yields the same draw as random.seed(seed) followed
    # by random.sample(...), without resetting the module-level RNG state.
    rng = random.Random(seed)
    index = rng.sample(range(n_rows), int(train_fraction * n_rows))

    # Positional membership mask: True for rows that went to the train set.
    is_train = np.zeros(n_rows, dtype=bool)
    is_train[index] = True

    features_train = X.iloc[index].to_numpy()
    y = Y.iloc[index]
    features_test = X.iloc[~is_train].to_numpy()
    y_test = Y.iloc[~is_train]
    return features_train, y, features_test, y_test
    

In [97]:
def record_parameters(features):
    """Return the column-wise mean and standard deviation of ``features``.

    Both statistics come from the object's own ``mean``/``std`` methods
    called with ``axis=0``, so the result has one entry per column.

    Returns
    -------
    tuple
        ``(mean, std)`` in that order.
    """
    column_means = features.mean(axis=0)
    column_stds = features.std(axis=0)
    return column_means, column_stds

In [98]:
# Z-score normalization of a data set, column by column
def normalize(A):
    """Centre each column of ``A`` on its mean and divide by its std.

    The mean and standard deviation are taken from ``A`` itself via
    ``A.mean(axis=0)`` and ``A.std(axis=0)``.
    """
    column_mean = A.mean(axis=0)
    column_std = A.std(axis=0)
    return (A - column_mean) / column_std

In [99]:
def gradient_descent(learning, tolerance, features=None, targets=None, max_iterations=50000):
    """Fit linear-regression weights by batch gradient descent on the MSE.

    Starting from all-zero weights, each pass predicts ``features @ weights``,
    measures the RMSE of that prediction and then moves the weights against
    the gradient of the mean squared error. The loop stops once the absolute
    change in RMSE between two consecutive passes is no longer greater than
    ``tolerance`` or the iteration cap is exceeded.

    Parameters
    ----------
    learning : float
        Step size multiplied with the gradient.
    tolerance : float
        Threshold on the absolute change of the RMSE between passes.
    features : array-like of shape (n_samples, n_weights), optional
        Design matrix (including any intercept column). When omitted, the
        notebook-level global ``features_array`` is used, as before.
    targets : array-like of length n_samples, optional
        Observed target values. When omitted, the notebook-level global
        ``y`` is used, as before.
    max_iterations : int, default 50000
        Iteration cap. The loop condition is ``i <= max_iterations``, so up
        to ``max_iterations + 1`` passes can run.

    Returns
    -------
    weights : numpy.ndarray
        Weights after the final update.
    RMSE : float
        RMSE measured during the final pass, i.e. with the weights as they
        were *before* that pass's update.
    i : int
        Number of passes performed.
    """
    # Backward compatibility: earlier calls relied on these notebook globals.
    X = features_array if features is None else features
    targets = y if targets is None else targets

    # Initialise RMSE, delta RMSE and the iteration counter.
    RMSE = 1e10
    i = 0
    delta_RMSE = 10
    weights = np.zeros(X.shape[1])
    n_samples = len(targets)

    # The number of iterations is determined by the tolerance on delta RMSE.
    while delta_RMSE > tolerance and i <= max_iterations:
        prev_RMSE = RMSE
        y_predicted = np.dot(X, weights)
        error = targets - y_predicted
        # Gradient of mean((targets - X @ w) ** 2) with respect to w.
        gradient = -2 * np.dot(X.T, error) / n_samples
        weights -= learning * gradient
        # RMSE of the prediction made before this pass's weight update.
        RMSE = np.sqrt(np.square(error).mean())
        delta_RMSE = np.abs(RMSE - prev_RMSE)
        i += 1
    return weights, RMSE, i

# Gradient Descent Algorithm Implementation For Housing Dataset

In [100]:
# Read the housing CSV; header=None means no row is used as column names
housing = pd.read_csv('housing.csv', header=None)

In [101]:
# Train/test split; `y` is later read as a global by gradient_descent
features, y, features_test, y_test = train_test_split(housing)

In [102]:
# Keep the training mean/std so the test set can be scaled with them later
mean_train, std_train = record_parameters(features)

In [103]:
# Z-score the training features column by column
normalized_array = normalize(features)

# Prepend a column of ones so the first weight captures the regression intercept
intercept_column = np.ones(normalized_array.shape[0])
features_array = np.column_stack((intercept_column, normalized_array))

In [104]:
# Run 1: learning rate 0.0004, tolerance 0.005
weights_1, RMSE_1, iterations_1 = gradient_descent(learning=.0004, tolerance=0.005)
print(
    ' Weights and RMSE for learning parameter : 0.0004 and tolerance : 0.005 \n ',
    f' Weights: \n {weights_1}',
    f' RMSE for Train Set : {RMSE_1}',
    f' Iterations :  {iterations_1}\n',
    sep='\n',
)

# Run 2: learning rate 0.01, tolerance 0.01
weights_2, RMSE_2, iterations_2 = gradient_descent(learning=.01, tolerance=0.01)
print(
    ' Weights and RMSE for learning parameter : .01 and tolerance : 0.01 \n ',
    f' Weights: \n {weights_2}',
    f' RMSE for Train Set : {RMSE_2}',
    f' Iterations : {iterations_2}\n',
    sep='\n',
)

# Run 3: learning rate 0.007, tolerance 0.05
weights_3, RMSE_3, iterations_3 = gradient_descent(learning=.007, tolerance=0.05)
print(
    ' Weights and RMSE for learning parameter : .007 and tolerance : 0.05 \n ',
    f' Weights: \n {weights_3}',
    f' RMSE for Train Set : {RMSE_3}',
    f' Iterations : {iterations_3}\n',
    sep='\n',
)

 Weights and RMSE for learning parameter : 0.0004 and tolerance : 0.005 
 
 Weights: 
 [15.38696046 -0.33665339  0.73576379 -0.68907852  0.61680243 -0.38791811
  2.64335977 -0.31793603 -0.88157885  0.25631368 -0.34202632 -1.38973333
  0.76588651 -2.31792383]
 RMSE for Train Set : 8.99674951560325
 Iterations :  1417

 Weights and RMSE for learning parameter : .01 and tolerance : 0.01 
 
 Weights: 
 [21.26078875 -0.23445029  0.85751182 -0.67033588  0.72794459 -0.61014313
  3.20555583 -0.31566107 -1.81899927  0.8053266  -0.26038346 -1.6020829
  1.02438945 -3.04561714]
 RMSE for Train Set : 5.151841207543486
 Iterations : 137

 Weights and RMSE for learning parameter : .007 and tolerance : 0.05 
 
 Weights: 
 [17.94212492 -0.29371793  0.7728102  -0.6814153   0.68360959 -0.41706864
  2.93768164 -0.29517369 -1.20690483  0.45468089 -0.28859546 -1.4978248
  0.8537396  -2.61330759]
 RMSE for Train Set : 7.007735744963284
 Iterations : 111



In [105]:
# Scale the test features with the statistics recorded on the train set
centered_test = features_test - mean_train
normalized_test = centered_test / std_train
# Same leading column of ones as the training design matrix
predictors_test = np.column_stack((np.ones(normalized_test.shape[0]), normalized_test))

In [106]:
# Test-set predictions, one per trained weight vector
# learning parameter : 0.0004 and tolerance : 0.005
y_predicted_test_1 = predictors_test.dot(weights_1)
# learning parameter : .01 and tolerance : 0.01
y_predicted_test_2 = predictors_test.dot(weights_2)
# learning parameter : .007 and tolerance : 0.05
y_predicted_test_3 = predictors_test.dot(weights_3)

In [107]:
# Test-set RMSE for each of the three trained weight vectors
RMSE_Test_1 = np.sqrt(np.square(y_test - y_predicted_test_1).mean())  # learning 0.0004, tolerance 0.005
RMSE_Test_2 = np.sqrt(np.square(y_test - y_predicted_test_2).mean())  # learning .01, tolerance 0.01
RMSE_Test_3 = np.sqrt(np.square(y_test - y_predicted_test_3).mean())  # learning .007, tolerance 0.05

# Report all three after they have been computed
print(
    f' RMSE for Test Set for learning parameter : 0.0004 and tolerance : 0.005  : {RMSE_Test_1}',
    f' RMSE for Test Set for learning parameter : .01 and tolerance : 0.01  : {RMSE_Test_2}',
    f' RMSE for Test Set for learning parameter : .007 and tolerance : 0.05 : {RMSE_Test_3}',
    sep='\n',
)

 RMSE for Test Set for learning parameter : 0.0004 and tolerance : 0.005  : 8.45865218010999
 RMSE for Test Set for learning parameter : .01 and tolerance : 0.01  : 4.819845941903359
 RMSE for Test Set for learning parameter : .007 and tolerance : 0.05 : 6.427016634672382


# Gradient Descent Algorithm Implementation For Yacht Dataset

In [108]:
# Read the yacht CSV; header=None means no row is used as column names
yachtdata = pd.read_csv('yachtData.csv', header=None)

In [109]:
# Train/test split; `y` is later read as a global by gradient_descent
features, y, features_test, y_test = train_test_split(yachtdata)

In [110]:
# Keep the training mean/std so the test set can be scaled with them later
mean_train, std_train = record_parameters(features)

# Z-score the training features column by column
normalized_array = normalize(features)

# Prepend a column of ones so the first weight captures the regression intercept
intercept_column = np.ones(normalized_array.shape[0])
features_array = np.column_stack((intercept_column, normalized_array))

In [111]:
# Run 1: learning rate 0.001, tolerance 0.001
weights_1, RMSE_1, iterations_1 = gradient_descent(learning=0.001, tolerance=0.001)
print(
    ' Weights and RMSE for learning parameter : 0.001 and tolerance : 0.001 \n ',
    f' Weights: \n {weights_1}',
    f' RMSE for Train Set : {RMSE_1}',
    f' Iterations :  {iterations_1}\n',
    sep='\n',
)

# Run 2: learning rate 0.01, tolerance 0.01
weights_2, RMSE_2, iterations_2 = gradient_descent(learning=0.01, tolerance=0.01)
print(
    ' Weights and RMSE for learning parameter : .01 and tolerance : 0.01 \n ',
    f' Weights: \n {weights_2}',
    f' RMSE for Train Set : {RMSE_2}',
    f' Iterations : {iterations_2}\n',
    sep='\n',
)

# Run 3: learning rate 0.007, tolerance 0.05
weights_3, RMSE_3, iterations_3 = gradient_descent(learning=.007, tolerance=0.05)
print(
    ' Weights and RMSE for learning parameter : .007 and tolerance : 0.05 \n ',
    f' Weights: \n {weights_3}',
    f' RMSE for Train Set : {RMSE_3}',
    f' Iterations : {iterations_3}\n',
    sep='\n',
)

 Weights and RMSE for learning parameter : 0.001 and tolerance : 0.001 
 
 Weights: 
 [ 8.9573005  -0.27105221 -0.29970924  0.20034681  0.40431788 -0.13836657
  9.9621455 ]
 RMSE for Train Set : 8.789325953698354
 Iterations :  1003

 Weights and RMSE for learning parameter : .01 and tolerance : 0.01 
 
 Weights: 
 [ 9.00167179 -0.2725755  -0.29933302  0.20232345  0.4047322  -0.13611549
 10.0113634 ]
 RMSE for Train Set : 8.782418351023008
 Iterations : 101

 Weights and RMSE for learning parameter : .007 and tolerance : 0.05 
 
 Weights: 
 [ 6.37976473 -0.18357004 -0.32147915  0.10176859  0.37035785 -0.25755503
  7.09741521]
 RMSE for Train Set : 10.447769306886263
 Iterations : 68



In [112]:
# Scale the test features with the statistics recorded on the train set
centered_test = features_test - mean_train
normalized_test = centered_test / std_train
# Same leading column of ones as the training design matrix
predictors_test = np.column_stack((np.ones(normalized_test.shape[0]), normalized_test))

In [113]:
# Test-set predictions, one per trained weight vector
# learning parameter : 0.001 and tolerance : 0.001
y_predicted_test_1 = predictors_test.dot(weights_1)
# learning parameter : .01 and tolerance : 0.01
y_predicted_test_2 = predictors_test.dot(weights_2)
# learning parameter : .007 and tolerance : 0.05
y_predicted_test_3 = predictors_test.dot(weights_3)

In [114]:
# Test-set RMSE for each of the three trained weight vectors
RMSE_Test_1 = np.sqrt(np.square(y_test - y_predicted_test_1).mean())  # learning 0.001, tolerance 0.001
RMSE_Test_2 = np.sqrt(np.square(y_test - y_predicted_test_2).mean())  # learning .01, tolerance 0.01
RMSE_Test_3 = np.sqrt(np.square(y_test - y_predicted_test_3).mean())  # learning .007, tolerance 0.05

# Report all three after they have been computed
print(
    f' RMSE for Test Set for learning parameter : 0.001 and tolerance : 0.001  : {RMSE_Test_1}',
    f' RMSE for Test Set for learning parameter : .01 and tolerance : 0.01  : {RMSE_Test_2}',
    f' RMSE for Test Set for learning parameter : .007 and tolerance : 0.05 : {RMSE_Test_3}',
    sep='\n',
)

 RMSE for Test Set for learning parameter : 0.001 and tolerance : 0.001  : 10.135363409826883
 RMSE for Test Set for learning parameter : .01 and tolerance : 0.01  : 10.109931023913854
 RMSE for Test Set for learning parameter : .007 and tolerance : 0.05 : 12.128293471415065


#  Gradient Descent Algorithm Implementation For Concrete Dataset

In [115]:
# Read the concrete CSV; header=None means no row is used as column names
concrete = pd.read_csv("concreteData.csv", header=None)

In [116]:
# Train/test split; `y` is later read as a global by gradient_descent
features, y, features_test, y_test = train_test_split(concrete)

In [117]:
# Keep the training mean/std so the test set can be scaled with them later
mean_train, std_train = record_parameters(features)

# Z-score the training features column by column
normalized_array = normalize(features)

# Prepend a column of ones so the first weight captures the regression intercept
intercept_column = np.ones(normalized_array.shape[0])
features_array = np.column_stack((intercept_column, normalized_array))

In [118]:
# Run 1: learning rate 0.0007, tolerance 0.0001
weights_1, RMSE_1, iterations_1 = gradient_descent(learning=0.0007, tolerance=0.0001)
print(
    ' Weights and RMSE for learning parameter : 0.0007 and tolerance : 0.0001 \n ',
    f' Weights: \n {weights_1}',
    f' RMSE for Train Set : {RMSE_1}',
    f' Iterations :  {iterations_1}\n',
    sep='\n',
)

# Run 2: learning rate 0.01, tolerance 0.01
weights_2, RMSE_2, iterations_2 = gradient_descent(learning=0.01, tolerance=0.01)
print(
    ' Weights and RMSE for learning parameter : .01 and tolerance : 0.01 \n ',
    f' Weights: \n {weights_2}',
    f' RMSE for Train Set : {RMSE_2}',
    f' Iterations : {iterations_2}\n',
    sep='\n',
)

# Run 3: learning rate 0.007, tolerance 0.05
weights_3, RMSE_3, iterations_3 = gradient_descent(learning=.007, tolerance=0.05)
print(
    ' Weights and RMSE for learning parameter : .007 and tolerance : 0.05 \n ',
    f' Weights: \n {weights_3}',
    f' RMSE for Train Set : {RMSE_3}',
    f' Iterations : {iterations_3}\n',
    sep='\n',
)

 Weights and RMSE for learning parameter : 0.0007 and tolerance : 0.0001 
 
 Weights: 
 [36.04578933  7.62767057  3.85536523  0.31351888 -5.70227368  3.23673577
 -1.34713997 -3.03918176  6.24862466]
 RMSE for Train Set : 10.522278353653654
 Iterations :  3131

 Weights and RMSE for learning parameter : .01 and tolerance : 0.01 
 
 Weights: 
 [34.54983917  7.11788105  3.27210339 -0.20702229 -5.12459927  3.75033705
 -1.38543555 -3.07015555  5.80159682]
 RMSE for Train Set : 10.809250233364638
 Iterations : 145

 Weights and RMSE for learning parameter : .007 and tolerance : 0.05 
 
 Weights: 
 [30.40939968  6.33102122  2.58540911 -0.58767906 -4.27753288  3.85787543
 -1.40789209 -2.80816839  4.8915673 ]
 RMSE for Train Set : 12.509432606553968
 Iterations : 127



In [119]:
# Scale the test features with the statistics recorded on the train set
centered_test = features_test - mean_train
normalized_test = centered_test / std_train
# Same leading column of ones as the training design matrix
predictors_test = np.column_stack((np.ones(normalized_test.shape[0]), normalized_test))

In [120]:
# Test-set predictions, one per trained weight vector
# learning parameter : 0.0007 and tolerance : 0.0001
y_predicted_test_1 = predictors_test.dot(weights_1)
# learning parameter : .01 and tolerance : 0.01
y_predicted_test_2 = predictors_test.dot(weights_2)
# learning parameter : .007 and tolerance : 0.05
y_predicted_test_3 = predictors_test.dot(weights_3)

In [121]:
# Test-set RMSE for the concrete data, one value per trained weight vector.
# RMSE on the test set for learning parameter : 0.0007 and tolerance : 0.0001
RMSE_Test_1 = np.sqrt(np.square(np.subtract(y_test,y_predicted_test_1)).mean())
# The label now states the settings weights_1 was actually trained with
# (0.0007 / 0.0001); it previously read 0.001 / 0.001, copied from another run.
print(f' RMSE for Test Set for learning parameter : 0.0007 and tolerance : 0.0001  : {RMSE_Test_1}')
# RMSE on the test set for learning parameter : .01 and tolerance : 0.01
RMSE_Test_2 = np.sqrt(np.square(np.subtract(y_test,y_predicted_test_2)).mean())
print(f' RMSE for Test Set for learning parameter : .01 and tolerance : 0.01  : {RMSE_Test_2}')
# RMSE on the test set for learning parameter : .007 and tolerance : 0.05
RMSE_Test_3 = np.sqrt(np.square(np.subtract(y_test,y_predicted_test_3)).mean())
print(f' RMSE for Test Set for learning parameter : .007 and tolerance : 0.05 : {RMSE_Test_3}')

 RMSE for Test Set for learning parameter : 0.001 and tolerance : 0.001  : 10.751717093319687
 RMSE for Test Set for learning parameter : .01 and tolerance : 0.01  : 10.851963485636311
 RMSE for Test Set for learning parameter : .007 and tolerance : 0.05 : 12.09249350491293
