# Batch Gradient Descent

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv("BostonHousing.csv")

df.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 506 entries, 0 to 505
Data columns (total 14 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   crim     506 non-null    float64
 1   zn       506 non-null    float64
 2   indus    506 non-null    float64
 3   chas     506 non-null    int64  
 4   nox      506 non-null    float64
 5   rm       506 non-null    float64
 6   age      506 non-null    float64
 7   dis      506 non-null    float64
 8   rad      506 non-null    int64  
 9   tax      506 non-null    int64  
 10  ptratio  506 non-null    float64
 11  b        506 non-null    float64
 12  lstat    506 non-null    float64
 13  medv     506 non-null    float64
dtypes: float64(11), int64(3)
memory usage: 55.5 KB


In [5]:
df.shape

(506, 14)

In [6]:
df.isnull().sum()

crim       0
zn         0
indus      0
chas       0
nox        0
rm         0
age        0
dis        0
rad        0
tax        0
ptratio    0
b          0
lstat      0
medv       0
dtype: int64

In [7]:
input_features = df.iloc[:,:-1]
outpot = df.iloc[:,-1:]

print(input_features[:5])
print(outpot[:5])

      crim    zn  indus  chas    nox     rm   age     dis  rad  tax  ptratio  \
0  0.00632  18.0   2.31     0  0.538  6.575  65.2  4.0900    1  296     15.3   
1  0.02731   0.0   7.07     0  0.469  6.421  78.9  4.9671    2  242     17.8   
2  0.02729   0.0   7.07     0  0.469  7.185  61.1  4.9671    2  242     17.8   
3  0.03237   0.0   2.18     0  0.458  6.998  45.8  6.0622    3  222     18.7   
4  0.06905   0.0   2.18     0  0.458  7.147  54.2  6.0622    3  222     18.7   

        b  lstat  
0  396.90   4.98  
1  396.90   9.14  
2  392.83   4.03  
3  394.63   2.94  
4  396.90   5.33  
   medv
0  24.0
1  21.6
2  34.7
3  33.4
4  36.2


In [8]:
from sklearn.preprocessing import StandardScaler

Standard = StandardScaler()

scaled_input_features = Standard.fit_transform(input_features)

print(scaled_input_features[:5])

[[-0.41978194  0.28482986 -1.2879095  -0.27259857 -0.14421743  0.41367189
  -0.12001342  0.1402136  -0.98284286 -0.66660821 -1.45900038  0.44105193
  -1.0755623 ]
 [-0.41733926 -0.48772236 -0.59338101 -0.27259857 -0.74026221  0.19427445
   0.36716642  0.55715988 -0.8678825  -0.98732948 -0.30309415  0.44105193
  -0.49243937]
 [-0.41734159 -0.48772236 -0.59338101 -0.27259857 -0.74026221  1.28271368
  -0.26581176  0.55715988 -0.8678825  -0.98732948 -0.30309415  0.39642699
  -1.2087274 ]
 [-0.41675042 -0.48772236 -1.30687771 -0.27259857 -0.83528384  1.01630251
  -0.80988851  1.07773662 -0.75292215 -1.10611514  0.1130321   0.41616284
  -1.36151682]
 [-0.41248185 -0.48772236 -1.30687771 -0.27259857 -0.83528384  1.22857665
  -0.51117971  1.07773662 -0.75292215 -1.10611514  0.1130321   0.44105193
  -1.02650148]]


In [9]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(scaled_input_features, outpot, random_state = 2, test_size = 0.2)

print(X_train[:5])
print(X_test[:5])
print(Y_train[:5])
print(Y_test[:5])

[[-0.3993852  -0.48772236 -0.54814912 -0.27259857 -0.53294229  0.13016481
  -0.50762365  0.35431986 -0.52300145 -0.72006176  0.52915834  0.44105193
  -0.81063385]
 [-0.41119127 -0.48772236 -0.75534039 -0.27259857 -0.48111231 -0.61920565
  -0.96279897  0.06615109 -0.52300145 -0.76757602  0.34421334  0.44105193
  -0.54430367]
 [-0.41823068  2.94584308 -1.36815964 -0.27259857 -1.46588193 -0.07783535
  -1.31840469  2.516679   -0.98284286 -0.99326877 -0.11814915 -0.16527708
   0.03881927]
 [-0.41734159 -0.48772236 -0.59338101 -0.27259857 -0.74026221  1.28271368
  -0.26581176  0.55715988 -0.8678825  -0.98732948 -0.30309415  0.39642699
  -1.2087274 ]
 [-0.32270106 -0.48772236 -0.43725801 -0.27259857 -0.14421743 -0.97679498
   0.60897831  0.31353319 -0.6379618  -0.6012761   1.17646583 -0.58389629
   0.54064142]]
[[ 0.25690863 -0.48772236  1.01599907 -0.27259857  1.36749033  0.32534305
   0.75833271 -0.47225199  1.66124525  1.53092646  0.80657583  0.40728171
  -0.33123951]
 [-0.28941366 -0.4877

In [10]:
print(X_train.shape)
print(X_test.shape)
print(Y_train.shape)
print(Y_test.shape)

(404, 13)
(102, 13)
(404, 1)
(102, 1)


In [11]:
from sklearn.linear_model import SGDRegressor

SGD = SGDRegressor()

SGD.fit(X_train, Y_train)

In [12]:
y_predict = SGD.predict(X_test)

In [13]:
from sklearn.metrics import mean_squared_error

print("Mean Squared Error is", mean_squared_error(Y_test, y_predict))

Mean Squared Error is 18.54728358144482
