# Linear Regression using Scikit-Learn

### Import the required tools

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
# Show NumPy arrays with 2 digits of precision when printed.
np.set_printoptions(precision=2)

>**sklearn.linear_model.SGDRegressor** := Fits a linear regression model using stochastic gradient descent  
>**sklearn.preprocessing.StandardScaler** := Performs z-score normalization (zero mean, unit variance per feature)

In [7]:
# Read the comma-separated table, skipping its first line. The first four
# columns become the feature matrix and the fifth column becomes y_train.
data = np.loadtxt("./Data/Houses.txt", skiprows=1, delimiter=",")
X_train, y_train = data[:, :4], data[:, 4]

In [8]:
# Feature labels (presumably matching the four columns of X_train, in order).
X_features = [
    "size(sqft)",
    "bedrooms",
    "floors",
    "age",
]

## Scale/Normalize the training Data

In [13]:
# Learn the per-column scaling statistics from the training features, then
# apply that scaling to the same data.
scaler = StandardScaler().fit(X_train)
X_norm = scaler.transform(X_train)

# Compare the per-column (max - min) spread before and after scaling.
print(f"Peak to Peak range by column in Raw        X:{np.ptp(X_train,axis=0)}")
print(f"Peak to Peak range by column in Normalized X:{np.ptp(X_norm,axis=0)}")

Peak to Peak range by column in Raw        X:[2.41e+03 4.00e+00 1.00e+00 9.50e+01]
Peak to Peak range by column in Normalized X:[5.85 6.14 2.06 3.69]


In [16]:
def zscore_normalize(x):
    """Z-score normalize ``x`` along axis 0.

    Each column is shifted by its mean and divided by its standard deviation
    (``np.std`` with NumPy's default ``ddof=0``).

    Args:
        x: array-like of numeric values; statistics are computed over axis 0.

    Returns:
        numpy array with the same shape as ``x`` holding the normalized values.
        Columns whose standard deviation is zero (constant columns) are
        returned as all zeros instead of NaN/inf.
    """
    mu = np.mean(x, axis=0)
    sigma = np.std(x, axis=0)

    # A constant column has sigma == 0; dividing by it would produce NaN/inf.
    # Use a unit scale for those columns so they normalize to 0.
    safe_sigma = np.where(sigma == 0, 1.0, sigma)

    x_norm = (x - mu) / safe_sigma

    return x_norm

## Create a regression model

In [14]:
# Build the regressor and train it on the normalized features in one step
# (fit returns the estimator itself).
# NOTE(review): no random_state is set, so the fitted values may differ
# slightly between runs.
sgdr = SGDRegressor(max_iter=1000).fit(X_norm, y_train)
print(sgdr)
print(f"number of iterations completed: {sgdr.n_iter_}, number of weight updates: {sgdr.t_}")

SGDRegressor()
number of iterations completed: 120, number of weight updates: 11881.0


In [15]:
# Pull the learned weights and intercept off the fitted model.
w_norm, b_norm = sgdr.coef_, sgdr.intercept_
print(f"model parameters:                   w: {w_norm}, b:{b_norm}")
print("model parameters from previous lab: w: [110.56 -21.27 -32.71 -37.97], b: 363.16")

model parameters:                   w: [110.09 -21.02 -32.41 -38.1 ], b:[363.15]
model parameters from previous lab: w: [110.56 -21.27 -32.71 -37.97], b: 363.16


In [18]:
# Predictions from the fitted model's own predict method.
y_pred_sgdr = sgdr.predict(X_norm)

# The same predictions computed by hand from the learned parameters.
y_pred = (X_norm @ w_norm) + b_norm

# Predictions from the hand-written z-score normalization combined with the
# hard-coded parameters quoted from the previous lab.
x_norm = zscore_normalize(X_train)
w = [110.56, -21.27, -32.71, -37.97]
b = 363.16

y_pred_prev = (x_norm @ w) + b

# Compare with a floating-point tolerance: exact == on floats can report a
# spurious mismatch when two computations differ only by rounding.
print(np.allclose(y_pred_sgdr, y_pred))

print(f"first 4 prediction using normalized features: {y_pred[:4]}")
print(f"first 4 prediction using prevoious features: {y_pred_prev[:4]}")
print(f"first 4 target values: {y_train[:4]}")

True
first 4 prediction using normalized features: [295.11 485.97 389.64 492.14]
first 4 prediction using prevoious features: [295.18 485.98 389.52 492.15]
first 4 target values: [300.  509.8 394.  540. ]
