# Linear Regression using Gradient Descent Algorithm

In [51]:
import numpy as np
from sklearn import datasets, metrics
from sklearn.preprocessing import StandardScaler

# Load the California housing features (X) and targets (y) as plain arrays.
X, y = datasets.fetch_california_housing(return_X_y=True)

# The first 16000 rows, taken in dataset order, form the training split.
X_train_temp1 = X[:16000, :]

# Design matrix: a leading column of ones followed by the raw features, so
# the dot product with the weight vector includes a constant (bias) term.
X_train = np.hstack((np.ones((X_train_temp1.shape[0], 1)), X_train_temp1))
print("Type of X_train: ", type(X_train), "Shape of X_train: ", X_train.shape)

Type of X_train:  <class 'numpy.ndarray'> Shape of X_train:  (16000, 9)


In [52]:
# Targets for the same first 16000 rows that make up X_train.
y_train = y[:16000]

# Held-out rows start right after the training split.
# NOTE(review): scikit-learn documents this dataset as 20640 rows, so the
# 20604 upper bound looks like a digit transposition that leaves the last 36
# rows unused - confirm before changing, since y_test uses the same bound.
X_test_temp1 = X[16000:20604, :]

# Same layout as X_train: a leading column of ones followed by the features.
X_test = np.hstack((np.ones((X_test_temp1.shape[0], 1)), X_test_temp1))
print("Type of X_test: ", type(X_test), "Shape of X_test: ", X_test.shape)

Type of X_test:  <class 'numpy.ndarray'> Shape of X_test:  (4604, 9)


In [53]:
# Targets for the held-out rows; the bounds mirror the X_test slice.
y_test = y[16000:20604]

# Standardize the features with statistics learned from the training split
# only, then apply that same transform to the test split. Column 0 (the ones
# column) is skipped: standardizing a constant column would zero it out and
# remove the bias term.
scalar = StandardScaler()
scalar.fit(X_train[:, 1:])

X_train[:, 1:] = scalar.transform(X_train[:, 1:])
X_test[:, 1:] = scalar.transform(X_test[:, 1:])

# Initial weights drawn from U[0, 1). A fixed-seed generator is used instead
# of the unseeded global RNG so that Restart & Run All reproduces the same
# starting point, and therefore the same trained theta and metrics.
theta = np.random.default_rng(42).uniform(0, 1, size=X_train.shape[1])
print("Type of theta: ", type(theta), "Shape of theta: ", theta.shape)

Type of theta:  <class 'numpy.ndarray'> Shape of theta:  (9,)


In [54]:
# Batch gradient descent for linear regression on the standardized design
# matrix. Each iteration uses every training row to compute the gradient.
n_iterations = 1000
alpha = 0.01          # learning rate
m = X_train.shape[0]  # number of training rows
n = X_train.shape[1]  # number of weights (features + ones column)

for i in range(n_iterations):
    y_pred = np.dot(X_train, theta)
    error = y_pred - y_train
    # All n gradient sums in one matrix-vector product: entry j equals
    # sum(error * X_train[:, j]), which the per-weight Python loop computed
    # one column at a time.
    update = np.dot(X_train.T, error)
    theta = theta - (1/m)*(alpha)*update

print("Shape of theta: ", theta.shape)

# Evaluate the fitted weights on the held-out rows.
predictions = np.dot(X_test, theta)

print("MAE: ", metrics.mean_absolute_error(y_true = y_test, y_pred = predictions))
print("MSE: ", metrics.mean_squared_error(y_true = y_test, y_pred = predictions))

Shape of theta:  (9,)
MAE:  0.5825368710295348
MSE:  0.6421915148455236
