Linear Regression (Supervised Machine Learning)

In [1]:
#Import Python Libraries
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression 
from sklearn.metrics import r2_score, mean_squared_error

In [2]:
#Create a random 10 length array
array = np.array([1,2,3,4,5,6,7,8,9, 10])
print(array)

[ 1  2  3  4  5  6  7  8  9 10]


In [3]:
#Reshap array 
new_array = array.reshape(-1,1)
print(new_array)

[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]]


In [4]:
#Create a ones array
ones_array = np.ones((10,1))
print(ones_array)

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [5]:
#Combine both arrays (new_array and ones_array)
X = np.hstack((new_array, ones_array))
print(X)

[[ 1.  1.]
 [ 2.  1.]
 [ 3.  1.]
 [ 4.  1.]
 [ 5.  1.]
 [ 6.  1.]
 [ 7.  1.]
 [ 8.  1.]
 [ 9.  1.]
 [10.  1.]]


In [6]:
#Convert combined array into dataframe (features, Inpute/Independent variables)
X_result = pd.DataFrame(X)
print(X_result)

      0    1
0   1.0  1.0
1   2.0  1.0
2   3.0  1.0
3   4.0  1.0
4   5.0  1.0
5   6.0  1.0
6   7.0  1.0
7   8.0  1.0
8   9.0  1.0
9  10.0  1.0


In [7]:
#Create another array and convert it into dataframe (Target, output, Dependent variable)
Y = pd.array([3,4,5,6,7,8,9,10,11,12])
Y_result = pd.DataFrame(Y)
print(Y_result)

    0
0   3
1   4
2   5
3   6
4   7
5   8
6   9
7  10
8  11
9  12


In [8]:
#Split data into train and test set
x_train, x_test, y_train, y_test = train_test_split(X_result, Y_result, test_size=0.2 , random_state=42)

In [9]:
#Create a model
model = LinearRegression()
model.fit(x_train,  y_train)

In [10]:
#Test a model
y_pred = model.predict(x_test)
print(y_test)
print(y_pred)

    0
8  11
1   4
[[11.]
 [ 4.]]


Manual/Internal process of Linear Regeression 

In [11]:
#Number of samples and features in train dataset
num_samples, num_features = x_train.shape
print(num_features)

2


In [12]:
#Generate a random weight/parameter
weight = np.random.rand(num_features, 1)
print(weight)

[[0.8619241 ]
 [0.39912289]]


In [13]:
#Predic H(x) = x * θ (θ = random weight/parameter that we assume)
y_pred = np.dot(x_train, weight)
print(y_pred)

[[5.57066749]
 [1.26104699]
 [7.29451569]
 [2.98489519]
 [9.01836389]
 [4.70874339]
 [3.84681929]
 [6.43259159]]


In [14]:
#Calculate gradient
m = len(x_train)
dw = 1/m * np.dot(x_train.T, y_pred - y_train)
print(dw)

[[-13.982670365084712]
 [-2.3602945618453317]]


In [15]:
#calculate gradient decent (weight update rule)
learning_rate = 0.01
weight = weight - learning_rate * dw
print(weight)

[[1.0017508036597595]
 [0.4227258337241028]]


In [16]:
#Create a class and put the above steps inside a method
class LinearRegressions:
    def __init__(self, num_features):
        self.num_iteration = 10000
        self.learning_rate = 0.01
        self.weight = np.random.rand(num_features, 1)

    def fit(self, x_train, y_train):
        for i in range(self.num_iteration):
            y_pred = np.dot(x_train, self.weight)
            dw = 1/2 * np.dot(x_train.T, y_pred - y_train)
            self.weight = self.weight - learning_rate * dw

    def predict(self, x_test):
        return np.dot(x_test, self.weight)


In [17]:
#Print result 
model = LinearRegressions(num_features)
model.fit(x_train, y_train)
y_predict = model.predict(x_test)
print(y_predict)
print(y_test)

[[11.000000000000004]
 [3.9999999999999902]]
    0
8  11
1   4


In [18]:
#Print result using inbuilt methods
mse = mean_squared_error(y_predict, y_test)
r2 = r2_score(y_predict, y_test)
print("MSE :", mse )
print("R2 :", r2 )

MSE : 5.403697200763931e-29
R2 : 1.0
