Logistic Regression (Supervised Machine Learning)

In [62]:
#Import Python Libraries
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import r2_score, mean_squared_error

In [63]:
# Build a 1-D array holding the integers 1 through 10 (fixed values, not random)
array = np.arange(1, 11)
print(array)

[ 1  2  3  4  5  6  7  8  9 10]


In [64]:
# Reshape the flat array into a single-column matrix (one row per value)
new_array = array[:, np.newaxis]
print(new_array)

[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]]


In [65]:
# Column of ones (10 rows x 1 column); once stacked next to the feature column
# it acts as the constant term multiplied by its own weight
ones_array = np.full((10, 1), 1.0)
print(ones_array)

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [66]:
# Put new_array and ones_array side by side: one row per sample, two columns
X = np.concatenate([new_array, ones_array], axis=1)
print(X)

[[ 1.  1.]
 [ 2.  1.]
 [ 3.  1.]
 [ 4.  1.]
 [ 5.  1.]
 [ 6.  1.]
 [ 7.  1.]
 [ 8.  1.]
 [ 9.  1.]
 [10.  1.]]


In [67]:
# Wrap the combined array in a DataFrame (features / input / independent variables)
X_result = pd.DataFrame(data=X)
print(X_result)

      0    1
0   1.0  1.0
1   2.0  1.0
2   3.0  1.0
3   4.0  1.0
4   5.0  1.0
5   6.0  1.0
6   7.0  1.0
7   8.0  1.0
8   9.0  1.0
9  10.0  1.0


In [68]:
# Target (output / dependent variable): five 0 labels followed by five 1 labels,
# stored as a single-column DataFrame
Y = pd.array([0] * 5 + [1] * 5)
Y_result = pd.DataFrame(data=Y)
print(Y_result)

   0
0  0
1  0
2  0
3  0
4  0
5  1
6  1
7  1
8  1
9  1


In [69]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(
    X_result,
    Y_result,
    test_size=0.2,
    random_state=42,
)

In [70]:
# Fit scikit-learn's logistic regression classifier on the training split.
# y_train is a single-column DataFrame, but scikit-learn expects a 1-D target and
# emits a DataConversionWarning when handed a column vector, so flatten it to a
# plain 1-D integer array before fitting.
model = LogisticRegression()
model.fit(x_train, y_train.to_numpy(dtype="int64").ravel())

  y = column_or_1d(y, warn=True)


In [71]:
# Evaluate on the held-out rows: show the true labels first, then the predictions
y_pred = model.predict(x_test)
print(y_test, y_pred, sep="\n")

   0
8  1
1  0
[1. 0.]


Manual/Internal process of Logistic Regression

In [72]:
# Row count (samples) and column count (features) of the training set
num_samples = x_train.shape[0]
num_features = x_train.shape[1]
print(num_features)

2


In [73]:
# Draw the starting weights/parameters (one per feature) uniformly from [0, 1).
# Seed NumPy's global generator first so that Restart & Run All reproduces the
# same starting point, and therefore the same training trajectory, every time.
np.random.seed(42)
weight = np.random.rand(num_features, 1)
print(weight)

[[0.82812399]
 [0.69545274]]


In [74]:
# Predict H(x) = x * θ, where θ is the randomly initialised weight/parameter vector
y_pred = x_train.to_numpy().dot(weight)
print(y_pred)

[[5.66419667]
 [1.52357672]
 [7.32044465]
 [3.1798247 ]
 [8.97669262]
 [4.83607268]
 [4.00794869]
 [6.49232066]]


In [75]:
# For logistic regression, the final prediction is made using the sigmoid function
def sigmoid(Z):
    """Turn raw scores into hard 0/1 labels via the logistic sigmoid.

    Parameters
    ----------
    Z : numeric scalar or array
        Raw scores.

    Returns
    -------
    numpy.ndarray
        Integer array shaped like ``Z``: 1 where ``1 / (1 + exp(-Z)) >= 0.5``,
        otherwise 0. The probabilities themselves are not returned.
    """
    probabilities = 1 / (1 + np.exp(-Z))
    labels = np.where(probabilities >= 0.5, 1, 0)
    return labels

# Convert the raw linear scores in y_pred into hard 0/1 labels
final_y_pred = sigmoid(y_pred)
print(final_y_pred)

[[1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]
 [1]]


In [76]:
# Gradient for the weights: X^T · (predicted labels - true labels), scaled by 1/m
# where m is the number of training rows
m = x_train.shape[0]
dw = (1 / m) * np.dot(x_train.T, final_y_pred - y_train)
print(dw)

[[1.625]
 [0.5  ]]


In [77]:
#Apply one gradient descent step (weight update rule): move the weights against the gradient dw
#NOTE: weight is overwritten from its own previous value, so every re-run of this
#cell applies one more update step on top of the last one.
learning_rate = 0.01
weight = weight - learning_rate * dw
print(weight)

[[0.81187399]
 [0.69045274]]


In [82]:
# Gradient descent loop: repeat the predict -> gradient -> update cycle a fixed number of times
learning_rate = 0.01
num_iterations = 1000

for iteration in range(num_iterations):
    y_pred = sigmoid(np.dot(x_train, weight))
    dw = (1 / m) * np.dot(x_train.T, y_pred - y_train)
    weight = weight - learning_rate * dw

print(weight)

[[ 0.0458663 ]
 [-0.23525026]]


In [83]:
# Method to predict labels for new data
def predict(X, weights):
    """Return the 0/1 labels that ``sigmoid`` assigns to the scores ``X · weights``.

    Parameters
    ----------
    X : array-like
        Feature rows to label.
    weights : array-like
        Weight vector multiplied with ``X`` via ``np.dot``.
    """
    scores = np.dot(X, weights)
    return sigmoid(scores)

# Label the held-out rows with the trained weights, then show the true labels for comparison
y_pred = predict(x_test, weight)
print(y_pred, y_test, sep="\n")

[[1]
 [0]]
   0
8  1
1  0


In [84]:
# Create a class and put the above manual steps inside its methods
class LogisticRegressions:
    """Binary 0/1 classifier trained with the manual gradient-descent steps.

    The model is a weight column vector (one entry per feature). Training
    thresholds the sigmoid output to 0/1 *before* forming the gradient, so only
    rows whose thresholded prediction differs from the label contribute to an
    update.
    """

    def __init__(self, num_features, num_iteration=1000, learning_rate=0.01):
        """Set the hyperparameters and draw random starting weights.

        Parameters
        ----------
        num_features : int
            Number of feature columns; the weight vector has shape
            ``(num_features, 1)``.
        num_iteration : int, optional
            Number of gradient-descent steps run by ``fit`` (default 1000).
        learning_rate : float, optional
            Step size applied to the gradient (default 0.01).
        """
        self.num_iteration = num_iteration
        self.learning_rate = learning_rate
        self.weight = np.random.rand(num_features, 1)

    def sigmoid(self, Z):
        """Return 1 where ``1 / (1 + exp(-Z)) >= 0.5`` and 0 elsewhere."""
        Z = np.divide(1, np.add(1, np.exp(-Z)))
        return np.where(Z >= 0.5, 1, 0)

    def fit(self, x_train, y_train):
        """Update ``self.weight`` by running ``self.num_iteration`` steps.

        Parameters
        ----------
        x_train : array-like of shape (n_samples, num_features)
            Training features.
        y_train : array-like of shape (n_samples,) or (n_samples, 1)
            0/1 training labels.

        Raises
        ------
        ValueError
            If ``x_train`` contains no rows.
        """
        features = np.asarray(x_train, dtype=float)
        # Force a column vector so a 1-D label array cannot broadcast into an
        # (n, n) matrix when subtracted from the (n, 1) predictions.
        targets = np.asarray(y_train, dtype=float).reshape(-1, 1)

        # Use this data set's own size and this instance's learning rate rather
        # than whatever `m` / `learning_rate` happen to exist as notebook globals.
        num_samples = features.shape[0]
        if num_samples == 0:
            raise ValueError("x_train must contain at least one sample")

        for _ in range(self.num_iteration):
            y_pred = self.sigmoid(np.dot(features, self.weight))
            dw = 1 / num_samples * np.dot(features.T, y_pred - targets)
            self.weight = self.weight - self.learning_rate * dw

    def predict(self, x_test):
        """Return the 0/1 labels for ``x_test`` as an ``(n_samples, 1)`` array."""
        return self.sigmoid(np.dot(np.asarray(x_test, dtype=float), self.weight))


In [85]:
# Train the hand-written classifier, then show its predictions followed by the true test labels
model = LogisticRegressions(num_features)
model.fit(x_train, y_train)
y_predict = model.predict(x_test)
print(y_predict, y_test, sep="\n")

[[1]
 [0]]
   0
8  1
1  0
