Linear Regression (Supervised Machine Learning)

In [91]:
#Import Python Libraries
import numpy as np 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression 
from sklearn.metrics import r2_score, mean_squared_error

In [92]:
#Create a random 10 length array
array = np.array([1,2,3,4,5,6,7,8,9, 10])
print(array)

[ 1  2  3  4  5  6  7  8  9 10]


In [93]:
#Reshap array 
new_array = array.reshape(-1,1)
print(new_array)

[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]]


In [94]:
#Create a ones array
ones_array = np.ones((10,1))
print(ones_array)

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [95]:
#Combine both arrays (new_array and ones_array)
X = np.hstack((new_array, ones_array))
print(X)

[[ 1.  1.]
 [ 2.  1.]
 [ 3.  1.]
 [ 4.  1.]
 [ 5.  1.]
 [ 6.  1.]
 [ 7.  1.]
 [ 8.  1.]
 [ 9.  1.]
 [10.  1.]]


In [96]:
#Convert combined array into dataframe (features, Input/Independent variables)
X_result = pd.DataFrame(X)
print(X_result)

      0    1
0   1.0  1.0
1   2.0  1.0
2   3.0  1.0
3   4.0  1.0
4   5.0  1.0
5   6.0  1.0
6   7.0  1.0
7   8.0  1.0
8   9.0  1.0
9  10.0  1.0


In [97]:
#Create another array and convert it into dataframe (Target, output, Dependent variable)
Y = pd.array([0,0,0,0,0,1,1,1,1,1])
Y_result = pd.DataFrame(Y)
print(Y_result)

   0
0  0
1  0
2  0
3  0
4  0
5  1
6  1
7  1
8  1
9  1


In [98]:
#Split data into train and test set
x_train, x_test, y_train, y_test = train_test_split(X_result, Y_result, test_size=0.2 , random_state=42)

In [99]:
#Create a model
model = LogisticRegression()
model.fit(x_train,  y_train)

  y = column_or_1d(y, warn=True)


In [100]:
#Test a model
y_pred = model.predict(x_test)
print(y_test)
print(y_pred)

   0
8  1
1  0
[1. 0.]


Manual/Internal process of Linear Regeression 

In [101]:
#Number of samples and features in train dataset
num_samples, num_features = x_train.shape
print(num_features)

2


In [102]:
#Generate a random weight/parameter
weight = np.random.rand(num_features, 1)
print(weight)

[[0.34925939]
 [0.102565  ]]


In [103]:
#Predic H(x) = x * θ (θ = random weight/parameter that we assume)
y_pred = np.dot(x_train, weight)
print(y_pred)

[[2.19812136]
 [0.45182439]
 [2.89664014]
 [1.15034318]
 [3.59515893]
 [1.84886197]
 [1.49960257]
 [2.54738075]]


In [104]:
#Calculate gradient
m = len(x_train)
dw = 1/m * np.dot(x_train.T, y_pred - y_train)
print(dw)

[[9.786334740672482]
 [1.5234916621602403]]


In [105]:
#calculate gradient decent (weight update rule)
learning_rate = 0.01
weight = weight - learning_rate * dw
print(weight)

[[0.2513960455299871]
 [0.08733008438672225]]


In [106]:
#Gradient Descent Looping
num_iterations = 1000
learning_rate = 0.01

for i in range(num_iterations):
    y_pred = np.dot(x_train, weight)
    dw = 1/m*np.dot(x_train.T, y_pred-y_train)
    weight = weight - learning_rate*dw

print(weight)

[[0.14591346780268044]
 [-0.29063767690419495]]


In [107]:
#Methods to predict the new data
def predict(X, weights):
    return np.dot(X, weights)

y_pred = predict(x_test, weight)
print(y_pred)
print(y_test)

[[1.022583533319929]
 [0.0011892587011659295]]
   0
8  1
1  0


In [110]:
#Create a class and put the above steps inside a method
class LogisticRegressions:
    def __init__(self, num_features):
        self.num_iteration = 10000
        self.learning_rate = 0.01
        self.weight = np.random.rand(num_features, 1)

    def sigmoid(self, Z):
        Z = np.divide(1,np.add(1, np.exp(-Z)))
        return np.where(Z >= 0.5, 1, 0);

    def fit(self, x_train, y_train):
        for i in range(self.num_iteration):
            y_pred = np.dot(x_train, self.weight)
            y_pred = self.sigmoid(y_pred)
            dw = 1/m * np.dot(x_train.T, y_pred - y_train)
            self.weight = self.weight - learning_rate * dw

    def predict(self, x_test):
        return self.sigmoid(np.dot(x_test, self.weight))


In [111]:
#Print result 
model = LogisticRegressions(num_features)
model.fit(x_train, y_train)
y_predict = model.predict(x_test)
print(y_predict)
print(y_test)

TypeError: loop of ufunc does not support argument 0 of type float which has no callable exp method

In [None]:
#Print result using inbuilt methods
mse = mean_squared_error(y_predict, y_test)
r2 = r2_score(y_predict, y_test)
print("MSE :", mse )
print("R2 :", r2 )

MSE : 0.0018579072532697567
R2 : 0.9937011841773752
