In [159]:
from sklearn.datasets import load_iris
import time
#import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
#from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#plt.rcParams['figure.figsize'] = (20.0, 10.0)

# Load the iris data set and keep only its first two feature columns.
iris = datasets.load_iris()
X = iris.data[:, :2]
#X = (X - np.mean(X, axis = 0))/X.std()  # normalization

# Binary labels: 1 where the iris target is not class 0, otherwise 0.
y = (iris.target != 0) * 1
#theta = np.zeros((X.shape[1], 1)) 
# One weight per feature column, all starting at zero (1-D vector).
theta = np.zeros(X.shape[1])


$h_\theta(x) = g(\theta^Tx)$

$z = \theta^Tx$

$g(z) = \frac{1}{1 + e^{-z}}$

In [164]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise.

    Parameters
    ----------
    z : scalar or numpy.ndarray
        Real-valued score(s).

    Returns
    -------
    Same shape as ``z``; every score squashed towards the 0..1 range.
    """
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)
# Linear score of every sample, then the sigmoid of those scores (the hypothesis h).
z = np.dot(X, theta)
h = sigmoid(z)

$h = g(X\theta)$

$J(\theta) = \frac{1}{m}\left(-y^T\log(h) - (1-y)^T\log(1-h)\right)$

In [165]:
def loss(h, y):
    """Mean binary cross-entropy between predictions ``h`` and labels ``y``.

    Parameters
    ----------
    h : numpy.ndarray
        Predicted values, passed to ``np.log`` directly (and as ``1 - h``).
    y : numpy.ndarray
        Labels; the first term is weighted by ``y``, the second by ``1 - y``.

    Returns
    -------
    The mean of the per-sample losses.
    """
    positive_term = -y * np.log(h)
    negative_term = (1 - y) * np.log(1 - h)
    per_sample = positive_term - negative_term
    return per_sample.mean()

$\nabla_\theta J(\theta) = \frac{1}{m} X^T (g(X\theta) - y)$

In [None]:
# Gradient of the loss with respect to theta: X^T (h - y) divided by the number of samples.
gradient = np.dot(X.T, (h - y)) / y.shape[0]

Then we update the weights by subtracting from them the gradient multiplied by the learning rate.

In [None]:
# A single gradient-descent step with learning rate lr.
# NOTE: `-=` modifies theta in place, so every re-run of this cell takes one more step.
lr = 0.01
theta -= lr * gradient

# Predictions
By calling the sigmoid function we get the probability that some input x belongs to class 1. Let's assign all probabilities ≥ 0.5 to class 1 and all probabilities < 0.5 to class 0. In practice this threshold should be chosen according to the business problem we are working on.

In [166]:
def predict_probs(X, theta):
    """Sigmoid of the linear scores ``X . theta``: one probability per row of ``X``."""
    scores = np.dot(X, theta)
    return sigmoid(scores)
def predict(X, theta, threshold=0.5):
    """Boolean class decisions: True where the predicted probability is >= ``threshold``."""
    probabilities = predict_probs(X, theta)
    return probabilities >= threshold

# Putting it all together

In [160]:
class LogisticRegression1:
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    lr : float
        Learning rate (step size) of every gradient-descent update.
    num_iter : int
        Number of gradient-descent iterations performed by ``fit``.
    fit_intercept : bool
        When True a column of ones is prepended to ``X`` so that the first
        weight acts as the intercept term.
    verbose : bool
        When True ``fit`` prints the loss every 10000 iterations.

    Attributes
    ----------
    theta : numpy.ndarray or None
        Weight vector. ``None`` until ``fit`` has sized it from the training
        data (one weight per column, intercept column included).
    """

    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=False, verbose=False):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
        # Must be stored: fit() reads self.verbose on every iteration.
        self.verbose = verbose
        # Sized in fit() from the data actually passed in, so the model does not
        # depend on whatever global `X` exists when the object is created.
        self.theta = None

    def __add_intercept(self, X):
        """Return ``X`` with a leading column of ones (the intercept feature)."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)), applied element-wise."""
        return 1 / (1 + np.exp(-z))

    def __loss(self, h, y):
        """Mean binary cross-entropy between predictions ``h`` and labels ``y``."""
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        """Run ``num_iter`` gradient-descent updates of ``theta`` on ``(X, y)``.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training inputs.
        y : array-like, n_samples values
            Binary labels (0 or 1); flattened to 1-D before use.

        Notes
        -----
        ``theta`` is (re)initialised to zeros when it is missing or when its
        length does not match the number of columns; otherwise training
        continues from the current weights.
        """
        X = np.asarray(X)
        # A column-shaped y would broadcast (h - y) into a matrix; keep it 1-D.
        y = np.asarray(y).ravel()
        if self.fit_intercept:
            X = self.__add_intercept(X)

        n_features = X.shape[1]
        if self.theta is None or np.shape(self.theta) != (n_features,):
            self.theta = np.zeros(n_features)

        for i in range(self.num_iter):
            z = np.dot(X, self.theta)
            h = self.__sigmoid(z)
            gradient = np.dot(X.T, (h - y)) / y.size
            self.theta -= self.lr * gradient

            if self.verbose and i % 10000 == 0:
                # Report the loss of the freshly updated weights.
                z = np.dot(X, self.theta)
                h = self.__sigmoid(z)
                print(f'loss: {self.__loss(h, y)} \t')

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for every row of ``X``.

        A model that has not been fitted yet behaves as if all weights were
        zero, i.e. it returns 0.5 for every sample.
        """
        X = np.asarray(X)
        if self.fit_intercept:
            X = self.__add_intercept(X)

        theta = self.theta
        if theta is None:
            theta = np.zeros(X.shape[1])
        return self.__sigmoid(np.dot(X, theta))

    def predict(self, X, threshold=0.5):
        """Return booleans: True where the predicted probability is >= ``threshold``."""
        return self.predict_prob(X) >= threshold
   

In [161]:
# Model with the default settings: lr=0.01, num_iter=100000, no intercept term.
model1 = LogisticRegression1()

In [162]:
# NOTE(review): no fit() call on model1 is visible before this point, so these
# predictions come from the model's initial all-zero weights.
preds = model1.predict(X)

In [163]:
# Fraction of samples whose predicted class equals the label.
(preds == y).mean()

0.6666666666666666

# Exercise 1
Repeat the code above, this time including the intercept term.

In [120]:
from sklearn.datasets import load_iris
import time
#import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
#from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#plt.rcParams['figure.figsize'] = (20.0, 10.0)

# Load the iris data set and keep only its first two feature columns.
iris = datasets.load_iris()
X = iris.data[:, :2]

# Binary labels: 1 where the iris target is not class 0, otherwise 0.
y = (iris.target != 0) * 1

m = len(y)
X0 = np.ones(m)
# Keep the design matrix without the intercept column for later cells.
Xpre = X
# Column of ones used as the intercept feature. It is created here so that this
# cell runs on a fresh kernel instead of relying on a variable from another cell.
intercept = np.ones((m, 1))
X = np.concatenate((intercept, X), axis=1)

# One weight per column of X (intercept column included), all starting at zero.
theta = np.zeros(X.shape[1])

In [111]:
# theta was created with one entry per column of X; display its shape to check.
theta.shape

(3,)

In [106]:
# Column vector of ones with one row per sample in X (the intercept feature).
intercept = np.ones((X.shape[0], 1))
intercept.shape

(150, 1)

# Exercise 2
Use a built-in optimizer (`scipy.optimize.fmin_tnc`) to carry out the optimization.

In [12]:
from sklearn.datasets import load_iris
import time
from scipy.optimize import fmin_tnc
#import matplotlib.pyplot as plt
#from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
#from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
#import matplotlib.pyplot as plt
#plt.rcParams['figure.figsize'] = (20.0, 10.0)

# Load the iris data set and keep only its first two feature columns (no intercept column).
iris = datasets.load_iris()
X = iris.data[:, :2]
#X = (X - np.mean(X, axis = 0))/X.std()  # normalization

# Binary labels: 1 where the iris target is not class 0, otherwise 0.
y = (iris.target != 0) * 1
#theta = np.zeros((X.shape[1], 1)) 
# Starting point for the optimizer: one zero weight per feature column.
theta = np.zeros(X.shape[1])

In [13]:
def sigmoid(x):
    """Activation function 1 / (1 + exp(-x)); squashes real values towards the 0..1 range."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def net_input(theta, x):
    """Weighted sum of the inputs: the dot product of ``x`` with the weights ``theta``."""
    weighted_sum = np.dot(x, theta)
    return weighted_sum

def probability(theta, x):
    """Sigmoid of the weighted input sum, i.e. the model's output for every row of ``x``."""
    scores = net_input(theta, x)
    return sigmoid(scores)

In [14]:
def cost_function(theta, x, y):
    """Cross-entropy cost of the logistic model, averaged over all training samples.

    Parameters
    ----------
    theta : numpy.ndarray
        Model weights.
    x : numpy.ndarray
        Training inputs; ``x.shape[0]`` is used as the number of samples.
    y : numpy.ndarray
        Labels; weight the ``log(p)`` term by ``y`` and the ``log(1 - p)``
        term by ``1 - y``.

    Returns
    -------
    The cost as a single number.
    """
    m = x.shape[0]
    # Evaluate the model once and reuse the result for both log terms.
    p = probability(theta, x)
    log_likelihood = np.sum(y * np.log(p) + (1 - y) * np.log(1 - p))
    return -(1 / m) * log_likelihood

def gradient(theta, x, y):
    """Gradient of the cost function with respect to ``theta``, evaluated at ``theta``.

    Computed as ``x.T`` times the residuals (model output minus labels),
    scaled by one over the number of rows of ``x``.
    """
    n_samples = x.shape[0]
    residuals = sigmoid(net_input(theta, x)) - y
    return (1 / n_samples) * np.dot(x.T, residuals)

In [15]:

def fit(x, y, theta):
    """Minimise ``cost_function`` with ``fmin_tnc``, starting from ``theta``.

    ``gradient`` is supplied as the analytic derivative and ``(x, y.flatten())``
    are forwarded as the extra arguments of both callables.

    Returns
    -------
    The first element of the ``fmin_tnc`` result (the optimised weights).
    """
    extra_args = (x, y.flatten())
    result = fmin_tnc(
        func=cost_function,
        x0=theta,
        fprime=gradient,
        args=extra_args,
    )
    return result[0]
# Optimised weights, with the search started from the all-zero theta defined above.
parameters = fit(X, y, theta)

In [16]:
# Show the weights returned by the optimizer.
print(parameters)

[  7.57842852 -13.22112074]


In [18]:
def predict(x):
    """Model output for every row of ``x`` using the global ``parameters``.

    The weights are given a trailing axis first (a 1-D weight vector becomes a
    column), so the result carries that extra axis as well.
    """
    column_weights = parameters[:, None]
    return probability(column_weights, x)
def accuracy(x, actual_classes, probab_threshold=0.5):
    """Percentage of rows of ``x`` whose predicted class equals ``actual_classes``.

    A row is assigned class 1 when ``predict(x)`` is >= ``probab_threshold``,
    otherwise class 0.
    """
    above_threshold = predict(x) >= probab_threshold
    predicted_classes = above_threshold.astype(int).flatten()
    hit_rate = np.mean(predicted_classes == actual_classes)
    return hit_rate * 100
# Accuracy in percent on the data the weights were optimised on, at the default 0.5 threshold.
accuracy(X, y.flatten())

99.33333333333333

In [118]:
# Fresh model with the default settings: lr=0.01, num_iter=100000, no intercept term.
model1 = LogisticRegression1()

In [127]:
# Xpre is the feature matrix without the intercept column.
# NOTE(review): no fit() call on model1 is visible before this point, so these
# predictions come from the model's initial all-zero weights.
preds = model1.predict(Xpre,0.5)
# Fraction of samples whose predicted class equals the label.
(preds == y).mean()

0.6666666666666666

### Exercise 3:
Use scikit-learn to perform logistic regression

In [167]:
from sklearn.linear_model import LogisticRegression
# Fit scikit-learn's LogisticRegression with its default settings.
model = LogisticRegression()
model.fit(X, y)
# Predict and score on the same data the model was fitted on.
preds = model.predict(X)
score = model.score(X, y)
print(score)
# Fitted intercept and coefficients, shown as the cell output.
model.intercept_, model.coef_

1.0


(array([1.29953553]), array([[ 3.88395951, -3.41339666]]))

In [168]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score 
# Same default model as before, this time scored with accuracy_score.
model = LogisticRegression()
model.fit(X, y)
predicted_classes = model.predict(X)
# NOTE(review): `accuracy` and `parameters` below rebind names that the
# Exercise 2 cells use for the accuracy() helper and the optimised weights;
# those cells have to be re-run from their definitions before being reused.
accuracy = accuracy_score(y.flatten(),predicted_classes)
parameters = model.coef_
print(accuracy)

1.0
