In [4]:
import numpy as np
import seaborn as sns
import pandas as pd
from sklearn import datasets

In [5]:
# Load the bundled iris dataset.
iris = datasets.load_iris()

# Features: only the first two columns of the data matrix.
X = iris.data[:, 0:2]

# Labels: 1 wherever the original target is non-zero, 0 otherwise.
y = (iris.target != 0).astype(int)

In [17]:
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    lr : float
        Step size applied to every gradient update.
    num_iter : int
        Number of gradient-descent iterations performed by ``fit``.
    fit_intercept : bool
        When true, a column of ones is prepended to ``X`` so that the
        first entry of ``theta`` acts as a bias term.
    verbose : bool
        When truthy, the current loss is printed every 10000 iterations.

    Attributes
    ----------
    theta : numpy.ndarray
        Learned weights; created by ``fit``.
    """

    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True, verbose=False):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept
        self.verbose = verbose

    def __add_intercept(self, X):
        """Return ``X`` with a leading column of ones."""
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        """Evaluate ``1 / (1 + exp(-z))`` without overflowing.

        ``exp(-|z|)`` is at most 1, so neither branch below can overflow,
        unlike a direct ``np.exp(-z)`` for large negative ``z``.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    def __loss(self, h, y):
        """Mean binary cross-entropy between probabilities ``h`` and labels ``y``."""
        # Keep h strictly inside (0, 1) so that log() never sees 0.
        eps = np.finfo(float).eps
        h = np.clip(h, eps, 1 - eps)
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        """Learn ``theta`` from features ``X`` and 0/1 labels ``y``.

        Runs ``num_iter`` full-batch gradient steps starting from zero weights.
        """
        # Accept plain lists as well as arrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        if self.fit_intercept:
            X = self.__add_intercept(X)

        # weights initialization
        self.theta = np.zeros(X.shape[1])

        for i in range(self.num_iter):
            z = np.dot(X, self.theta)
            h = self.__sigmoid(z)
            gradient = np.dot(X.T, (h - y)) / y.size
            self.theta -= self.lr * gradient

            if self.verbose and i % 10000 == 0:
                # Report the loss with the freshly updated weights.
                z = np.dot(X, self.theta)
                h = self.__sigmoid(z)
                print(f'loss: {self.__loss(h, y)} \t')

    def predict_prob(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        if self.fit_intercept:
            X = self.__add_intercept(X)

        return self.__sigmoid(np.dot(X, self.theta))

    def predict(self, X, threshold=0.5):
        """Return a boolean array: True where the probability is >= ``threshold``."""
        return self.predict_prob(X) >= threshold

In [18]:
# Build the classifier with a 0.1 learning rate and 300000 iterations,
# then fit it on X, y; the %time magic reports how long the fit takes.
model = LogisticRegression(lr=0.1, num_iter=300000)
%time model.fit(X, y)

CPU times: user 3 s, sys: 0 ns, total: 3 s
Wall time: 3 s


In [20]:
# Hard class predictions using a 0.5 probability cut-off.
preds = model.predict(X, threshold=0.5)
# Accuracy on the data the model was fitted on: share of predictions equal to y.
np.mean(preds == y)

1.0

In [8]:
from random import randrange
 
# Split a dataset into k folds

def cross_validation_split(dataset, folds=3):
    """Randomly partition ``dataset`` into ``folds`` equally sized folds.

    Rows are drawn without replacement using ``randrange``. Each fold holds
    ``len(dataset) // folds`` rows, so any leftover rows are not assigned
    to a fold.

    Parameters
    ----------
    dataset : iterable
        The rows to split; it is copied into a list and not modified.
    folds : int
        Number of folds to produce.

    Returns
    -------
    list of list
        ``folds`` lists, each containing the rows of one fold.
    """
    remaining = list(dataset)
    fold_size = len(remaining) // folds
    dataset_split = []
    for _ in range(folds):
        fold = [remaining.pop(randrange(len(remaining))) for _ in range(fold_size)]
        # Append each fold exactly once, after it has been filled.
        dataset_split.append(fold)
    return dataset_split
 
# test cross validation split
# NOTE(review): `iris` is the object returned by datasets.load_iris(), not a
# table of rows; the recorded output of this cell lists its field names
# ('data', 'target', ...) instead of samples. Presumably `iris.data` was
# intended here -- confirm.
#seed(1)
#dataset = [[1], [2], [3], [4], [5], [6], [7], [8], [9], [10]]
folds = cross_validation_split(iris, 5)
print(folds)

[['filename'], ['DESCR'], ['target'], ['data']]


In [16]:
# Split a small 4-row, 2-column array into 3 folds (4 // 3 == 1 row per fold).
x=cross_validation_split(np.array([[1,2],[2,3],[4,5],[5,6]]), folds=3)

In [18]:
# Show the folds produced by cross_validation_split.
x

[[array([1, 2])], [array([5, 6])], [array([4, 5])]]