In [52]:
import numpy as np
import pandas as pd
from math import exp
from random import randrange
import matplotlib.pyplot as plt

class LogisticRegression():
    """Single-output logistic model trained with per-sample gradient descent.

    Features and targets are min-max scaled by ``train_test_split``.  The
    model emits one sigmoid value per row; ``score`` and ``predict`` turn that
    value into a label by picking the closest of the (scaled) label values.

    Attributes set by the class:
        epochs, alpha: number of passes over the training data / step size.
        parameters: bias followed by one weight per feature (set by ``fit``).
        x_minmax: ``[min, max]`` per feature column (set by ``min_max``).
        y_minmax: single ``[min, max]`` pair for the target.
        cost_history: average cost after each epoch of the last ``fit``.
    """

    def __init__(self):
        self.X_train = []
        self.y_train = []
        self.epochs = 1000
        self.alpha = 0.001
        self.parameters = []
        self.X = []
        self.y = []
        self.x_minmax = []
        self.y_minmax = []
        self.cost_history = []

    @staticmethod
    def _safe_span(low, high):
        """Return ``high - low``, or 1.0 when the range is empty.

        A constant column would otherwise cause a division by zero; with a
        span of 1.0 every value of such a column scales to 0.0.
        """
        span = high - low
        return span if span != 0 else 1.0

    @staticmethod
    def _nearest_label(value, labels):
        """Return the entry of ``labels`` closest to ``value``.

        ``np.argmin`` returns the first minimum, so exactly one label is
        produced even when two labels are equally distant.
        """
        return labels[np.argmin(np.abs(labels - value))]

    def min_max(self):
        """Record the min/max of every column of ``self.X`` and of ``self.y``.

        The bounds are rebuilt from scratch on each call so that repeated
        calls do not accumulate stale entries.
        """
        features = np.asarray(self.X, dtype=float)
        targets = np.asarray(self.y, dtype=float)

        self.x_minmax = [
            [low, high]
            for low, high in zip(features.min(axis=0), features.max(axis=0))
        ]
        self.y_minmax = [[targets.min(), targets.max()]]

    def normalization(self):
        """Min-max scale ``self.X`` and ``self.y`` using the recorded bounds.

        New arrays are bound to ``self.X`` / ``self.y`` instead of writing
        into the existing ones, so data shared with the caller is untouched.
        """
        features = np.asarray(self.X, dtype=float)
        lows = np.array([bounds[0] for bounds in self.x_minmax], dtype=float)
        highs = np.array([bounds[1] for bounds in self.x_minmax], dtype=float)
        spans = highs - lows
        spans[spans == 0] = 1.0  # constant columns scale to 0.0, not NaN
        self.X = (features - lows) / spans

        y_low, y_high = self.y_minmax[0]
        targets = np.asarray(self.y, dtype=float)
        self.y = (targets - y_low) / self._safe_span(y_low, y_high)

    # Evaluate an algorithm using a train/test split
    def train_test_split(self, X, y, split):
        """Scale the data and randomly split it into train and test parts.

        Args:
            X: 2-D sequence/array of feature rows.
            y: 1-D sequence/array of numeric targets.
            split: fraction of rows (0..1) to place in the training set.

        Returns:
            ``(X_train, X_test, y_train, y_test)`` as lists of scaled rows
            and scaled targets.

        Raises:
            ValueError: if ``split`` is outside ``[0, 1]``.
        """
        if not 0 <= split <= 1:
            raise ValueError("split must be between 0 and 1")

        # Work on float copies: the caller's arrays must not be overwritten,
        # and an integer array would truncate the scaled values.
        self.X = np.array(X, dtype=float)
        self.y = np.array(y, dtype=float)
        self.min_max()
        self.normalization()

        X_train = []
        y_train = []
        train_size = split * len(X)
        X_test = list(self.X)
        y_test = list(self.y)

        # Move randomly chosen rows from the test pool into the training set.
        while len(X_train) < train_size:
            index = randrange(len(X_test))
            X_train.append(X_test.pop(index))
            y_train.append(y_test.pop(index))

        return X_train, X_test, y_train, y_test

    def accuracy_check(self, pred, actual):
        """Return the fraction of positions where ``pred`` equals ``actual``.

        Returns 0.0 for empty input instead of dividing by zero.
        """
        if len(actual) == 0:
            return 0.0

        correct = sum(1 for p, a in zip(pred, actual) if p == a)
        return correct / len(actual)

    def score(self, X_test, y_test):
        """Return the accuracy on ``X_test`` formatted with two decimals.

        Each row's sigmoid output is mapped to the closest distinct value in
        ``y_test``; exactly one prediction is produced per row so predictions
        stay aligned with ``y_test`` even when two labels are equally close.
        """
        labels = np.unique(y_test)
        predictions = [
            self._nearest_label(self.prediction(row, self.parameters), labels)
            for row in X_test
        ]

        accuracy = self.accuracy_check(predictions, y_test)
        return "{:.2f}".format(float(accuracy))

    def prediction(self, row, parameters):
        """Return ``sigmoid(parameters[0] + sum(row[i] * parameters[i+1]))``."""
        hypothesis = parameters[0]
        for i, feature in enumerate(row):
            hypothesis = hypothesis + feature * parameters[i + 1]

        # Branch on the sign so exp() only ever sees a non-positive argument;
        # exp(-hypothesis) overflows for strongly negative hypotheses.
        if hypothesis >= 0:
            return 1 / (1 + exp(-hypothesis))
        scaled = exp(hypothesis)
        return scaled / (1 + scaled)

    def cost_function(self, parameters):
        """Return the mean cross-entropy cost over the training set."""
        epsilon = 1e-15  # keeps log() away from 0 when the sigmoid saturates
        cost = 0

        for row, y in zip(self.X_train, self.y_train):
            pred = self.prediction(row, parameters)
            pred = min(max(pred, epsilon), 1 - epsilon)
            cost = cost + (-(y * np.log(pred)) + (-(1 - y) * np.log(1 - pred)))

        return cost / len(self.X_train)

    def gradient_descent(self):
        """Run per-sample gradient descent for ``self.epochs`` passes.

        Returns:
            ``(cost_history, parameters)`` where ``cost_history`` holds the
            average cost after each epoch.
        """
        parameters = [0.0] * (len(self.X_train[0]) + 1)
        cost_history = []

        for _ in range(self.epochs):
            for row, target in zip(self.X_train, self.y_train):
                # The prediction is taken once, before any parameter of this
                # step is changed, and reused for the bias and every weight.
                step = self.alpha * (self.prediction(row, parameters) - target)
                parameters[0] = parameters[0] - step
                for k, feature in enumerate(row):
                    parameters[k + 1] = parameters[k + 1] - step * feature
            cost_history.append(self.cost_function(parameters))

        return cost_history, parameters

    def fit(self, X_train, y_train):
        """Train on the given (already scaled) rows and targets.

        Stores the learned parameters in ``self.parameters`` and the
        per-epoch cost in ``self.cost_history``.
        """
        self.X_train = X_train
        self.y_train = y_train
        self.cost_history, self.parameters = self.gradient_descent()

    def predict(self, row):
        """Predict the label, in original target units, for one raw row.

        The row is scaled with the bounds recorded by ``min_max`` (without
        modifying the caller's sequence), the sigmoid output is mapped to the
        closest distinct value of ``self.y``, and that value is scaled back
        to the original target range.

        Returns:
            The predicted label as an ``int`` (rounded, so floating-point
            error in the un-scaling cannot drop it to the label below).
        """
        scaled_row = []
        for j, value in enumerate(row):
            low, high = self.x_minmax[j]
            scaled_row.append((value - low) / self._safe_span(low, high))

        output = self.prediction(scaled_row, self.parameters)
        label = self._nearest_label(output, np.unique(self.y))

        y_low, y_high = self.y_minmax[0]
        return int(round(label * (y_high - y_low) + y_low))
    
if __name__ == "__main__":
    # Demo: train on the scikit-learn iris data, report the held-out
    # accuracy, then classify three hand-picked measurements.
    from sklearn.datasets import load_iris

    iris = load_iris()
    X, y = iris.data, iris.target

    model = LogisticRegression()
    X_train, X_test, y_train, y_test = model.train_test_split(X, y, .8)
    model.fit(X_train, y_train)
    print(model.score(X_test, y_test))

    for sample in (
        [5.2, 4.1, 1.5, 0.1],
        [6.4, 3.2, 4.5, 1.5],
        [6.2, 3.4, 5.4, 2.3],
    ):
        print(model.predict(sample))

0.97
0
1
2
