In [98]:
import math

import numpy as np
from sklearn import datasets
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

try:
    # Current home of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy module, removed in scikit-learn 0.20; kept as a fallback for old installs.
    from sklearn.cross_validation import train_test_split

## Load and Scale Data

In [132]:
# Load the Iris dataset; the cells below read features from `iris.data`
# and class labels from `iris.target`.
iris = datasets.load_iris()

In [170]:
# Standardize the feature matrix: fit the scaler, then apply it to the same data.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split below, so test-set statistics leak into preprocessing. Consider
# fitting on the training split only.
scaler = StandardScaler()
scaler.fit(iris.data)
iris_scaled = scaler.transform(iris.data)

data = iris_scaled

# Binary target: class index 2 becomes 0, every other class becomes 1.
labels = [int(target != 2) for target in iris.target]

In [171]:
# add bias to the data
bias_column = np.array([[1]*len(data)])
data = np.append(data, bias_column.T, axis=1)

In [172]:
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split repeatable.
train_X, test_X, train_y, test_y = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=49,
)

## Logistic Regression Classifier

In [173]:
class LogisticRegressionClassifier(object):
    """Binary logistic regression fitted with batch gradient steps.

    The model has no separate intercept term: callers that want one must
    append a constant column to the feature matrix themselves.

    Parameters
    ----------
    n_iterations : int
        Number of full-batch weight updates performed by ``train``.
    learning_rate : float
        Multiplier applied to the summed gradient in each update. Because the
        gradient is summed (not averaged) over samples, the effective step
        grows with the size of the training set.
    verbose : bool, optional
        When true (the default), print the weight vector after initialisation
        and after every update.
    random_state : int or None, optional
        Seed for the initial weights. ``None`` (the default) draws from
        NumPy's global random state.

    Attributes set by ``train``
    ---------------------------
    weights : ndarray of shape (n_attributes,)
    losses : list of float
        Mean cross-entropy measured before each weight update.
    """

    def __init__(self, n_iterations, learning_rate, verbose=True, random_state=None):
        self.n_iterations = n_iterations
        self.learning_rate = learning_rate
        self.verbose = verbose
        self.random_state = random_state

    def sigmoid(self, x):
        """Return the logistic function 1 / (1 + exp(-x)), element-wise.

        Only exp(-|x|) is ever evaluated, so large-magnitude inputs cannot
        overflow the exponential.
        """
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x). Same function.
        return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))

    def init_weights(self):
        """Draw the initial weights uniformly from [0, 1), one per attribute."""
        if self.random_state is None:
            self.weights = np.random.random_sample(self.n_attributes)
        else:
            rng = np.random.RandomState(self.random_state)
            self.weights = rng.random_sample(self.n_attributes)
        if self.verbose:
            print(self.weights)

    def calculate_penalty(self):
        """Return the mean cross-entropy of the current weights on the training data.

        Also stores the current training-set probabilities in ``self.H``.
        """
        self.H = self.predict(self.X)
        # Keep probabilities strictly inside (0, 1) so log() stays finite
        # when the sigmoid saturates.
        eps = np.finfo(float).eps
        H = np.clip(self.H, eps, 1.0 - eps)
        n_samples = self.X.shape[0]
        log_likelihood = np.sum(self.y * np.log(H) + (1 - self.y) * np.log(1 - H))
        # Average over samples (not over attributes).
        penalty = -log_likelihood / n_samples
        return penalty

    def update_weights(self):
        """Apply one gradient step to ``self.weights`` using the full training set."""
        # Recompute the probabilities here so the step never uses a stale
        # ``self.H`` left over from an earlier call.
        self.H = self.predict(self.X)
        delW = self.learning_rate * np.dot(self.y - self.H, self.X)
        self.weights += delW
        if self.verbose:
            print(self.weights)

    def predict(self, X, final=False):
        """Score the rows of ``X`` with the current weights.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_attributes)
        final : bool, optional
            If false (default) return the sigmoid probabilities as an ndarray.
            If true return hard labels as a list of Python ints: 1 where the
            probability is strictly greater than 0.5, otherwise 0.
        """
        X = np.asarray(X, dtype=float)
        predictions = self.sigmoid(np.dot(X, self.weights))
        if final:
            predictions = (predictions > 0.5).astype(int).tolist()
        return predictions

    def train(self, X, y):
        """Fit the weights with ``n_iterations`` full-batch gradient steps.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_attributes)
        y : array-like of shape (n_samples,)
            Targets; the loss and gradient formulas treat them as 0/1 values.

        Raises
        ------
        ValueError
            If ``X`` is not a non-empty 2-D array or ``y`` does not hold one
            value per row of ``X``.
        """
        self.X = np.array(X, dtype=float)
        self.y = np.array(y, dtype=float)
        if self.X.ndim != 2 or self.X.shape[0] == 0:
            raise ValueError("X must be a non-empty 2-D array of shape (n_samples, n_attributes)")
        if self.y.shape != (self.X.shape[0],):
            raise ValueError("y must be 1-D with one target per row of X")

        self.n_attributes = self.X.shape[1]
        self.init_weights()

        self.losses = []
        for iteration in range(self.n_iterations):
            # Record the loss before each step so convergence can be inspected.
            self.losses.append(self.calculate_penalty())
            self.update_weights()
        
        

In [186]:
# Hyperparameters: 10 full-batch weight updates, each scaled by a learning rate of 0.1.
clf = LogisticRegressionClassifier(n_iterations=10, learning_rate=0.1)

In [187]:
# Fit on the training split. The printed rows below are the weight vector
# after initialisation and after each update.
clf.train(train_X, train_y)

[ 0.57498366  0.02244312  0.54454813  0.3192111   0.81237296]
[-5.71004411  1.31689012 -6.22266677 -6.54591801  1.3101815 ]
[-4.70194091  0.48199002 -5.18840246 -5.8332211   3.83758897]
[-3.5950598   0.12475908 -4.37618117 -5.25387361  5.53481319]
[-2.73832614  0.06251635 -3.89210179 -4.91212106  6.50159401]
[-2.31969075  0.32578553 -3.87843793 -4.93978287  6.66076698]
[-2.02091081  0.58354641 -3.93856044 -5.0232358   6.6897154 ]
[-1.76492315  0.78651238 -3.99047575 -5.09477716  6.72034087]
[-1.54633057  0.94779552 -4.03977176 -5.16068387  6.74744477]
[-1.35761476  1.07656999 -4.08726136 -5.2225068   6.77226454]
[-1.19306507  1.18025101 -4.13357602 -5.28124385  6.79557735]


In [188]:
# final=True asks for hard 0/1 labels instead of raw sigmoid probabilities.
predictions = clf.predict(test_X, final=True)

In [189]:
# Fraction of held-out samples whose predicted label matches the true label.
accuracy_score(test_y, predictions)

0.94736842105263153