In [16]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt


iris = load_iris()
iris_data = iris.data
iris_target = iris.target

train, test, train_labels, test_labels = train_test_split(iris_data, iris_target, test_size = 0.2)


# a = train[train_labels == 0][:,:2]
# a_labels = train_labels[train_labels == 0]
# b = train[train_labels == 1][:,:2]
# b_labels = train_labels[train_labels == 1]

# train = np.concatenate((a, b))
# labels = np.concatenate((a_labels, b_labels))

# shuffle_index = np.random.permutation(79)
# Train, Train_labels = train[shuffle_index], labels[shuffle_index]

# plt.scatter(Train[Train_labels == 1][:,1], Train[Train_labels == 1][:,2])
# plt.scatter(Train[Train_labels == 0][:,1], Train[Train_labels == 0][:,2])
# plt.show()

In [19]:
# from  sklearn import datasets
# nist = datasets.load_digits()
# train, test, train_labels, test_labels = train_test_split(np.float32(nist.data), nist.target, test_size = 0.2)

In [4]:
import random
from sklearn.base import BaseEstimator, TransformerMixin
def sign(x):
    '''
    Function that return 1 if value is grater than 0
    and -1 in other case.
    param: x: value
    return: -1, 1
    '''
    if x >= 0:
        return 1
    else:
        return -1
    
def prepare_data(class_to_clf):
    '''
    Function that prepere data to classification.
    param number: number to classify.
    return: labels of training and test data.
    '''
    if class_to_clf not in np.unique(train_labels):
        raise ValueError('Class not found')
    y = np.ones_like(train_labels)
    y[train_labels == class_to_clf] = 1
    y[train_labels != class_to_clf] = -1
    
    y_test = np.ones_like(test_labels)
    y_test[test_labels == class_to_clf] = 1
    y_test[test_labels != class_to_clf] = -1
    
    return y, y_test


class my_Percepron(BaseEstimator, TransformerMixin):
    '''
    Class that represent single perceptron.
    '''
    def __init__(self, lr = 0.1):
        '''
        param: lr: learning rate.
        '''
        self.lr = lr

    def fit(self, train, target):
        '''
        Function that train data - search for best weights and bias.
        param: train: training data.
        param: target: labels of training data.
        '''
        self.W = np.random.rand(len(train[0])+1)         # Initialization random weights.
        X = np.hstack((np.ones((len(train), 1)), train)) # Initialization bias as index 0 of training data.
        misclasified = True
        iterations = 0
        while misclasified and iterations < 100:
            iterations +=1
            misclasified = False
            for xi, yi in zip(X, target):   
                value = sign(xi.dot(self.W))             # Dot product of X[i] and weights, closed in sign function, 
                error = yi - value                       # Error = desired - our_value, possible cases: 1 - 1 = 0,
                if error != 0:
                    misclasified = True
                self.W += self.lr*error*xi               # 1 - (-1) = 2, -1 - 1 = -2, -1 - (-1) = 0
                
        return iterations
                    
    def predict(self, data):
        '''
        Function that predicts value of data.
        param: data: data to predict.
        return: weighted sum of data.
        '''
        data = np.insert(data, 0, 1)
        value = data.dot(self.W)
        return np.array(value)
    
    def score(self, test, labels):
        '''
        Function that return score of prediction.
        param: test: test data.
        param: labels: labels of test data.
        return score of prediction.
        '''
        predicted = []
        for i in test:
            predicted.append(sign(self.predict(i)))
#         print('Weights:', self.W[1:])
#         print('Bias: ', self.W[0])
        print('Number of wrong classsified examples: ', np.sum(predicted != labels))
        return np.sum(predicted == labels) / len(predicted)
    
    def return_W_B(self):
        '''
        Function that return weights and bias.
        '''
        return self.W[1:], self.W[0]
    
bp = my_Percepron()
number = 2
y, y_test = prepare_data(number)
print('How many iterations need to train:', bp.fit(train, y))
print('Score: ', bp.score(test, y_test)*100, '%')
print('---------------------------------------------------------')
print(bp.return_W_B())

How many iterations need to train: 100
Number of wrong classsified examples:  0
Score:  100.0 %
---------------------------------------------------------
(array([-14.82340419,  -6.85414502,  18.87390113,  22.50100397]), -14.573626350553646)


In [7]:
from sklearn.model_selection import cross_val_score

cross_val_score(bp, test, y_test, cv = 5)

Number of wrong classsified examples:  0
Number of wrong classsified examples:  0
Number of wrong classsified examples:  0
Number of wrong classsified examples:  0
Number of wrong classsified examples:  0


array([ 1.,  1.,  1.,  1.,  1.])

In [8]:
class OVAClf(BaseEstimator, TransformerMixin):
    '''
    Class that represent 10 binary Perceptrons that recognizes hand written digits.
    '''
    def fit(self, train, labels):
        '''
        Function that train 10 single perceptron for every number.
        param: train: training data.
        param: labels: labels of training data
        '''
        self.perceptrons = [my_Percepron() for i in range(len(np.unique(labels)))]
        for index, perceptron in enumerate(self.perceptrons):
            y, y_test = prepare_data(index)
            perceptron.fit(train, y)
            
    def predict(self, number):
        '''
        Function that predict label of data.
        param: number: data to predict.
        return: Array with propability of every number.
        '''
        propab = []
        for perceptron in self.perceptrons:
            guess_ = perceptron.predict(number)
            propab.append(guess_)
        propab = np.argmax(np.array(propab))
        return propab
    
    def score(self, data, labels):
        '''
        Function that calculate score of good classified examples.
        param: data: data to classify.
        param: labels of data.
        return: score of good classified examples.
        '''
        propab = []
        for i in range(len(data)):
            propab.append([])
            for perceptron in self.perceptrons:
                guess_ = perceptron.predict(data[i])
                propab[i].append(guess_)
            propab[i] = np.argmax(np.array(propab[i]))
        print('Number of wrong classsified examples: ', np.sum(propab != labels))
        return np.sum(propab == labels) / len(labels)
       

In [13]:
ovaclf = OVAClf()
ovaclf.fit(train, train_labels)
ovaclf.score(test,test_labels)

Number of wrong classsified examples:  3


0.90000000000000002

In [17]:
cross_val_score(ovaclf, test, test_labels)

Number of wrong classsified examples:  6
Number of wrong classsified examples:  5
Number of wrong classsified examples:  5


array([ 0.4,  0.5,  0.5])