In [16]:
import numpy as np
import pandas as pd
import seaborn as sns
import random
import matplotlib.pyplot as plt
import sklearn.datasets
import time
from numpy import (array, dot, arccos, clip)
from numpy.linalg import norm
from sklearn import datasets
from sklearn.datasets import (make_classification, make_blobs, make_gaussian_quantiles)
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

In [165]:
# Seed NumPy's global generator. Neither sklearn call below is given its own
# random_state (None / omitted), in which case scikit-learn documents that it
# falls back to this global generator.
np.random.seed(66)

# Synthetic two-class data set: 5000 samples with 100 features each, of which
# only 2 are informative.
X, y = make_classification(
    n_samples=5000,
    n_features=100,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=2,
    weights=None,
    flip_y=0.01,
    class_sep=10,
    hypercube=True,
    shift=0.0,
    scale=1.0,
    shuffle=True,
    random_state=None,
)

# Keep 20% of the samples aside as a test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [166]:
# Short aliases for the training split, plus its dimensions
# (rows = number of samples, columns = number of features).
data, target = X_train, y_train
rows, columns = np.shape(data)

In [167]:
#defining a function for the projections onto each of the ith sets
def altProj(x, i, samples=None, labels=None):
    """Project ``x`` onto the half-space ``{z : samples[i] @ z <= labels[i]}``.

    When ``x`` already satisfies the i-th inequality the result equals ``x``;
    otherwise ``x`` is moved along ``samples[i]`` just far enough for the
    inequality to hold with equality.

    Parameters
    ----------
    x : 1-D numpy array
        Current iterate.
    i : int
        Index of the constraint (row) to project onto.
    samples : 2-D numpy array, optional
        Constraint rows. Defaults to the notebook-level ``data``.
    labels : 1-D numpy array, optional
        Right-hand sides. Defaults to the notebook-level ``target``.

    Returns
    -------
    1-D numpy array
        The projected point (a new array; ``x`` is not modified).
    """
    # Fall back to the notebook globals so existing altProj(x, i) calls keep working.
    if samples is None:
        samples = data
    if labels is None:
        labels = target

    row = samples[i]
    squared_norm = norm(row) ** 2
    # An all-zero row has no direction to move along, and dividing by its zero
    # norm would turn the iterate into NaN. Using the zero row itself as the
    # direction leaves x unchanged instead.
    direction = row / squared_norm if squared_norm > 0 else row
    # Non-positive step length: 0 when the constraint is satisfied, otherwise
    # the (negative) amount by which it is violated.
    step = min(0, labels[i] - row @ x)
    separation_vector = x + step * direction

    return separation_vector

In [168]:
#The cyclic projection algorithm
#
# Runs the algorithm 10 times from fresh random starting points and records the
# wall-clock time of each run in `duration`. After the loop, `x_start` and
# `iterate` hold the final vector and sweep count of the LAST run only.

tol = 1e-4               #tolerance level (loop-invariant, so set once)
max_sweeps = 1000        #safety cap so a non-converging run cannot loop forever

duration = []
for t in range(10):
    # perf_counter is monotonic and high-resolution, unlike time.time(), which
    # can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    iterate = 0              #number of completed sweeps over all rows
    iterate_counter = []     #sweep indices, parallel to norm_counter
    norm_counter = []        #norm of the iterate after each sweep
    residual = float("inf")  #guarantees the while loop is entered

    x_start = np.random.rand(columns)   #starting point
    norm_counter.append(norm(x_start))
    iterate_counter.append(0)
    while residual > tol and iterate < max_sweeps:
        for row in range(rows):
            # Project onto each row's set in turn. The result is assigned
            # straight back to x_start; a temporary named `y` would overwrite
            # the label vector `y` created by make_classification.
            x_start = altProj(x_start, row)
        iterate += 1
        iterate_counter.append(iterate)
        norm_counter.append(norm(x_start))
        # NOTE(review): the stopping rule compares only the norms of successive
        # iterates, so it can trigger while the vector itself is still moving.
        residual = abs(norm_counter[-1] - norm_counter[-2])   #Difference in norm of consecutive resultant vectors

    if residual > tol:
        print("Run {}: stopped after {} sweeps without reaching tol={}".format(t, max_sweeps, tol))

    p = (time.perf_counter() - start_time)
    duration.append(round(p, 4))

In [169]:
# Summary statistics (in seconds) over the recorded run times
minimum_duration, maximum_duration = np.min(duration), np.max(duration)
average_duration = np.mean(duration)

In [170]:
#Converting classified arrays to binary arrays

# Raw score of every training sample: its inner product with the final iterate
predict_array = np.matmul(data, x_start)

def array_to_binary(array):
    """Threshold scores at zero.

    Parameters
    ----------
    array : iterable of numbers
        Raw scores. Note that inside this function the parameter name hides
        the ``array`` imported from numpy at the top of the notebook.

    Returns
    -------
    list of int
        ``1`` for every score ``>= 0`` and ``0`` for every other score, in
        input order. An empty input gives an empty list.
    """
    return [1 if score >= 0 else 0 for score in array]

# Predicted class (0 or 1) for every training sample, from the sign of its score
predicted_class = array_to_binary(predict_array)

In [171]:
#######TRAINING
##Performance measure of the algorithm

def projection_accuracy(target, prediction):
    """Return the fraction of positions where ``prediction`` equals ``target``.

    Parameters
    ----------
    target : sequence
        True labels.
    prediction : sequence
        Predicted labels, in the same order as ``target``.

    Returns
    -------
    float
        Number of matching positions divided by ``len(target)``; a value
        between 0.0 and 1.0.

    Raises
    ------
    ValueError
        If the two sequences differ in length, or if they are empty (the
        accuracy of zero samples is undefined).
    """
    n_samples = len(target)
    # A length mismatch would otherwise either crash mid-loop or silently
    # ignore the surplus predictions.
    if len(prediction) != n_samples:
        raise ValueError(
            "target and prediction must have the same length, got {} and {}".format(
                n_samples, len(prediction)
            )
        )
    if n_samples == 0:
        raise ValueError("cannot compute the accuracy of an empty target")

    right = sum(1 for truth, guess in zip(target, prediction) if truth == guess)
    classification_accuracy = right / float(n_samples)
    return classification_accuracy

# Fraction of training samples whose predicted class matches the true label
classification_accuracy = projection_accuracy(target, predicted_class)
training_summary = "The Percentage Training Accuracy is {:.4f}".format(classification_accuracy)
print(training_summary)

The Percentage Training Accuracy is 0.9745


In [172]:
#######TEST
##Performance measure of the algorithm

# Score the held-out samples with the vector found on the training data
predict_array_test = X_test@x_start

predicted_class_test = array_to_binary(predict_array_test)

# Reuse the projection_accuracy function defined in the training-accuracy cell.
# Redefining it here with an identical body only shadowed that definition.
classification_accuracy2 = projection_accuracy(y_test, predicted_class_test)
print("The Percentage Test Accuracy is: {:.4f}".format(classification_accuracy2))

The Percentage Test Accuracy is: 0.9680


In [173]:
# Final report. `iterate` is the sweep count of the last timed run only; the
# durations cover all runs.
print('Convergence after {} iterations'.format(iterate))
print('Duration to Convergence:')
print("[{0}]".format(', '.join(str(seconds) for seconds in duration)))
for template, value in (
    ('The Average Duration to Convergence {} seconds', average_duration),
    ('The Minimum Duration to Convergence {} seconds', minimum_duration),
    ('The Maximum Duration to Convergence {} seconds', maximum_duration),
    ("The Training Accuracy is {:.4f}", classification_accuracy),
    ("The Test Accuracy is: {:.4f}", classification_accuracy2),
):
    print(template.format(value))

Convergence after 21 iterations
Duration to Convergence:
[1.4151, 1.5536, 1.0624, 1.7291, 2.7408, 1.3245, 1.1308, 1.5116, 1.3765, 1.2709]
The Average Duration to Convergence 1.51153 seconds
The Minimum Duration to Convergence 1.0624 seconds
The Maximum Duration to Convergence 2.7408 seconds
The Training Accuracy is 0.9745
The Test Accuracy is: 0.9680
