# Iris dataset experiment

### 1. Import libraries

In [None]:
from sklearn import datasets
import auxiliary_fun as a
import numpy as np
import perceptron as p
import random
import matplotlib.pyplot as plt
from matplotlib import pyplot
from mpl_toolkits.mplot3d import Axes3D
import time

### 2. Load iris dataset and keep just two of the three classes.

In [None]:
# Load the iris dataset: feature matrix and class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Boolean masks for the two classes that are kept (labels 0 and 1); samples
# carrying any other label are dropped.
is_class0 = y == 0
is_class1 = y == 1
X1, y1 = X[is_class0], y[is_class0]
X2, y2 = X[is_class1], y[is_class1]

# Rebuild X and y from the kept classes: class 0 rows first, then class 1.
X = np.concatenate((X1, X2))
y = np.concatenate((y1, y2))

Graph of the dataset

In [None]:
# 3D scatter plot of the first three feature columns of X, coloured by label.
fig = pyplot.figure()
# Create the 3D axes through the figure. Instantiating Axes3D(fig) directly
# relies on the axes adding itself to the figure, which Matplotlib deprecated
# in 3.4; on newer releases that can leave the figure empty.
ax = fig.add_subplot(111, projection="3d")
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap="prism")
pyplot.show()

### 3. Obtaining the dominating dataset with $\varepsilon \le 0.5$

In [None]:
# Time the extraction of the dominating subset of each class.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()
# NOTE(review): dominatingSet is assumed to return indices into the array it
# receives (its result is used below to index X1/X2 and y1/y2) -- confirm in
# auxiliary_fun.
l1 = a.dominatingSet(X1, y1, epsilon=0.5)
l2 = a.dominatingSet(X2, y2, epsilon=0.5)
# Dominating dataset: selected rows of class 0 followed by those of class 1.
Xsub = np.concatenate((X1[l1], X2[l2]))
ysub = np.concatenate((y1[l1], y2[l2]))
end = time.perf_counter()
print(end - start)  # elapsed time in seconds

Graph of the dataset

In [None]:
# 3D scatter plot of the first three feature columns of the dominating
# dataset, coloured by label.
fig = pyplot.figure()
# Create the 3D axes through the figure. Instantiating Axes3D(fig) directly
# relies on the axes adding itself to the figure, which Matplotlib deprecated
# in 3.4; on newer releases that can leave the figure empty.
ax = fig.add_subplot(111, projection="3d")
ax.scatter(Xsub[:, 0], Xsub[:, 1], Xsub[:, 2], c=ysub, cmap="prism")
pyplot.show()

### 4. Generation of a random subset of the same size as the dominating dataset.

In [None]:
# Draw, without replacement, as many row indices of X as the dominating
# dataset has samples, then pick the matching rows and labels.
n_available = len(X)
n_selected = len(Xsub)
random_index = random.sample(range(n_available), n_selected)
Xrand, yrand = X[random_index], y[random_index]

Graph of the dataset

In [None]:
# 3D scatter plot of the first three feature columns of the random subset,
# coloured by label.
fig = pyplot.figure()
# Create the 3D axes through the figure. Instantiating Axes3D(fig) directly
# relies on the axes adding itself to the figure, which Matplotlib deprecated
# in 3.4; on newer releases that can leave the figure empty.
ax = fig.add_subplot(111, projection="3d")
ax.scatter(Xrand[:, 0], Xrand[:, 1], Xrand[:, 2], c=yrand, cmap="prism")
pyplot.show()

### 5. Perceptron training.

Several parameters can be tuned, such as the number of iterations, the type of training (stochastic or not), etc.

In [None]:
# One perceptron per training set. Each receives the full two-class dataset
# as Xor / yor.
# NOTE(review): Xor / yor presumably serve as the reference set behind
# `history_or` -- confirm in perceptron.
p1 = p.Perceptron(Xor=X, yor=y)  # trained on the original dataset
p2 = p.Perceptron(Xor=X, yor=y)  # trained on the dominating dataset
p3 = p.Perceptron(Xor=X, yor=y)  # trained on the random subset

# Common random starting weights so the three runs are comparable.
# NOTE(review): 5 weights presumably means 4 features + bias -- confirm.
v = np.random.random_sample((5,))
# Give every perceptron its own copy. Assigning the same array object to all
# three would let an in-place weight update in one model silently change the
# others.
p1.weight = v.copy()
p2.weight = v.copy()
p3.weight = v.copy()

it = 100    # value passed as train()'s `iterations`
st = False  # value passed as train()'s `stochastic`
p1.train(X, y, stochastic=st, iterations=it)
p2.train(Xsub, ysub, stochastic=st, iterations=it)
p3.train(Xrand, yrand, stochastic=st, iterations=it)

#### Accuracy plots along the training process

In [None]:
k = it  # number of leading history entries to plot
# Plot the `history` recorded by each perceptron during training.
# NOTE(review): `history` presumably holds the accuracy over each model's own
# training set -- confirm in perceptron.

orig = plt.plot(np.array(p1.history)[0:k], label="Original")
repRand = plt.plot(np.array(p3.history)[0:k], label="Random Dataset")
repSub = plt.plot(np.array(p2.history)[0:k], label="Dominating Dataset")
plt.legend()
# Call show(): the bare name `plt.show` only references the function and
# never renders the figure.
plt.show()

In [None]:
# Plot the `history_or` recorded by each perceptron during training.
# NOTE(review): `history_or` presumably holds the accuracy over the original
# dataset given as Xor / yor -- confirm in perceptron.
orig = plt.plot(np.array(p1.history_or)[0:k], label="Original")
repRand = plt.plot(np.array(p3.history_or)[0:k], label="Random Dataset")
repSub = plt.plot(np.array(p2.history_or)[0:k], label="Dominating Dataset")
plt.legend()
# Call show(): the bare name `plt.show` only references the function and
# never renders the figure.
plt.show()

#### 100 repetitions of the training (might take some time)

In [None]:
it = 200
l1 = []
l2 = []
l3 = []
for i in range(100):
    random_index = random.sample(range(len(X)),len(Xsub))
    Xrand = X[random_index]
    yrand = y[random_index]
    p1 = p.Perceptron(Xor = X,yor = y)
    p2 = p.Perceptron(Xor = X,yor = y)
    p3 = p.Perceptron(Xor = X,yor = y)
    v = np.random.random_sample((5,))
    p1.weight = v#[0.8]*5
    p2.weight = v#[0.8]*5
    p3.weight = v#[0.8]*5
    p1.train(X,y,stochastic = st, iterations = it)
    p2.train(Xsub,ysub,stochastic = st, iterations = it)
    p3.train(Xrand,yrand,stochastic = st, iterations = it)
    l1.append(p.output_over_dataset(X,p1))
    l2.append(p.output_over_dataset(Xsub,p2))
    l3.append(p.output_over_dataset(Xrand,p3))

#### List of the different errors through the 100 iterations

In [None]:
e1 = []
for i in range(100):
    e1.append(np.sum((l1[i][:,4]-y)**2)/len(X))
e2 = []
for i in range(100):
    e2.append(np.sum((l2[i][:,4]-ysub)**2)/len(Xsub))
e3 = []
for i in range(100):
    e3.append(np.sum((l3[i][:,4]-yrand)**2)/len(Xrand))
e1 = np.array(e1)
e2 = np.array(e2)
e3 = np.array(e3)

In [None]:
# Report the smallest and largest error observed for each training set.
interval_reports = (
    ("Interval of the error values for the original dataset: [", e1),
    ("Interval of the error values for the dominating dataset: [", e2),
    ("Interval of the error values for the random dataset: [", e3),
)
for report_prefix, error_values in interval_reports:
    print(report_prefix, error_values.min(), ",", error_values.max(), "]")

### 6. Persistent homology

#### Persistence diagrams

In [None]:
from ripser import ripser, plot_dgms

# Run ripser with maxdim=2 on the original dataset, the dominating dataset and
# the current random subset (in that order), keeping the 'dgms' entry of each
# result.
diagrams_or, diagrams_Sub, diagrams_Rand = (
    ripser(point_cloud, maxdim=2)['dgms'] for point_cloud in (X, Xsub, Xrand)
)

Plot of the persistence diagrams

In [None]:
# Print a title and show the persistence diagrams of each dataset in turn.
titled_diagrams = (
    ("Original dataset", diagrams_or),
    ("Dominating dataset", diagrams_Sub),
    ("Random dataset", diagrams_Rand),
)
for dataset_title, dataset_diagrams in titled_diagrams:
    print(dataset_title)
    plot_dgms(dataset_diagrams, show=True)

#### Bottleneck distance

In [None]:
import gudhi as g

# Bottleneck distance between the diagrams of the original dataset and those
# of each reduced dataset, for indices 0, 1 and 2 of the diagram lists.
# Each entry: (message prefix, diagrams of the reduced dataset, index).
bottleneck_cases = (
    ("Bottleneck distance for dominating dataset and dimension 0 =", diagrams_Sub, 0),
    ("Bottleneck distance for dominating dataset and dimension 1 =", diagrams_Sub, 1),
    ("Bottleneck distance for Dominating dataset and dimension 2 =", diagrams_Sub, 2),
    ("Bottleneck distance for Random dataset and dimension 0 =", diagrams_Rand, 0),
    ("Bottleneck distance for Random dataset and dimension 1 =", diagrams_Rand, 1),
    ("Bottleneck distance for Random dataset and dimension 2 =", diagrams_Rand, 2),
)
for case_prefix, reduced_diagrams, dim in bottleneck_cases:
    distance = g.bottleneck_distance(diagrams_or[dim], reduced_diagrams[dim])
    message = case_prefix + '%.2f' % distance
    print(message)

#### Hausdorff distance

In [None]:
from scipy.spatial.distance import directed_hausdorff

# Symmetric Hausdorff distance: the larger of the two directed distances
# between the original dataset and each reduced dataset.
hausdorff_cases = (
    ("Hausdorff distance between the original dataset and the dominating dataset: ", Xsub),
    ("Hausdorff distance between the original dataset and the random dataset: ", Xrand),
)
for case_label, reduced_points in hausdorff_cases:
    forward = directed_hausdorff(X, reduced_points)[0]
    backward = directed_hausdorff(reduced_points, X)[0]
    print(case_label, max(forward, backward))