# Step III: Spiral challenge

In this example, the data points of the two classes are arranged along two opposite spirals. Build your own network to properly separate the two sets of data.


Definition of the training data, where the two classes are defined by two opposite spirals. On top of the 
training data set, a validation data set and a testing data set are also defined. The size of each set is passed 
as an argument; in the cell further below they are set to 200 (training), 100 (validation) and 1000 (testing).

In [None]:
import random
import numpy as np
import math
import matplotlib.pyplot as plt
    
def Training_set_spirale(a, b, Ntrain, Nval, Ntest, p_error):
    """Generate a noisy two-spiral data set split into three subsets.

    Each sample is built from a random parameter ``Theta`` drawn uniformly
    in ``[0, 10*pi]`` and the radius ``r = a*Theta + b``. A fair coin decides
    whether the point belongs to the first arm (label 0) or to the arm
    mirrored through the origin (label 1). Independent uniform noise in
    ``[-0.5, 0.5]`` is added to the radius of each coordinate.

    Args:
        a: Slope of the radius with respect to ``Theta``.
        b: Radius offset.
        Ntrain: Number of training samples.
        Nval: Number of validation samples.
        Ntest: Number of testing samples.
        p_error: Probability that the label of a sample is inverted, which
            introduces random labelling errors in the data set.

    Returns:
        A 6-tuple, in this exact order::

            (Training_data, Testing_data, Validation_data,
             Training_label, Validation_label, Testing_label)

        Note that the testing data comes *before* the validation data while
        the labels are ordered training / validation / testing. Data arrays
        have shape ``(size, 2)`` and label arrays ``(size, 1)``, where
        ``size`` is ``Ntrain``, ``Nval`` or ``Ntest`` respectively.
    """
    N = Ntrain + Nval + Ntest
    Data = np.zeros([N, 2])
    Labels = np.zeros([N, 1])

    for n in range(N):

        # Position along the spiral.
        Theta = random.uniform(0, 10*math.pi)
        r = a*Theta + b

        # Fair coin selecting the arm: label 0 keeps the point as is,
        # label 1 mirrors it through the origin.
        if random.uniform(0, 1) < 0.5:
            Labels[n, 0] = 0
            sign = 1
        else:
            Labels[n, 0] = 1
            sign = -1

        # NOTE(review): the radius r is also used as the polar angle (not
        # Theta); kept unchanged -- confirm this is the intended geometry.
        Data[n, 0] = sign*(r + random.uniform(-.5, .5))*math.cos(r)
        Data[n, 1] = sign*(r + random.uniform(-.5, .5))*math.sin(r)

        # With probability p_error the label is inverted in order to
        # introduce random errors in the data set.
        if random.uniform(0, 1) > 1 - p_error:
            Labels[n, 0] = 1 - Labels[n, 0]

    # Contiguous, non-overlapping split. Slices are half-open, so each subset
    # starts exactly where the previous one ends and no sample is skipped.
    val_start = Ntrain
    test_start = Ntrain + Nval

    Training_data = Data[:val_start]
    Training_label = Labels[:val_start]
    Validation_data = Data[val_start:test_start]
    Validation_label = Labels[val_start:test_start]
    Testing_data = Data[test_start:]
    Testing_label = Labels[test_start:]

    return Training_data, Testing_data, Validation_data, Training_label, Validation_label, Testing_label

def Training_set_clusters(N):
    """Allocate zero-filled buffers for a cluster data set (unfinished stub).

    Two arrays with ``N + 400`` rows are created: one with 2 columns for the
    coordinates and one with 1 column for the labels. Nothing is filled in
    and nothing is returned, so calling this function yields ``None``.
    """
    rows = N + 400
    points = np.zeros((rows, 2))
    targets = np.zeros((rows, 1))

The training/validation/testing sets are defined below.

In [None]:
# Spiral geometry: the radius is computed as r = a*Theta + b
a = 0.25
b = 0

# Probability that the label of a sample is inverted
error_probability = 0.1

# Number of samples in each subset
Ntrain = 200
Nval = 100
Ntest = 1000

# The generator returns the testing data before the validation data,
# while the labels come back as training / validation / testing.
(
    Training_data,
    Testing_data,
    Validation_data,
    Training_label,
    Validation_label,
    Testing_label,
) = Training_set_spirale(a, b, Ntrain, Nval, Ntest, error_probability)

The data are first normalized, then the training set is plotted using two different colors
to distinguish the two classes.

In [None]:
# Normalize the data
# ------------------
# The statistics are taken from the training set only, over all entries of
# the array (one mean and one standard deviation shared by both coordinates).
# The same transform is then applied to the three subsets.
E = Training_data.mean()
Std = Training_data.std()

Validation_data = (Validation_data - E) / Std
Testing_data = (Testing_data - E) / Std
Training_data = (Training_data - E) / Std

# Split the training points according to their class
# --------------------------------------------------
Idx_class_0 = Training_label == 0
Idx_class_1 = Training_label == 1

# The label arrays have a single column; flatten the masks to select rows.
rows_class_0 = Idx_class_0[:, 0]
rows_class_1 = Idx_class_1[:, 0]

X0, Y0 = Training_data[rows_class_0, 0], Training_data[rows_class_0, 1]
X1, Y1 = Training_data[rows_class_1, 0], Training_data[rows_class_1, 1]

# Class 0 in red, class 1 in blue
plt.rcParams['figure.figsize'] = (6,7) # Make the figures a bit bigger
for xs, ys, marker in ((X0, Y0, 'r.'), (X1, Y1, 'b.')):
    plt.plot(xs, ys, marker, ms=10)
plt.xlabel('X train', fontsize=15)
plt.ylabel('Y train', fontsize=15)
plt.show()

Define the architecture of the model. You will have to decide how many layers and neurons are necessary to solve this problem. Since we are working with two classes, the output activation function is "sigmoid" and the loss function is "binary cross-entropy". In the end, the distinction between the two classes is made on the basis of whether the output is below or above 0.5.

In [None]:
# Exercise placeholder: replace the ellipsis with your own network definition.
# The resulting object must provide the `predict` and `evaluate` methods that
# are called in the cells further down.
model = ...

Training of the model. The number of epochs and the mini-batch size are defined below. The results at each epoch
are saved in order to compare the accuracy calculated for the training set and for the validation set. These data are
saved in the variable history.


In [None]:
# Exercise placeholder: replace the ellipsis with the training call.
# The result must expose a `history` dictionary holding the keys 'accuracy',
# 'val_accuracy', 'loss' and 'val_loss', which are plotted in later cells.
# NOTE(review): presumably the return value of model.fit(...) run with the
# validation set -- confirm.
history = ...

The accuracy of the model is tested using the testing set of data.

In [None]:
# Network outputs for the testing points
Results = model.predict(Testing_data)

# Coordinates of the testing points, grouped by predicted class
X0, Y0 = [], []
X1, Y1 = [], []
for n in range(len(Results)):
    # An output of at most 0.5 goes to class 0, anything else to class 1
    xs, ys = (X0, Y0) if Results[n]<=0.5 else (X1, Y1)
    xs.append(Testing_data[n,0])
    ys.append(Testing_data[n,1])

# Predicted class 0 in red, predicted class 1 in blue
plt.rcParams['figure.figsize'] = (6,7) # Make the figures a bit bigger
for xs, ys, marker in ((X0, Y0, 'r.'), (X1, Y1, 'b.')):
    plt.plot(xs, ys, marker, ms=10)
plt.xlabel('X test', fontsize=15)
plt.ylabel('Y test', fontsize=15)
plt.show()

In the same way, the model.evaluate function lets you measure the accuracy of the model on the testing
set. The second number it returns is the average accuracy.

In [None]:
# Evaluate the model on the testing set and display the result.
# NOTE(review): assumes the model reports an accuracy metric, so that the
# printed value is [loss, accuracy] -- confirm against the model definition.
Predication_accuracy = model.evaluate(Testing_data, Testing_label)
print(Predication_accuracy)

Below, the accuracy for the training and validation sets is plotted.

In [None]:
# Per-epoch metrics recorded during training
history_dict = history.history
print(history_dict.keys())

acc_values = history_dict['accuracy']
val_acc_values = history_dict['val_accuracy']

# Epochs are numbered from 1 on the horizontal axis
n = len(acc_values)
epochs = range(1, n + 1)

# Training accuracy as dots, validation accuracy as a line
plt.rcParams['figure.figsize'] = (6,6)
for values, marker, name in (
    (acc_values, 'bo', 'Training acc'),
    (val_acc_values, 'b', 'Validation acc'),
):
    plt.plot(epochs, values, marker, label=name)
plt.title('Training and validation accuracy')
plt.xlabel('Epochs', fontsize=15)
plt.ylabel('Accuracy', fontsize=15)
plt.legend()
plt.show()

Below, the loss for the training and validation sets is plotted.

In [None]:
# Per-epoch loss recorded during training
loss_values = history_dict['loss']
val_loss_values = history_dict['val_loss']

# Training loss as dots, validation loss as a line
plt.rcParams['figure.figsize'] = (6,6)
for values, marker, name in (
    (loss_values, 'bo', 'Training loss'),
    (val_loss_values, 'b', 'Validation loss'),
):
    plt.plot(epochs, values, marker, label=name)
plt.title('Training and validation loss')
plt.xlabel('Epochs', fontsize=15)
plt.ylabel('Loss', fontsize=15)
plt.legend()
plt.show()