In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import keras
from sklearn.model_selection import train_test_split
# Generate random data -- interleaved spiral arms, one per class, not linearly separable
np.random.seed(0)
N = 1000 # number of points per class
D = 2 # dimensionality
num_classes = 4 # number of classes
X = np.zeros((N*num_classes, D))
num_train_examples = X.shape[0]  # total number of generated points (before the train/validation split)
y = np.zeros(N*num_classes, dtype='uint8')
for j in range(num_classes):
    ix = slice(N*j, N*(j+1))  # contiguous rows that belong to class j
    r = np.linspace(0.0, 1, N) # radius
    t = np.linspace(j*4, (j+1)*4, N) + np.random.randn(N)*0.2 # theta (with Gaussian noise)
    X[ix] = np.c_[r*np.sin(t), r*np.cos(t)]
    y[ix] = j

fig = plt.figure()
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
# The title is built from num_classes so it always matches the generated data
# (it previously said "Three" while four classes were generated).
plt.title(f'Spiral {num_classes} Classes Problem')
plt.show()


# Hold out 33% of the points for validation; random_state fixes the split.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.33, random_state=42)

# One-hot encode the integer labels (one column per class).
y_train = keras.utils.to_categorical(y_train, num_classes)
y_val = keras.utils.to_categorical(y_val, num_classes)

In [None]:
from keras.layers import Input, Dense
from keras.models import Model

In [None]:
#The current model seems to stop learning without succeeding.
#Can you improve the performance while keeping the same number of neurons and layers?

#Try a better set of parameters in the SGD optimizer:
#a. OPTIMIZER=keras.optimizers.SGD(learning_rate=.75,decay=.1)
#b. OPTIMIZER=keras.optimizers.SGD(learning_rate=.0001)
#c. OPTIMIZER=keras.optimizers.SGD(learning_rate=1)
#d. OPTIMIZER=keras.optimizers.SGD(learning_rate=.01)
#e. OPTIMIZER=keras.optimizers.SGD(learning_rate=.01,momentum=.9, nesterov=False)
#f. OPTIMIZER=keras.optimizers.SGD(learning_rate=.01,momentum=.9, nesterov=True)
#Try different optimizers:
#g. OPTIMIZER=keras.optimizers.Adam(learning_rate=.01)
#h. OPTIMIZER=keras.optimizers.Nadam(learning_rate=.01)
#Are activation functions important?
#i.  Use Dense(5) instead of Dense(5,activation='relu')

# Baseline network: two hidden ReLU layers of 5 units each and a softmax output
# with one unit per class, trained with plain SGD on categorical cross-entropy.
OPTIMIZER = keras.optimizers.SGD(learning_rate=.75)

input_vec = Input(shape=(D,))
x = Dense(5, activation='relu')(input_vec)
x2 = Dense(5, activation='relu')(x)
x_out = Dense(num_classes, activation='softmax')(x2)

model = Model(inputs=input_vec, outputs=x_out)
model.summary()
model.compile(
    optimizer=OPTIMIZER,
    loss=keras.losses.CategoricalCrossentropy(),
)



In [None]:
from functools import reduce
from keras.callbacks import LambdaCallback
# One flat vector of all model parameters is appended here after every epoch.
parameters_value = []


def _snapshot_weights(epoch, logs=None):
    """Append a flat 1-D copy of all current model weights to ``parameters_value``.

    Keras calls ``on_epoch_end`` with the epoch index and the logs dict; neither
    is needed here. All weight arrays returned by ``model.get_weights()`` are
    raveled and joined with a single ``np.concatenate`` call, so the result is
    always 1-D (even when the model has only one weight array) and the
    accumulated vector is not re-copied once per array.
    """
    parameters_value.append(
        np.concatenate([np.ravel(w) for w in model.get_weights()])
    )


save_weights = LambdaCallback(on_epoch_end=_snapshot_weights)


In [None]:
# Train for 100 epochs with mini-batches of 24; save_weights is invoked by Keras
# during training. validation_data is passed as the (inputs, targets) tuple,
# which is the form documented for Model.fit.
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=24,
    callbacks=[save_weights],
    validation_data=(X_val, y_val),
)

In [None]:
# Training and validation loss per epoch, as recorded in the fit history.
plt.plot(history.history['loss'], label='training_loss')
plt.plot(history.history['val_loss'], label='validation_loss')
plt.legend()
plt.xlabel('Number of Epochs')
# This was a second plt.xlabel call, which overwrote the epoch label above
# and left the y-axis without a label.
plt.ylabel('Loss function on training data')

In [None]:
# Trace every recorded parameter across epochs (one curve per parameter).
plt.plot(parameters_value)
plt.ylabel('Paramater Value')
plt.xlabel('Number of Epoch')

# Reading guide for the figure.
for hint in (
    'Flat zones in the parameters are associated with non-learning problems',
    'Chaotics behavior in the parameters are associated with large learning rate values',
):
    print(hint)

In [None]:
# Stack the recorded per-epoch snapshots into a single array (rows = epochs).
# NOTE: this rebinds `parameters_value` from a list to an ndarray; the training
# callback calls `.append` on it, so re-run the cell that resets it to a list
# before training again.
parameters_value = np.array(parameters_value)

In [None]:
# One dot per parameter: mean over the recorded epochs (x) against
# standard deviation over the recorded epochs (y).
param_mean = np.mean(parameters_value, axis=0)
param_std = np.std(parameters_value, axis=0)
plt.plot(param_mean, param_std, '.')
plt.title('All Epochs')
plt.xlabel('Mean Value of Parameters')
plt.ylabel('Std Value of Parameters')
# Fixed viewing window; points outside it are not shown.
plt.xlim(-2, 2)
plt.ylim(0, .5)

In [None]:
# Classification map: predict a class for every point of a regular grid that
# covers the data range plus a margin of 1 on each side, then overlay the data.
h = 0.02  # grid step
x_min = X[:, 0].min() - 1
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1
y_max = X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

# Flatten the grid into an (n_points, 2) array, the input layout the model expects.
X_ = np.column_stack((xx.ravel(), yy.ravel()))

# Most probable class for each grid point, reshaped back onto the grid.
Z = model.predict(X_).argmax(axis=1).reshape(xx.shape)

fig = plt.figure()
plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.7)
plt.scatter(X[:, 0], X[:, 1], c=y, s=40, cmap=plt.cm.Spectral)
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.title('Classification Map')