# Keras tutorial

## About loss

## Import packages

In [None]:
import numpy as np

## Read file (.csv)

In [None]:
# Load the CSV into a numeric array, skipping the single header row.
my_data = np.genfromtxt(
    'pkgo_city66_class5_v1.csv',
    delimiter=',',
    skip_header=1,
)
my_data

In [None]:
# Columns 0-199 (the first 200 columns) are the input features.
X_train = my_data[:, 0:200].astype('float32')

# Column index 200 (the 201st column) holds the class label, stored as int.
y_train = my_data[:, 200].astype('int')

## The target looks like this

In [None]:
# Show the integer class labels (rendered as this cell's output).
y_train

##  Convert to one-hot encoding

In [None]:
from keras.utils import np_utils

# Expand each integer label into a one-hot row with 5 class columns.
Y_train = np_utils.to_categorical(y_train, num_classes=5)

##  Shuffle training data

In [None]:
from sklearn.utils import shuffle

# Shuffle features and one-hot targets together so rows stay aligned;
# the fixed seed makes the ordering reproducible.
X_train, Y_train = shuffle(
    X_train,
    Y_train,
    random_state=100,
)

## Import keras to build a DL model

In [None]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation

# Loss function
## Building a model with categorical_crossentropy

In [None]:
# Network to be trained with categorical cross-entropy:
# 200 inputs -> 128 sigmoid -> 256 sigmoid -> 5-way softmax.
model_ce = Sequential([
    Dense(128, input_dim=200),
    Activation('sigmoid'),
    Dense(256),
    Activation('sigmoid'),
    Dense(5),
    Activation('softmax'),
])

## Building a model with mean_squared_error

In [None]:
# Same architecture as the cross-entropy model, to be trained with MSE:
# 200 inputs -> 128 sigmoid -> 256 sigmoid -> 5-way softmax.
model_mse = Sequential([
    Dense(128, input_dim=200),
    Activation('sigmoid'),
    Dense(256),
    Activation('sigmoid'),
    Dense(5),
    Activation('softmax'),
])

## Set up the optimizer

In [None]:
from keras.optimizers import SGD, Adam, RMSprop, Adagrad

# Plain SGD: learning rate 0.01, no momentum, no decay, no Nesterov.
sgd = SGD(
    lr=0.01,
    momentum=0.0,
    decay=0.0,
    nesterov=False,
)

## Compile model with specified loss and optimizer

### loss with crossentropy

In [None]:
# Cross-entropy loss, the shared SGD optimizer, accuracy as the metric.
model_ce.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### loss with mean_squared_error

In [None]:
# Mean-squared-error loss, the shared SGD optimizer, accuracy as the metric.
model_mse.compile(
    optimizer=sgd,
    loss='mean_squared_error',
    metrics=['accuracy'],
)

## set the size of mini-batch and number of epochs

In [None]:
# Mini-batch size and number of training epochs used by every fit() call.
batch_size, epochs = 16, 30

## Fit models and use validation_split=0.1

In [None]:
# Train the cross-entropy model silently, holding out 10% of the data
# for validation.
history_ce = model_ce.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    shuffle=True,
    verbose=0,
)

In [None]:
# Train the MSE model silently, holding out 10% of the data for validation.
history_mse = model_mse.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    shuffle=True,
    verbose=0,
)

## Access the loss and accuracy in every epoch

In [None]:
# Per-epoch training loss and accuracy of both models.
# The accuracy key is 'acc' in older Keras and 'accuracy' in newer releases;
# fall back to the latter so the curves are never None.
loss_ce = history_ce.history.get('loss')
acc_ce = history_ce.history.get('acc', history_ce.history.get('accuracy'))
loss_mse = history_mse.history.get('loss')
acc_mse = history_mse.history.get('acc', history_mse.history.get('accuracy'))

## Visualization

In [None]:
import matplotlib.pyplot as plt

# Side-by-side comparison of the two loss functions:
# training loss on the left, training accuracy on the right.
plt.figure(0, figsize=(8, 6))

plt.subplot(1, 2, 1)
for curve_label, curve in (('CE', loss_ce), ('MSE', loss_mse)):
    plt.plot(range(len(curve)), curve, label=curve_label)
plt.title('Loss')
plt.xlabel("epoch")
plt.legend(loc='lower right')

plt.subplot(1, 2, 2)
for curve_label, curve in (('CE', acc_ce), ('MSE', acc_mse)):
    plt.plot(range(len(curve)), curve, label=curve_label)
plt.title('Accuracy')
plt.xlabel("epoch")
plt.legend(loc='lower right')

plt.tight_layout()
# Save before show() so the written image contains the drawn figure.
plt.savefig('01_lossFuncSelection.png', dpi=300, format='png')
plt.show()

# Learning rate (with the loss function fixed)

In [None]:
# Largest learning rate of the comparison: 0.1.
sgd1 = SGD(
    lr=0.1,
    momentum=0.0,
    decay=0.0,
    nesterov=False,
)

In [None]:
# Middle learning rate of the comparison: 0.01.
sgd2 = SGD(
    lr=0.01,
    momentum=0.0,
    decay=0.0,
    nesterov=False,
)

In [None]:
# Smallest learning rate of the comparison: 0.001.
sgd3 = SGD(
    lr=0.001,
    momentum=0.0,
    decay=0.0,
    nesterov=False,
)

## Compile

* **SGD, lr = 0.1**

In [None]:
# model_ce has already been trained in earlier cells, and compile() keeps
# the learned weights. Rebuild the network from its config so the lr=0.1
# run starts from freshly initialised weights.
model_ce = Sequential.from_config(model_ce.get_config())
model_ce.compile(
    loss='categorical_crossentropy',
    optimizer=sgd1,
    metrics=['accuracy'],
)

In [None]:
# Train with the sgd1 optimizer silently, holding out 10% of the data
# for validation.
history_ce1 = model_ce.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    shuffle=True,
    verbose=0,
)

In [None]:
# Per-epoch training curves of the sgd1 run.
# NOTE: despite the "small" suffix, sgd1 uses lr=0.1, the largest rate.
# The accuracy key is 'acc' in older Keras and 'accuracy' in newer releases.
loss_small = history_ce1.history.get('loss')
acc_small = history_ce1.history.get('acc', history_ce1.history.get('accuracy'))

 * **SGD, lr = 0.01**

In [None]:
# model_ce has already been trained in earlier cells, and compile() keeps
# the learned weights. Rebuild the network from its config so the lr=0.01
# run starts from freshly initialised weights.
model_ce = Sequential.from_config(model_ce.get_config())
model_ce.compile(
    loss='categorical_crossentropy',
    optimizer=sgd2,
    metrics=['accuracy'],
)

In [None]:
# Train with the sgd2 optimizer silently, holding out 10% of the data
# for validation.
history_ce2 = model_ce.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    shuffle=True,
    verbose=0,
)

In [None]:
# Per-epoch training curves of the sgd2 (lr=0.01) run.
# The accuracy key is 'acc' in older Keras and 'accuracy' in newer releases.
loss_median = history_ce2.history.get('loss')
acc_median = history_ce2.history.get('acc', history_ce2.history.get('accuracy'))

 * **SGD, lr = 0.001**

In [None]:
# model_ce has already been trained in earlier cells, and compile() keeps
# the learned weights. Rebuild the network from its config so the lr=0.001
# run starts from freshly initialised weights.
model_ce = Sequential.from_config(model_ce.get_config())
model_ce.compile(
    loss='categorical_crossentropy',
    optimizer=sgd3,
    metrics=['accuracy'],
)

In [None]:
# Train with the sgd3 optimizer silently, holding out 10% of the data
# for validation.
history_ce3 = model_ce.fit(
    X_train,
    Y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
    shuffle=True,
    verbose=0,
)

In [None]:
# Per-epoch training curves of the sgd3 run.
# NOTE: despite the "large" suffix, sgd3 uses lr=0.001, the smallest rate.
# The accuracy key is 'acc' in older Keras and 'accuracy' in newer releases.
loss_large = history_ce3.history.get('loss')
acc_large = history_ce3.history.get('acc', history_ce3.history.get('accuracy'))

## Visualization

In [None]:
import matplotlib.pyplot as plt

# Learning-rate comparison: training loss (left) and accuracy (right).
# The variable suffixes do not match the rates they hold:
#   *_small  <- history_ce1, trained with sgd1 (lr=0.1)
#   *_median <- history_ce2, trained with sgd2 (lr=0.01)
#   *_large  <- history_ce3, trained with sgd3 (lr=0.001)
# so each curve is labelled with the rate that actually produced it.
plt.figure(0, figsize=(8, 6))

plt.subplot(121)
plt.plot(range(len(loss_small)), loss_small, label='lr=0.1')
plt.plot(range(len(loss_median)), loss_median, label='lr=0.01')
plt.plot(range(len(loss_large)), loss_large, label='lr=0.001')
plt.title('Loss')
plt.xlabel("epoch")
plt.legend(loc='lower right')

plt.subplot(122)
plt.plot(range(len(acc_small)), acc_small, label='lr=0.1')
plt.plot(range(len(acc_median)), acc_median, label='lr=0.01')
plt.plot(range(len(acc_large)), acc_large, label='lr=0.001')
plt.title('Accuracy')
plt.xlabel("epoch")
plt.legend(loc='lower right')

plt.tight_layout()
plt.show()

# Activation function (with the loss function and learning rate fixed)

## RELU

In [None]:
# ReLU variant: 200 inputs -> 128 relu -> 256 relu -> 5-way softmax.
model_sp = Sequential([
    Dense(128, input_dim=200),
    Activation('relu'),
    Dense(256),
    Activation('relu'),
    Dense(5),
    Activation('softmax'),
])


In [None]:
# Compile and train the ReLU model with cross-entropy and the shared SGD
# optimizer, holding out 10% of the data for validation.
model_sp.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

history_sp = model_sp.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    shuffle=True,
    validation_split=0.1,
)

# Per-epoch training curves. The accuracy key is 'acc' in older Keras and
# 'accuracy' in newer releases; fall back so the curve is never None.
loss_sp = history_sp.history.get('loss')
acc_sp = history_sp.history.get('acc', history_sp.history.get('accuracy'))

## Sigmoid

In [None]:
# Sigmoid variant: 200 inputs -> 128 sigmoid -> 256 sigmoid -> 5-way softmax.
model_bm = Sequential([
    Dense(128, input_dim=200),
    Activation('sigmoid'),
    Dense(256),
    Activation('sigmoid'),
    Dense(5),
    Activation('softmax'),
])

In [None]:
# Compile and train the sigmoid model with cross-entropy and the shared SGD
# optimizer, holding out 10% of the data for validation.
model_bm.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

history_bm = model_bm.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    shuffle=True,
    validation_split=0.1,
)

# Per-epoch training curves. The accuracy key is 'acc' in older Keras and
# 'accuracy' in newer releases; fall back so the curve is never None.
loss_bm = history_bm.history.get('loss')
acc_bm = history_bm.history.get('acc', history_bm.history.get('accuracy'))

## Visualization

In [None]:
import matplotlib.pyplot as plt

# ReLU vs. sigmoid: training loss on the left, training accuracy on the right.
plt.figure(0, figsize=(8, 6))

plt.subplot(1, 2, 1)
for curve_label, curve in (('relu', loss_sp), ('Sigmoid', loss_bm)):
    plt.plot(range(len(curve)), curve, label=curve_label)
plt.title('Loss')
plt.xlabel("epoch")
plt.legend(loc='upper left')

plt.subplot(1, 2, 2)
for curve_label, curve in (('relu', acc_sp), ('Sigmoid', acc_bm)):
    plt.plot(range(len(curve)), curve, label=curve_label)
plt.title('Accuracy')
plt.xlabel("epoch")
plt.legend(loc='upper left')

plt.tight_layout()
plt.show()

# Optimizer
Optimization algorithms help us **minimize** a **loss** function. <br>
Here, we present four different optimizers.<br>
### How to Select Optimizers
* People usually use **Adam**
    * Adaptive learning rate for each weight
    * Momentum included
* Recommend **RMSprop** in RNN
    * **Exploding gradients** may occur during training
    * Solution : Clip gradient 
    
### Parameters common to all Keras optimizers
* clipnorm
    * EX. sgd = optimizers.SGD(lr=0.01, **clipnorm=1.**)
    * All parameter gradients will be clipped to a maximum norm of 1.
* clipvalue
    * EX. sgd = optimizers.SGD(lr=0.01, **clipvalue=0.5**)
    * All parameter gradients will be clipped to a maximum value of 0.5 and a minimum value of -0.5.


In [None]:
from keras.optimizers import SGD, RMSprop, Adagrad, Adam

## SGD
Stochastic Gradient Descent
* **lr** : float >= 0. Learning rate.
* **momentum** : float >= 0. Accelerates SGD in the relevant direction.
* **decay**: float >= 0. Learning rate decay over each update.
* **nesterov**: boolean. Whether to apply Nesterov momentum.

---
![](https://i.imgur.com/vpCTJih.png)


In [None]:
# model_sp has already been trained in earlier cells, and compile() keeps
# the learned weights. Rebuild the network from its config so the SGD run
# starts from freshly initialised weights.
model_sp = Sequential.from_config(model_sp.get_config())
model_sp.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(lr=0.01),
    metrics=['accuracy'],
)
history_sgd = model_sp.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    shuffle=True,
    validation_split=0.1,
)

# Per-epoch training curves. The accuracy key is 'acc' in older Keras and
# 'accuracy' in newer releases; fall back so the curve is never None.
loss_sgd = history_sgd.history.get('loss')
acc_sgd = history_sgd.history.get('acc', history_sgd.history.get('accuracy'))

## Adagrad
Adaptive Learning Rate <br>
Modified by the **root mean square** of all **previous gradients**.

---
![](https://i.imgur.com/Vp0HPn0.png)
![](https://i.imgur.com/BoYUtM7.png)

In [None]:
# model_sp has already been trained in earlier cells, and compile() keeps
# the learned weights. Rebuild the network from its config so the Adagrad
# run starts from freshly initialised weights.
model_sp = Sequential.from_config(model_sp.get_config())
model_sp.compile(
    loss='categorical_crossentropy',
    optimizer=Adagrad(lr=0.01),
    metrics=['accuracy'],
)
history_ada = model_sp.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    shuffle=True,
    validation_split=0.1,
)

# Per-epoch training curves. The accuracy key is 'acc' in older Keras and
# 'accuracy' in newer releases; fall back so the curve is never None.
loss_ada = history_ada.history.get('loss')
acc_ada = history_ada.history.get('acc', history_ada.history.get('accuracy'))

## RMSprop
Similar to Adagrad; also modified by previous gradients. <br>
RMSprop is a good choice for **RNN**.

---
![](https://i.imgur.com/0Lf7JUd.png)

In [None]:
# model_sp has already been trained in earlier cells, and compile() keeps
# the learned weights. Rebuild the network from its config so the RMSprop
# run starts from freshly initialised weights.
model_sp = Sequential.from_config(model_sp.get_config())
model_sp.compile(
    loss='categorical_crossentropy',
    optimizer=RMSprop(lr=0.01),
    metrics=['accuracy'],
)
history_rms = model_sp.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    shuffle=True,
    validation_split=0.1,
)

# Per-epoch training curves. The accuracy key is 'acc' in older Keras and
# 'accuracy' in newer releases; fall back so the curve is never None.
loss_rms = history_rms.history.get('loss')
acc_rms = history_rms.history.get('acc', history_rms.history.get('accuracy'))

## Adam
Similar to RMSprop combined with momentum

In [None]:
# model_sp has already been trained in earlier cells, and compile() keeps
# the learned weights. Rebuild the network from its config so the Adam run
# starts from freshly initialised weights.
model_sp = Sequential.from_config(model_sp.get_config())
model_sp.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(lr=0.01),
    metrics=['accuracy'],
)
history_adam = model_sp.fit(
    X_train,
    Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0,
    shuffle=True,
    validation_split=0.1,
)

# Per-epoch training curves. The accuracy key is 'acc' in older Keras and
# 'accuracy' in newer releases; fall back so the curve is never None.
loss_adam = history_adam.history.get('loss')
acc_adam = history_adam.history.get('acc', history_adam.history.get('accuracy'))

## Visualization

---
**Adagrad, Adadelta** and **RMSprop** find the correct direction in a short time. <br>
![](https://static.leiphone.com/uploads/new/article/740_740/201706/5943a067842cf.gif)

In [None]:
import matplotlib.pyplot as plt

# Optimizer comparison: training loss (left) and training accuracy (right),
# one curve per optimizer in the order Adam, SGD, RMS, Ada.
plt.figure(0, figsize=(8, 6))

plt.subplot(1, 2, 1)
for curve_label, curve in (
    ('Adam', loss_adam),
    ('SGD', loss_sgd),
    ('RMS', loss_rms),
    ('Ada', loss_ada),
):
    plt.plot(range(len(curve)), curve, label=curve_label)
plt.title('Loss')
plt.xlabel("epoch")
plt.legend(loc='upper left')

plt.subplot(1, 2, 2)
for curve_label, curve in (
    ('Adam', acc_adam),
    ('SGD', acc_sgd),
    ('RMS', acc_rms),
    ('Ada', acc_ada),
):
    plt.plot(range(len(curve)), curve, label=curve_label)
plt.legend(loc='upper left')
plt.title('Accuracy')
plt.xlabel("epoch")

plt.show()

## Callback