# Convolutional Neural Networks II

In [None]:
!pip install utils

## Model 1: Image Classification with the CIFAR-10 Dataset

In [None]:
from __future__ import print_function
import keras
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os

In [None]:
# --- Training hyperparameters for the CIFAR-10 model ---
num_classes = 10            # number of target classes
batch_size = 32             # samples per batch
epochs = 100                # number of training epochs
data_augmentation = False   # True -> train on randomly augmented batches

# --- Where the trained model is written ---
model_name = 'keras_cifar10_trained_model.h5'
save_dir = os.path.join(os.getcwd(), 'saved_models')

In [None]:
# Load CIFAR-10; load_data() returns a (train, test) pair of (x, y) tuples.
train_split, test_split = cifar10.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Report the training array's shape, then the sample count of each split.
print('x_train shape:', x_train.shape)
for images, split_label in ((x_train, 'train samples'), (x_test, 'test samples')):
    print(images.shape[0], split_label)

In [None]:
# Convert X data from ints to floats; scale to be between 0 and 1.

In [None]:
# Convert class vectors to binary class matrices.

In [None]:
# Start an empty layer stack. The comment-only lines below appear to be blanks
# for the first convolutional block, to be filled in one layer per line.
model = Sequential()                              # Instantiate model.
         # Specify (number of filters, filter size, padding mode, input shape)

                     # ReLU activation function to be used.
                     # Specify tuple again. (padding default is 'valid', i.e. no padding)
                     # ReLU activation function to be used.
         # Pooling with filter size 2x2. (defaults to non-overlapping regions)
                          # Dropout 25% of nodes randomly at each training step.

In [None]:
# Convolutional stage with 64 filters: two 3x3 convolutions (the first with
# 'same' padding, the second with the default padding), each followed by ReLU,
# then 2x2 max pooling and dropout with rate 0.25.
conv_stage_64 = (
    Conv2D(64, (3, 3), padding='same'),
    Activation('relu'),
    Conv2D(64, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
)
for layer in conv_stage_64:
    model.add(layer)

In [None]:
# Classification head: flatten the feature maps, apply a 512-unit dense layer
# with ReLU and dropout (rate 0.5), then a `num_classes`-unit softmax output.
dense_head = (
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
)
for layer in dense_head:
    model.add(layer)

In [None]:
# Initiate the RMSprop optimizer (used instead of plain gradient descent).
# The `RMSprop` class is referenced directly: the lowercase `rmsprop` name is only
# an alias that some Keras releases provide, and it raises AttributeError on the
# releases that do not.
# NOTE(review): recent Keras releases rename `lr` to `learning_rate` and may not
# accept `decay`; the arguments are left as-is to stay compatible with the older
# Keras API the rest of this notebook imports from.
opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

In [None]:
# Let's train the model using RMSprop

In [None]:
# Fit the network either on batches from a real-time augmentation generator or
# directly on the in-memory arrays, depending on `data_augmentation`.
if data_augmentation:
    print('Using real-time data augmentation.')
    # Preprocessing / augmentation settings: every normalisation and whitening
    # switch is off; only small shifts and horizontal flips are enabled.
    augmentation_settings = dict(
        featurewise_center=False,             # set input mean to 0 over the dataset
        samplewise_center=False,              # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,   # divide each input by its std
        zca_whitening=False,                  # apply ZCA whitening
        rotation_range=0,                     # random rotation range in degrees (0 to 180)
        width_shift_range=0.1,                # random horizontal shift (fraction of total width)
        height_shift_range=0.1,               # random vertical shift (fraction of total height)
        horizontal_flip=True,                 # randomly flip images horizontally
        vertical_flip=False,                  # randomly flip images vertically
    )
    datagen = ImageDataGenerator(**augmentation_settings)

    # Compute the quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Train on the batches yielded by datagen.flow().
    augmented_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
    model.fit_generator(augmented_batches,
                        epochs=epochs,
                        validation_data=(x_test, y_test),
                        workers=4)
else:
    print('Not using data augmentation.')
    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              validation_data=(x_test, y_test),
              shuffle=True)

In [None]:
# Persist the trained model under save_dir/model_name.
model_path = os.path.join(save_dir, model_name)

# Create the output directory first if it is not there yet.
output_dir_exists = os.path.isdir(save_dir)
if not output_dir_exists:
    os.makedirs(save_dir)

model.save(model_path)
print('Saved trained model at %s ' % model_path)

In [None]:
# Score trained model.


In [None]:
# Check out summary of model.

## Model 2: Forecasting the Stock Market using the Stock Market

In [None]:
from utils import *

import numpy as np
import pandas as pd
import matplotlib.pylab as plt

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution1D, MaxPooling1D
from keras import regularizers
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import *
from keras.optimizers import Adam
from keras.initializers import *

from sklearn.model_selection import train_test_split

import seaborn as sns
# NOTE(review): no figure has been created yet at this point, so this call
# presumably has no visible effect on the later plots — confirm it is still needed.
sns.despine()

In [None]:
# Read the price history and reverse the row order.
# NOTE(review): presumably the CSV is stored newest-first, so reversing yields
# chronological order — confirm against the file.
raw_prices = pd.read_csv('../AAPL1216.csv')
data_original = raw_prices.iloc[::-1]

In [None]:
## Preprocessing

# Pull each price/volume column out of the frame as a plain Python list.
openp = data_original['Open'].tolist()
highp = data_original['High'].tolist()
lowp = data_original['Low'].tolist()
volumep = data_original['Volume'].tolist()

adj_close = data_original['Adj Close']
closep = adj_close.tolist()
# Row-over-row fractional change of the adjusted close. NaN entries are dropped
# (at least the first row, which has no predecessor), so this list is shorter
# than the others.
data_chng = adj_close.pct_change().dropna().tolist()

In [None]:
# Sliding-window settings for building the samples.
FORECAST = 1   # offset added to the window end when picking the label row
STEP = 1       # stride between consecutive window starts
WINDOW = 30    # number of rows per window
EMB_SIZE = 6   # "Embedding size" a.k.a. input size

# Accumulators for the samples and their labels.
X = []
Y = []

In [None]:
# Build one (features, label) sample per sliding window over the price history.

# Start from empty accumulators so that re-running this cell does not append
# duplicate samples (or fail if X/Y were already converted to arrays).
X, Y = [], []

# A window starting at row i needs a label at row i + WINDOW + FORECAST, so the
# valid starts are exactly those for which that row exists. Bounding the range
# explicitly replaces the previous catch-all `except Exception: break`, which
# ended the loop at the same place but also silently hid any genuine error.
n_starts = len(closep) - WINDOW - FORECAST
for i in range(0, n_starts, STEP):
    window = slice(i, i + WINDOW)

    # Z-score each feature within its own window (a constant window has zero
    # standard deviation and therefore yields NaN/inf values).
    # NOTE(review): `data_chng` has its leading NaN dropped, so entry k is the
    # change from row k to row k+1. The last change in this window therefore
    # involves row i + WINDOW, which lies after the window — possible look-ahead;
    # confirm the intended alignment.
    columns = []
    for series in (openp, highp, lowp, closep, volumep, data_chng):
        values = np.array(series[window])
        columns.append((values - np.mean(values)) / np.std(values))

    # Label: [1, 0] if the later close is above the window's last close,
    # otherwise [0, 1].
    # NOTE(review): the label row is FORECAST + 1 rows after the last row of the
    # window (i + WINDOW - 1); confirm that this is the intended horizon.
    last_close = closep[i + WINDOW - 1]
    next_close = closep[i + WINDOW + FORECAST]
    y_i = [1, 0] if last_close < next_close else [0, 1]

    # Shape (WINDOW, 6): one column per feature.
    x_i = np.column_stack(columns)

    X.append(x_i)
    Y.append(y_i)

In [None]:
# Turn the sample and label lists into arrays.
X = np.array(X)
Y = np.array(Y)

# Hold out 25% of the samples for testing, with a fixed seed.
# NOTE(review): with STEP = 1 consecutive windows overlap heavily, and this split
# is random rather than chronological, so near-duplicate windows can presumably
# land on both sides of the split — confirm this is acceptable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.25, random_state=42)

# Make the (samples, rows per window, EMB_SIZE) layout explicit.
n_train, train_steps = X_train.shape[:2]
n_test, test_steps = X_test.shape[:2]
X_train = X_train.reshape((n_train, train_steps, EMB_SIZE))
X_test = X_test.reshape((n_test, test_steps, EMB_SIZE))

In [None]:
# Start a new model. First block: a 16-filter 1-D convolution (kernel size 4,
# 'same' padding) over inputs of shape (WINDOW, EMB_SIZE), then batch
# normalisation of its outputs, LeakyReLU, and dropout with rate 0.5.
model = Sequential()
conv_block_16 = (
    Convolution1D(input_shape=(WINDOW, EMB_SIZE),
                  filters=16,
                  kernel_size=4,
                  padding='same'),
    BatchNormalization(),
    LeakyReLU(),
    Dropout(0.5),
)
for layer in conv_block_16:
    model.add(layer)

In [None]:
# Second block: an 8-filter 1-D convolution (kernel size 4, 'same' padding),
# then batch normalisation, LeakyReLU, and dropout with rate 0.5.
conv_block_8 = (
    Convolution1D(filters=8, kernel_size=4, padding='same'),
    BatchNormalization(),
    LeakyReLU(),
    Dropout(0.5),
)
for layer in conv_block_8:
    model.add(layer)

In [None]:
# Flatten the convolutional output, then apply a 64-unit dense layer with batch
# normalisation and LeakyReLU.
dense_block_64 = (
    Flatten(),
    Dense(64),
    BatchNormalization(),
    LeakyReLU(),
)
for layer in dense_block_64:
    model.add(layer)

In [None]:
# Output: two units with softmax, one per entry of the two-element labels.
for layer in (Dense(2), Activation('softmax')):
    model.add(layer)

In [None]:
# Compile with Adam and categorical cross-entropy, tracking accuracy.
compile_options = dict(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.compile(**compile_options)

# Train for 100 epochs in shuffled batches of 128, validating on the held-out
# split; the returned History object is kept for the plots below.
fit_options = dict(
    epochs=100,
    batch_size=128,
    verbose=1,
    validation_data=(X_test, Y_test),
    shuffle=True,
)
history = model.fit(X_train, Y_train, **fit_options)

In [None]:
# Make predictions.


In [None]:
from sklearn.metrics import confusion_matrix

# Collapse the two-element label vectors and the predictions to class indices.
# `pred` must hold the model's predictions for X_test (one row per test sample).
true_classes = np.argmax(Y_test, axis=1)
pred_classes = np.argmax(pred, axis=1)
C = confusion_matrix(true_classes, pred_classes)

# Layout of the matrix printed below. With the labels built earlier ([1, 0] when
# the later close is higher, otherwise [0, 1]), class index 1 — the "positive"
# class in this layout — means the price did not rise.
print(["TN", "FP"])
print(["FN", "TP"])
print()
# Show each cell as a proportion of all test samples. The builtin `float` is used
# because the `np.float` alias is no longer provided by current NumPy releases.
print(C / C.astype(float).sum())

In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'test'], loc='best')  # 'best' lets matplotlib choose the legend position
plt.show();

In [None]:
# Training vs. validation accuracy per epoch.
# Keras records the 'accuracy' metric under the key 'acc' in older releases and
# 'accuracy' in newer ones, so use whichever key this History object contains.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

plt.figure()
plt.plot(history.history[acc_key])
plt.plot(history.history['val_' + acc_key])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='best')
plt.show();

<details> <summary> How might we improve the performance of this model?
</summary>
```
- Our test error looks as though it's leveled off, but it's possible we're in a local minimum. If so, our model would be suffering from some error due to bias. We can account for that by:
    - increasing the number of epochs.
    - building a deeper network.
    - regularizing less than we are now (perhaps dropout is inflating our bias, for example).
    - adding more features!
    - If we try these things and our model continually performs worse, then we have evidence that our model is at risk of overfitting, so we wouldn't want to make our model more complex.
- We might also return to "standard" classification techniques:
    - consider changing our threshold for predicting "yes."
    - we might want to optimize for specificity or sensitivity.
    - luckily our classes look balanced, so no adjustment is needed there.
```
</details>

<details> <summary> What changes would I make to turn this into a regression problem?
</summary>
```
- Instead of predicting Y in {higher, lower}, we want to predict the change in the price, which can *theoretically* be anywhere between negative infinity and positive infinity.
    - Change the output layer to only have one node.
    - Change the final activation function to be linear.
- Note: It'll be easier for us to predict higher versus lower than to predict the exact amount of change in a stock price.
```
</details>