### Load the training data from AWS S3

In [1]:
import pandas as pd
import boto3

# S3 location of the training set: images and labels live in separate CSV objects.
bucket = "cmpe258-cifar-100"
file_name_x = "CIFAR_train_images.csv"
file_name_y = "CIFAR_train_labels.csv"

# Build an S3 client using the default boto3 configuration.
s3 = boto3.client("s3")

# Request each object from the bucket by its key.
obj_x = s3.get_object(Bucket=bucket, Key=file_name_x)
obj_y = s3.get_object(Bucket=bucket, Key=file_name_y)

# The 'Body' entry of each response is handed to pandas and parsed as CSV.
x_df = pd.read_csv(obj_x["Body"])
y_df = pd.read_csv(obj_y["Body"])

In [2]:
# Convert the labels (first column of the label frame) to one-hot format.
# NOTE: this rebinds y_df, so the cell must run exactly once after the load cell.
label_column = y_df.iloc[:, 0]
y_df = pd.get_dummies(label_column)

In [3]:
# One-hot label matrix as a plain NumPy array.
Y_train = y_df.values

# Each CSV row is a flattened channel-first image: unflatten it to
# (N, 3, 32, 32), move the channel axis last to get (N, 32, 32, 3),
# cast to float32 and scale pixel values by 1/255
# (maps [0, 255] to [0, 1]).
X_train = (
    x_df.values
    .reshape((-1, 3, 32, 32))
    .transpose([0, 2, 3, 1])
    .astype('float32')
    / 255.
)

In [4]:
# Split the data to train & validation
from sklearn.model_selection import train_test_split
# Hold out 20% of the samples for validation. The split is shuffled, so a fixed
# random_state is supplied to make the partition (and every metric computed from
# it) reproducible across kernel restarts.
x_train, x_val, y_train, y_val = train_test_split(
    X_train, Y_train, test_size=0.2, shuffle=True, random_state=42)

### Build the CNN model

In [5]:
from __future__ import print_function
import keras
#from keras.datasets import cifar100
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

import os
import pickle
import h5py

Using TensorFlow backend.





In [8]:
# Number of target classes; used as the width of the final Dense (softmax) layer.
num_classes = 100
#save_dir = os.path.join(os.getcwd(), 'saved_models')
#model_name = 'cifar100.h5'

In [11]:
# Define loss function
from keras import backend as K
def root_mean_squared_error(y_true, y_pred):
    """Return the root-mean-squared error between targets and predictions.

    The squared differences are averaged over every element (no axis is
    passed to ``K.mean``) before the square root is taken.

    Args:
        y_true: backend tensor of target values.
        y_pred: backend tensor of predicted values, same shape as ``y_true``.

    Returns:
        Backend tensor holding sqrt(mean((y_pred - y_true) ** 2)).
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error)
    return K.sqrt(mean_squared_error)

In [12]:
# Create Model
# Three convolutional stages (128 -> 256 -> 512 filters). Each stage is
#   Conv(3x3, 'same') -> ELU -> Conv(3x3, default 'valid' padding) -> ELU -> MaxPool(2x2)
# followed by a 1024-unit dense layer and a softmax over `num_classes`.
model = Sequential()

# Stage 1: the input shape is taken from the training images (all axes but the batch axis).
model.add(Conv2D(128, (3, 3), padding='same',
                 input_shape=x_train.shape[1:]))
model.add(Activation('elu'))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('elu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
# No dropout after the first stage.

# Stage 2
model.add(Conv2D(256, (3, 3), padding='same'))
model.add(Activation('elu'))
model.add(Conv2D(256, (3, 3)))
model.add(Activation('elu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Stage 3
model.add(Conv2D(512, (3, 3), padding='same'))
model.add(Activation('elu'))
model.add(Conv2D(512, (3, 3)))
model.add(Activation('elu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classifier head
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation('elu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# Initiate the RMSprop optimizer. The canonical class name `RMSprop` is used
# rather than the lowercase `rmsprop` alias, which is not part of the
# documented optimizer API.
opt = keras.optimizers.RMSprop(lr=0.0001, decay=1e-6)

# Compile with the custom RMSE loss and track accuracy.
# NOTE(review): categorical cross-entropy is the conventional loss for a softmax
# classifier; RMSE is kept here so results stay comparable with earlier runs.
model.compile(loss=root_mean_squared_error,
              optimizer=opt,
              metrics=['accuracy'])

### Train the model

In [13]:
# Train for `epochs` passes over the training data, one pass per loop iteration,
# and record the validation metrics ([loss, accuracy]) after every pass.
epochs = 200
data_augmentation = True
num_predictions = 20
batch_size = 64
validations = []

# The augmentation pipeline does not change between epochs, so it is built
# (and fitted) once instead of on every iteration.
datagen = None
if data_augmentation:
    # This will do preprocessing and realtime data augmentation:
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False)  # randomly flip images

    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

for i in range(epochs):
    print('total epoch: ', i+1)
    if not data_augmentation:
        print('Not using data augmentation.')
        # Exactly one epoch per iteration: the surrounding loop already
        # repeats `epochs` times, so passing epochs=epochs here would train
        # for epochs * epochs passes.
        model.fit(x_train, y_train,
                  batch_size=batch_size,
                  epochs=1,
                  shuffle=True)
    else:
        print('Using real-time data augmentation.')
        # Fit the model on the batches generated by datagen.flow().
        model.fit_generator(datagen.flow(x_train, y_train,
                                         batch_size=batch_size),
                            steps_per_epoch=x_train.shape[0] // batch_size,
                            epochs=1)

    # Score on the untouched validation images in both modes. Evaluating on
    # randomly augmented batches makes the metric noisy, and a floor-divided
    # step count leaves out the final partial batch.
    validations.append(model.evaluate(x_val, y_val,
                                      batch_size=batch_size,
                                      verbose=0))

total epoch:  1
Using real-time data augmentation.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Epoch 1/1





total epoch:  2
Using real-time data augmentation.
Epoch 1/1
total epoch:  3
Using real-time data augmentation.
Epoch 1/1
total epoch:  4
Using real-time data augmentation.
Epoch 1/1
total epoch:  5
Using real-time data augmentation.
Epoch 1/1
total epoch:  6
Using real-time data augmentation.
Epoch 1/1
total epoch:  7
Using real-time data augmentation.
Epoch 1/1
total epoch:  8
Using real-time data augmentation.
Epoch 1/1
total epoch:  9
Using real-time data augmentation.
Epoch 1/1
total epoch:  10
Using real-time data augmentation.
Epoch 1/1
total epoch:  11
Using real-time data augmentation.
Epoch 1/1
total epoch:  12
Using real-time data augmentation.
Epoch 1/1
total epoch:  13
Using real-time data augmentation.
Epoch 1/1
total epoch:  14
Using real-time data augmentation.
Epoch 1/1
total epoch:  15
Using real-time data a

total epoch:  42
Using real-time data augmentation.
Epoch 1/1
total epoch:  43
Using real-time data augmentation.
Epoch 1/1
total epoch:  44
Using real-time data augmentation.
Epoch 1/1
total epoch:  45
Using real-time data augmentation.
Epoch 1/1
total epoch:  46
Using real-time data augmentation.
Epoch 1/1
total epoch:  47
Using real-time data augmentation.
Epoch 1/1
total epoch:  48
Using real-time data augmentation.
Epoch 1/1
total epoch:  49
Using real-time data augmentation.
Epoch 1/1
total epoch:  50
Using real-time data augmentation.
Epoch 1/1
total epoch:  51
Using real-time data augmentation.
Epoch 1/1
total epoch:  52
Using real-time data augmentation.
Epoch 1/1
total epoch:  53
Using real-time data augmentation.
Epoch 1/1
total epoch:  54
Using real-time data augmentation.
Epoch 1/1
total epoch:  55
Using real-time data augmentation.
Epoch 1/1
total epoch:  56
Using real-time data augmentation.
Epoch 1/1
total epoch:  57
Using real-time data augmentation.
Epoch 1/1
total ep

total epoch:  97
Using real-time data augmentation.
Epoch 1/1
total epoch:  98
Using real-time data augmentation.
Epoch 1/1
total epoch:  99
Using real-time data augmentation.
Epoch 1/1
total epoch:  100
Using real-time data augmentation.
Epoch 1/1
total epoch:  101
Using real-time data augmentation.
Epoch 1/1
total epoch:  102
Using real-time data augmentation.
Epoch 1/1
total epoch:  103
Using real-time data augmentation.
Epoch 1/1
total epoch:  104
Using real-time data augmentation.
Epoch 1/1
total epoch:  105
Using real-time data augmentation.
Epoch 1/1
total epoch:  106
Using real-time data augmentation.
Epoch 1/1
total epoch:  107
Using real-time data augmentation.
Epoch 1/1
total epoch:  108
Using real-time data augmentation.
Epoch 1/1
total epoch:  109
Using real-time data augmentation.
Epoch 1/1
total epoch:  110
Using real-time data augmentation.
Epoch 1/1
total epoch:  111
Using real-time data augmentation.
Epoch 1/1
total epoch:  112
Using real-time data augmentation.
Epoch

total epoch:  152
Using real-time data augmentation.
Epoch 1/1
total epoch:  153
Using real-time data augmentation.
Epoch 1/1
total epoch:  154
Using real-time data augmentation.
Epoch 1/1
total epoch:  155
Using real-time data augmentation.
Epoch 1/1
total epoch:  156
Using real-time data augmentation.
Epoch 1/1
total epoch:  157
Using real-time data augmentation.
Epoch 1/1
total epoch:  158
Using real-time data augmentation.
Epoch 1/1
total epoch:  159
Using real-time data augmentation.
Epoch 1/1
total epoch:  160
Using real-time data augmentation.
Epoch 1/1
total epoch:  161
Using real-time data augmentation.
Epoch 1/1
total epoch:  162
Using real-time data augmentation.
Epoch 1/1
total epoch:  163
Using real-time data augmentation.
Epoch 1/1
total epoch:  164
Using real-time data augmentation.
Epoch 1/1
total epoch:  165
Using real-time data augmentation.
Epoch 1/1
total epoch:  166
Using real-time data augmentation.
Epoch 1/1
total epoch:  167
Using real-time data augmentation.
Ep

In [14]:
# Save the per-epoch validation results.
# Context managers guarantee the files are flushed and closed even if
# pickling raises part-way through.
with open("loss_validation_v2.p", 'wb') as validations_file:
    pickle.dump(validations, validations_file)

# Save the model
# NOTE(review): pickling a Keras model depends on the installed Keras version;
# model.save() to HDF5 is the documented persistence route — confirm before
# relying on this file across environments.
filename = 'cifar100_cnn_v2.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

### Predict test data

In [11]:
# Fetch the test images from the same bucket; the CSV's 'index' column
# becomes the DataFrame index rather than a pixel column.
file_name_x_test = "CIFAR_test_images.csv"
obj_x_test = s3.get_object(Bucket=bucket, Key=file_name_x_test)
x_test_df = pd.read_csv(obj_x_test["Body"], index_col="index")

# Same preprocessing as the training images: unflatten to (N, 3, 32, 32),
# move channels last, cast to float32 and divide by 255.
x_test = (
    x_test_df.values
    .reshape((-1, 3, 32, 32))
    .transpose([0, 2, 3, 1])
    .astype('float32')
    / 255.
)

In [13]:
# Output the labels
import numpy as np

# Per-class scores for every test image (one row per image).
y_test = model.predict(x_test)

# The predicted label is the column position of the highest score in each row.
y_pred = y_test.argmax(axis=1)

# Write the predictions as an 'index,answer' CSV.
predictPdsDF = pd.DataFrame(y_pred, columns=['answer'])
predictPdsDF.to_csv("predict.csv", index=True, index_label='index')