In [1]:
from tensorflow.keras.datasets.cifar10 import load_data
import pandas as pd
import numpy as np
import keras

test_data = pd.read_csv("https://raw.githubusercontent.com/dphi-official/Datasets/master/cifar_image_flattened_pixels.csv")

In [2]:
(x_train, y_train), (x_test, y_test) = load_data()

In [3]:
x_train.shape

(50000, 32, 32, 3)

In [4]:
x_test.shape

(10000, 32, 32, 3)

In [5]:
y_train.shape, y_train[0]

((50000, 1), array([6], dtype=uint8))

In [6]:
y_test.shape

(10000, 1)

In [7]:
x_train[0]

array([[[ 59,  62,  63],
        [ 43,  46,  45],
        [ 50,  48,  43],
        ...,
        [158, 132, 108],
        [152, 125, 102],
        [148, 124, 103]],

       [[ 16,  20,  20],
        [  0,   0,   0],
        [ 18,   8,   0],
        ...,
        [123,  88,  55],
        [119,  83,  50],
        [122,  87,  57]],

       [[ 25,  24,  21],
        [ 16,   7,   0],
        [ 49,  27,   8],
        ...,
        [118,  84,  50],
        [120,  84,  50],
        [109,  73,  42]],

       ...,

       [[208, 170,  96],
        [201, 153,  34],
        [198, 161,  26],
        ...,
        [160, 133,  70],
        [ 56,  31,   7],
        [ 53,  34,  20]],

       [[180, 139,  96],
        [173, 123,  42],
        [186, 144,  30],
        ...,
        [184, 148,  94],
        [ 97,  62,  34],
        [ 83,  53,  34]],

       [[177, 144, 116],
        [168, 129,  94],
        [179, 142,  87],
        ...,
        [216, 184, 140],
        [151, 118,  84],
        [123,  92,  72]]

In [8]:
len(np.unique(y_train)),len(x_train)

(10, 50000)

In [9]:
#Normalizing the pixel values
x_train = x_train / 255.0
x_test = x_test / 255.0
x_train[0]

array([[[0.23137255, 0.24313725, 0.24705882],
        [0.16862745, 0.18039216, 0.17647059],
        [0.19607843, 0.18823529, 0.16862745],
        ...,
        [0.61960784, 0.51764706, 0.42352941],
        [0.59607843, 0.49019608, 0.4       ],
        [0.58039216, 0.48627451, 0.40392157]],

       [[0.0627451 , 0.07843137, 0.07843137],
        [0.        , 0.        , 0.        ],
        [0.07058824, 0.03137255, 0.        ],
        ...,
        [0.48235294, 0.34509804, 0.21568627],
        [0.46666667, 0.3254902 , 0.19607843],
        [0.47843137, 0.34117647, 0.22352941]],

       [[0.09803922, 0.09411765, 0.08235294],
        [0.0627451 , 0.02745098, 0.        ],
        [0.19215686, 0.10588235, 0.03137255],
        ...,
        [0.4627451 , 0.32941176, 0.19607843],
        [0.47058824, 0.32941176, 0.19607843],
        [0.42745098, 0.28627451, 0.16470588]],

       ...,

       [[0.81568627, 0.66666667, 0.37647059],
        [0.78823529, 0.6       , 0.13333333],
        [0.77647059, 0

In [10]:
x_train[0].shape

(32, 32, 3)

In [11]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
import os

#using the CIFAR-10 example
batch_size = 32
steps_per_epoch= len(x_train)/batch_size
num_classes = 10
epochs = 50
data_augmentation = True
#num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models')
model_name = 'keras_object_recognition_model.h5'

# Convert class vectors to binary class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',input_shape=(32, 32, 3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes))
model.add(Activation('softmax'))

# initiate RMSprop optimizer
opt = keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6)

# Let's train the model using RMSprop
model.compile(loss='categorical_crossentropy',optimizer=opt,metrics=['accuracy'])



In [12]:
import keras
from keras.preprocessing.image import ImageDataGenerator

# create a data generation with the train truth images and dataframe
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

datagen.fit(x_train)



In [13]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((50000, 32, 32, 3), (10000, 32, 32, 3), (50000, 10), (10000, 10))

In [14]:
model.fit(datagen.flow(x_train, y_train, batch_size=32),
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test),
          shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x1ecaa8a25b0>

In [17]:
# Save model
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

Saved trained model at c:\DPHi\Deep_Learning_Bootcamp\saved_models\keras_object_recognition_model.h5 


In [19]:
test_data.shape

(2000, 3072)

In [30]:
teste = test_data
teste

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3062,3063,3064,3065,3066,3067,3068,3069,3070,3071
0,98,105,108,92,101,106,91,101,107,93,...,171,183,182,176,175,175,168,181,181,175
1,101,108,101,101,108,101,102,109,102,103,...,103,100,109,104,100,109,103,100,109,102
2,85,115,27,63,90,25,37,66,15,69,...,141,172,193,136,173,192,138,179,192,149
3,213,213,214,215,214,218,220,218,226,223,...,216,193,194,209,201,204,216,203,201,237
4,41,74,144,41,75,139,41,75,139,41,...,133,41,77,130,44,75,133,42,73,144
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,156,186,205,153,183,201,153,183,201,152,...,90,40,73,92,49,79,99,46,77,97
1996,68,101,169,69,103,173,70,104,176,71,...,58,61,73,64,51,62,53,49,61,52
1997,216,190,168,219,193,169,214,189,163,188,...,135,182,167,150,184,170,152,188,171,151
1998,46,24,17,43,32,11,77,82,60,117,...,147,102,134,146,100,131,145,99,130,148


In [42]:
x_test_data = teste.astype('float32')
x_test_data /= 255
df = x_test_data.rename_axis('ID').values
dff = df.reshape(2000,32,32,3)

In [48]:
predictions = model.predict(dff)


In [50]:
predictions[0]

array([0.13061044, 0.01076612, 0.25867403, 0.1555994 , 0.10574258,
       0.09053725, 0.13180049, 0.02097947, 0.08185315, 0.01343709],
      dtype=float32)

In [51]:
res = pd.DataFrame(predictions)

In [52]:
res

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0.130610,0.010766,0.258674,0.155599,0.105743,0.090537,0.131800,0.020979,0.081853,0.013437
1,0.097752,0.005543,0.215435,0.224033,0.122739,0.112313,0.121279,0.012563,0.080696,0.007646
2,0.065391,0.005768,0.153710,0.073965,0.406731,0.070645,0.087529,0.075747,0.045696,0.014817
3,0.118839,0.001871,0.102493,0.063977,0.096723,0.022401,0.015308,0.006922,0.565055,0.006411
4,0.146034,0.007476,0.164659,0.191601,0.070680,0.132827,0.087526,0.021997,0.160581,0.016618
...,...,...,...,...,...,...,...,...,...,...
1995,0.058980,0.000491,0.054461,0.068549,0.199417,0.023579,0.015845,0.004382,0.571122,0.003175
1996,0.042016,0.000280,0.215710,0.091384,0.486721,0.021767,0.023021,0.010294,0.107675,0.001134
1997,0.070605,0.000612,0.283826,0.322275,0.078133,0.108058,0.094500,0.002115,0.037937,0.001939
1998,0.022841,0.000650,0.134470,0.426901,0.039903,0.326474,0.021346,0.009174,0.015697,0.002544


In [54]:
len(predictions)

2000

In [53]:
np.argmax(predictions[0])

2

In [55]:
submission = []
for i in range (0,len(predictions)):
    submission.append(np.argmax(predictions[i]))

submission

[2,
 3,
 4,
 8,
 3,
 2,
 3,
 4,
 3,
 3,
 8,
 4,
 4,
 4,
 4,
 2,
 8,
 4,
 2,
 4,
 3,
 8,
 4,
 8,
 4,
 0,
 2,
 4,
 0,
 4,
 3,
 3,
 3,
 4,
 2,
 8,
 3,
 2,
 8,
 4,
 8,
 2,
 2,
 4,
 2,
 3,
 3,
 2,
 2,
 3,
 2,
 8,
 2,
 5,
 8,
 3,
 2,
 4,
 2,
 0,
 3,
 4,
 4,
 6,
 4,
 8,
 4,
 8,
 3,
 4,
 4,
 3,
 8,
 3,
 2,
 8,
 2,
 4,
 4,
 8,
 2,
 5,
 8,
 4,
 4,
 2,
 3,
 4,
 3,
 4,
 4,
 4,
 8,
 8,
 4,
 8,
 8,
 3,
 4,
 4,
 8,
 3,
 4,
 4,
 4,
 4,
 2,
 4,
 3,
 4,
 3,
 2,
 4,
 0,
 8,
 2,
 4,
 5,
 8,
 2,
 4,
 4,
 6,
 8,
 6,
 3,
 4,
 8,
 4,
 8,
 4,
 2,
 2,
 8,
 3,
 4,
 4,
 0,
 4,
 2,
 4,
 4,
 3,
 3,
 4,
 4,
 6,
 8,
 3,
 8,
 3,
 3,
 2,
 8,
 5,
 2,
 2,
 4,
 2,
 2,
 2,
 4,
 6,
 8,
 4,
 2,
 4,
 2,
 8,
 2,
 2,
 4,
 4,
 2,
 4,
 4,
 0,
 2,
 3,
 4,
 2,
 3,
 2,
 2,
 3,
 3,
 2,
 4,
 3,
 3,
 4,
 8,
 8,
 3,
 3,
 2,
 4,
 4,
 8,
 2,
 4,
 6,
 3,
 4,
 8,
 8,
 8,
 3,
 2,
 3,
 3,
 8,
 3,
 5,
 4,
 8,
 2,
 2,
 2,
 4,
 4,
 4,
 3,
 4,
 3,
 4,
 3,
 0,
 3,
 2,
 2,
 4,
 3,
 2,
 3,
 8,
 8,
 8,
 3,
 3,
 9,
 2,
 2,
 3,
 2,
 4,
 4,
 2,
 3,
 4,


In [57]:
res = pd.DataFrame(submission)
res.index = test_data.index # its important for comparison. Here "test_new" is your new test dataset
res.columns = ["prediction"]
res.to_csv("prediction_results.csv")

In [58]:
res

Unnamed: 0,prediction
0,2
1,3
2,4
3,8
4,3
...,...
1995,8
1996,4
1997,3
1998,3
