In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tarfile
import cv2
import pickle
%matplotlib inline

In [2]:
# Root directory of the image dataset: one sub-folder per object category.
# Relative path, so it is resolved against the kernel's current working directory.
path = '256_ObjectCategories/'

In [3]:
# Make the dataset root the working directory so the argument-less
# os.listdir() call in the next cell lists the category folders.
os.chdir(path)

In [4]:
# Names of the category folders in the current directory (e.g. '001.ak47');
# they double as the class labels further down.
# Only directories are kept so stray files are never treated as classes, and
# the list is sorted because os.listdir() returns entries in arbitrary order.
folders = sorted(entry for entry in os.listdir() if os.path.isdir(entry))

In [5]:
# Accumulators used by the image-loading loop below.
folder_paths = []  # category folder path(s)
all_images = []    # one flattened pixel vector per image
all_classes = []   # category folder name of each image, parallel to all_images

In [6]:
# Side length, in pixels, of the square images produced by make_square
# (used as the default for its `min_size` argument).
img_size = 128

In [7]:
from PIL import Image

def make_square(image, min_size=img_size, fill_color=(0, 0, 0, 0)):
    """Fit ``image`` onto a white ``min_size`` x ``min_size`` RGB canvas.

    The image is shrunk with ``Image.thumbnail`` (aspect ratio preserved) and
    pasted centred onto a white square background.

    Parameters
    ----------
    image : PIL.Image.Image
        Source image. It is resized **in place** by ``thumbnail``.
    min_size : int
        Side length of the square output, in pixels.
    fill_color : tuple
        Currently ignored: the background is always white. The parameter is
        kept so existing callers that pass it keep working.

    Returns
    -------
    numpy.ndarray
        Pixel array of the RGB canvas, shape ``(min_size, min_size, 3)``.
        The array is *not* flattened; callers flatten it themselves.
    """
    size = (min_size, min_size)

    # Image.ANTIALIAS was an alias of LANCZOS and no longer exists in
    # Pillow >= 10; look the filter up in a way that works on old and new
    # Pillow versions.
    resample = getattr(Image, "Resampling", Image).LANCZOS
    image.thumbnail(size, resample)

    # An RGB canvas takes a 3-component colour (the old 4th value was meaningless).
    background = Image.new('RGB', size, (255, 255, 255))

    # Centre the (possibly non-square) thumbnail on the canvas.
    offset = (
        (size[0] - image.size[0]) // 2,
        (size[1] - image.size[1]) // 2,
    )
    background.paste(image, offset)

    # The original called new_img.flatten() here and discarded the result,
    # which did nothing; the array is returned unflattened as before.
    return np.array(background)

In [8]:
# Load every image of every category folder, square it with make_square and
# record the folder name as its label.
#
# Paths are built relative to the current working directory (the dataset root
# whose contents were listed into `folders`), so no os.chdir() is needed per
# folder and the working directory is left untouched by this cell.
for folder_name in folders:
    if not os.path.isdir(folder_name):
        # Defensive: ignore stray files sitting next to the category folders.
        continue
    folder_paths.append(folder_name)

    for file_name in sorted(os.listdir(folder_name)):
        image_path = os.path.join(folder_name, file_name)
        if not os.path.isfile(image_path):
            continue

        # `with` closes the underlying file handle once the pixels are copied.
        with Image.open(image_path) as img:
            pixels = make_square(img)

        # Flatten to a 1-D vector and scale the 0-255 pixel values to [0, 1].
        all_images.append(pixels.flatten() / 255)
        all_classes.append(folder_name)


In [9]:
len(all_classes)

30607

In [10]:
len(all_images)

30607

In [11]:
all_images[2].shape

(49152,)

In [14]:
# Stack the per-image vectors into one array with one row per image.
# Note: despite the `_df` suffix this is a NumPy array, not a pandas DataFrame.
all_images_df = np.asarray(all_images)

In [15]:
all_images_df.shape

(30607, 49152)

# Pickle 

In [17]:
import sys
sys.getsizeof(all_images_df)

12035162224

In [18]:
# Split the rows into three chunks; each chunk is pickled to its own file below.
# NOTE: basic slicing returns views that share memory with all_images_df, so
# sys.getsizeof() only reports the small array-object overhead for them (the
# 112 bytes printed below), not the size of the pixel data. Use `.nbytes` to
# see how much data each chunk covers.
all_images_df1 = all_images_df[:10000,:]
all_images_df2 = all_images_df[10000:20000,:]
all_images_df3 = all_images_df[20000:,:]
print('all_images_df1:'+str(sys.getsizeof(all_images_df1)))
print('all_images_df2:'+str(sys.getsizeof(all_images_df2)))
print('all_images_df3:'+str(sys.getsizeof(all_images_df3)))

all_images_df1:112
all_images_df2:112
all_images_df3:112


In [19]:
# Directory the pickle files are written to. The path is relative, so it is
# resolved against whatever the working directory is when this cell runs.
picklepath = 'Caltech256/'
os.chdir(picklepath)

In [20]:
import pickle

# Persist the three image chunks and the label list as separate pickle files
# in the current working directory.
pickle_targets = [
    ("pickle_all_images_df1.pickle", all_images_df1),
    ("pickle_all_images_df2.pickle", all_images_df2),
    ("pickle_all_images_df3.pickle", all_images_df3),
    ("pickle_all_classes.pickle", all_classes),
]

for pickle_name, payload in pickle_targets:
    # `with` guarantees the file is flushed and closed even if dump() raises
    # part-way through writing one of these very large arrays.
    with open(pickle_name, "wb") as pickle_out:
        pickle.dump(payload, pickle_out)

In [21]:
all_images_df1.shape

(10000, 49152)

In [22]:
all_images_df2.shape

(10000, 49152)

In [23]:
all_images_df3.shape

(10607, 49152)

### Restore 

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tarfile
import cv2
import pickle
%matplotlib inline

In [2]:
import os
# Switch to the directory that holds the pickle files (relative to the
# working directory of the freshly started kernel).
picklepath = 'Caltech256/'
os.chdir(picklepath)

In [3]:
import pickle
# Reload the three image chunks and the label list written by the Pickle section.
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load pickle files that this notebook produced itself.
# Each file is opened in a `with` block so its handle is closed after loading
# (the handles used to be left open).
with open("pickle_all_images_df1.pickle", "rb") as pickle_in:
    all_images_df1 = pickle.load(pickle_in)

with open("pickle_all_images_df2.pickle", "rb") as pickle_in:
    all_images_df2 = pickle.load(pickle_in)

with open("pickle_all_images_df3.pickle", "rb") as pickle_in:
    all_images_df3 = pickle.load(pickle_in)

with open("pickle_all_classes.pickle", "rb") as pickle_in:
    all_classes = pickle.load(pickle_in)


In [4]:
# Side length in pixels of the stored square images; redefined here because
# this section starts from a fresh kernel.
img_size = 128

In [5]:
# Stitch the three chunks back together row-wise into a single array.
# This allocates a new array, so the chunks and the result coexist in memory
# until the chunks are deleted.
all_images = np.concatenate((all_images_df1, all_images_df2,all_images_df3), axis=0)

In [6]:
# Drop the per-chunk names so their memory can be reclaimed; the data now
# lives in `all_images`.
del all_images_df1, all_images_df2, all_images_df3

In [7]:
all_images.shape

(30607, 49152)

In [9]:
set(all_classes)

{'001.ak47',
 '002.american-flag',
 '003.backpack',
 '004.baseball-bat',
 '005.baseball-glove',
 '006.basketball-hoop',
 '007.bat',
 '008.bathtub',
 '009.bear',
 '010.beer-mug',
 '011.billiards',
 '012.binoculars',
 '013.birdbath',
 '014.blimp',
 '015.bonsai-101',
 '016.boom-box',
 '017.bowling-ball',
 '018.bowling-pin',
 '019.boxing-glove',
 '020.brain-101',
 '021.breadmaker',
 '022.buddha-101',
 '023.bulldozer',
 '024.butterfly',
 '025.cactus',
 '026.cake',
 '027.calculator',
 '028.camel',
 '029.cannon',
 '030.canoe',
 '031.car-tire',
 '032.cartman',
 '033.cd',
 '034.centipede',
 '035.cereal-box',
 '036.chandelier-101',
 '037.chess-board',
 '038.chimp',
 '039.chopsticks',
 '040.cockroach',
 '041.coffee-mug',
 '042.coffin',
 '043.coin',
 '044.comet',
 '045.computer-keyboard',
 '046.computer-monitor',
 '047.computer-mouse',
 '048.conch',
 '049.cormorant',
 '050.covered-wagon',
 '051.cowboy-hat',
 '052.crab-101',
 '053.desk-globe',
 '054.diamond-ring',
 '055.dice',
 '056.dog',
 '057.dol

### Train/Test Split (hold-out validation)

In [11]:
# One-hot encode the labels: one column per category, one row per image.
all_classes = pd.get_dummies(all_classes)
# `all_images` is already a NumPy array (it comes from np.concatenate), so use
# asarray, which returns it as-is; np.array() would make a second full copy of
# this very large array.
all_images = np.asarray(all_images)

In [12]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the images for testing. A fixed random_state makes the
# split reproducible across runs and kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    all_images, all_classes, test_size=0.2, random_state=42
)

### Reset Kernel

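We have to reset the kernel a few times during this exercise because RAM and VRAM fill up quickly when training a model. The cell below reloads the pickled data and rebuilds the train/test split from scratch after each reset.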

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tarfile
import cv2
import pickle
%matplotlib inline
img_size = 128
picklepath = 'Caltech256/'
os.chdir(picklepath)


def _load_pickle(filename):
    """Return the object stored in pickle file ``filename`` and close the file.

    NOTE: pickle.load can execute arbitrary code contained in the file; only
    use this on pickle files written by this notebook.
    """
    with open(filename, "rb") as pickle_in:
        return pickle.load(pickle_in)


# Reload the three image chunks and stitch them back together row-wise.
# The temporary list of chunks is released as soon as concatenate returns.
all_images = np.concatenate(
    [
        _load_pickle("pickle_all_images_df1.pickle"),
        _load_pickle("pickle_all_images_df2.pickle"),
        _load_pickle("pickle_all_images_df3.pickle"),
    ],
    axis=0,
)
all_classes = pd.Series(_load_pickle("pickle_all_classes.pickle"))

from sklearn.model_selection import train_test_split

# --- Hold-out split: 4 randomly chosen images per class form the test set ---
# Row positions are sampled from the labels only (no groupby over the huge
# pixel table) with a fixed seed, so the same images are held out after every
# kernel reset.
images_per_class_in_test = 4
rng = np.random.RandomState(42)
labels = all_classes.values
test_index = np.concatenate([
    rng.choice(np.flatnonzero(labels == class_name),
               size=images_per_class_in_test, replace=False)
    for class_name in np.unique(labels)
])
is_test = np.zeros(len(all_images), dtype=bool)
is_test[test_index] = True

# Features and labels are selected with the SAME positions, so row i of
# X_test always belongs to row i of y_test. (Previously X_test was in
# group-by order while y_test was picked via `index.levels[1]`, which holds
# the unique index values rather than the row order, so the two only lined up
# when the images happened to be stored in class order.)
X_test = all_images[test_index]
X_train = all_images[~is_test]
del all_images

# One-hot encode once, then split, so train and test share identical columns.
y_all = pd.get_dummies(all_classes)
y_test = y_all.iloc[test_index]
y_train = y_all[~is_test]

# Back to image tensors: (n_images, height, width, RGB channels).
X_train = X_train.reshape(-1,img_size,img_size,3)
X_test = X_test.reshape(-1,img_size,img_size,3)
path = 'Caltech256/256_ObjectCategories/'

### Reshape data into Image Format

In [20]:
# Turn each flat pixel vector into an (img_size, img_size, 3) image.
# Harmless if the reset-kernel cell above already reshaped X_train: reshaping
# to the same shape leaves the data unchanged.
X_train = X_train.reshape(-1,img_size,img_size,3)

In [21]:
# Same reshape for the test images: (n_images, img_size, img_size, 3).
X_test = X_test.reshape(-1,img_size,img_size,3)

### Data Generator

In [234]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [235]:
# Augmentation pipeline shared by all training cells below: random rotations,
# shifts, shears, zooms and horizontal flips; pixels uncovered by a transform
# are filled from the nearest edge pixel.
generator = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

# --- Preview: write a few augmented variants of one image to disk ---
# NOTE(review): `sample_image` is not defined anywhere in the visible notebook;
# it must be set to the path of an image file before this cell is run.
img = load_img(sample_image)
x = img_to_array(img)
x = x.reshape((1,) + x.shape)  # add a leading batch dimension

# Make sure the output directory exists before flow() saves into it.
os.makedirs('preview', exist_ok=True)

# flow() yields batches forever, so stop explicitly after 11 batches.
# (The original iterated `datagen`, a name that is never defined; the
# generator built above is the intended object.)
i = 0
for batch in generator.flow(x, batch_size=1,
                            save_to_dir='preview', save_prefix='butterfly', save_format='jpeg'):
    i += 1
    if i > 10:
        break

In [236]:
# Directory whose contents are listed in the next cell (relative path).
folder = "Caltech256/preview/"

In [237]:
# Names of all entries in `folder`. Despite the singular name this is a list.
butterfly_filename = os.listdir(folder)

In [None]:
# Builds a batch iterator over X_train; the result is not assigned, so this
# cell only displays the iterator object and has no effect on training.
generator.flow(X_train)

# Convolutional Neural Networks

In [4]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
import keras.backend as K
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras.layers import Conv2D, MaxPool2D, AvgPool2D,Flatten

# Inception

In [13]:
# `time` is not imported by any import cell of this notebook, so import it here.
import time

# Pause for 100 seconds before building the model.
# NOTE(review): presumably this gives the system time to release RAM/VRAM
# from the previous run; confirm it is still needed.
time.sleep(100)

In [14]:
# Mini-batch size used by generator.flow(...) and for the steps-per-epoch
# computation in the InceptionV3 training cells below.
batch_size = 16

In [15]:
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras.optimizers import Adam

# --- Transfer learning with InceptionV3 ---
# ImageNet-pretrained convolutional base without its classification top.
base_model = InceptionV3(weights='imagenet', include_top=False)

# New classification head: global average pooling -> 1024-unit ReLU layer ->
# 257-way softmax (one output per one-hot label column).
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(257, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Stage 1: freeze the whole pretrained base and train only the new head.
for layer in base_model.layers:
    layer.trainable = False

adam = Adam(lr=0.0001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

# steps_per_epoch must be an integer number of batches. Rounding up keeps the
# final, partially filled batch, so every training image is seen once per epoch.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

model.fit_generator(
    generator.flow(X_train, y_train.values, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test.values),
)

# Print layer indices and names to help choose where to unfreeze.
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# Stage 2: keep the first 249 layers frozen and fine-tune everything above.
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

# Changes to `trainable` only take effect after the model is compiled again.
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

model.fit_generator(
    generator.flow(X_train, y_train.values, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    verbose=1,
    validation_data=(X_test, y_test.values),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
0 input_2
1 conv2d_1
2 batch_normalization_1
3 activation_1
4 conv2d_2
5 batch_normalization_2
6 activation_2
7 conv2d_3
8 batch_normalization_3
9 activation_3
10 max_pooling2d_1
11 conv2d_4
12 batch_normalization_4
13 activation_4
14 conv2d_5
15 batch_normalization_5
16 activation_5
17 max_pooling2d_2
18 conv2d_9
19 batch_normalization_9
20 activation_9
21 conv2d_7
22 conv2d_10
23 batch_normalization_7
24 batch_normalization_10
25 activation_7
26 activation_10
27 average_pooling2d_1
28 conv2d_6
29 conv2d_8
30 conv2d_11
31 conv2d_12
32 batch_normalization_6
33 batch_normalization_8
34 batch_normalization_11
35 batch_normalization_12
36 activation_6
37 activation_8
38 activation_11
39 activation_12
40 mixed0
41 conv2d_16
42 batch_normalization_16
43 activation_16
44 conv2d_14
45 conv2d_17
46 batch_normalization_14
47 batch_normalization_17
48 activation_14
49 activation_17
50 a

Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2bc0135bef0>

In [16]:
# `time` is not imported by any import cell of this notebook.
import time

# Stage 3: unfreeze more of the network. Only the first 200 layers stay frozen.
for layer in model.layers[:200]:
    layer.trainable = False
for layer in model.layers[200:]:
    layer.trainable = True

# NOTE(review): presumably a pause to let memory be released before the next
# training run; confirm it is still needed.
time.sleep(100)

# Recompile so the new `trainable` flags take effect.
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

# steps_per_epoch must be an integer; round up to keep the last partial batch.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

model.fit_generator(
    generator.flow(X_train, y_train.values, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=20,
    verbose=1,
    validation_data=(X_test, y_test.values),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2bbe563ef98>

In [17]:
from keras.models import load_model
# Round-trip the trained network through disk: write it to an HDF5 file, drop
# the in-memory object, then restore it from that file.
inception_checkpoint = 'inception_model.h5'
model.save(inception_checkpoint)
del model
model = load_model(inception_checkpoint)

In [18]:
model.evaluate(x = X_test, y = y_test.values)



[1.9541673719419115, 0.63813229571984431]

Model accuracy (InceptionV3): 63.8% on the held-out test set (test loss 1.954).

### Inception Resnet

In [19]:
# Mini-batch size for the Inception-ResNet training cells below
# (the InceptionV3 cells above used 16).
batch_size = 64

In [20]:
# `time` is not imported by any import cell of this notebook, so import it here.
import time

# Pause for 100 seconds before building the next model.
# NOTE(review): presumably this lets RAM/VRAM from the previous model be
# released; confirm it is still needed.
time.sleep(100)

In [21]:
from keras.applications.inception_resnet_v2 import InceptionResNetV2
# --- Transfer learning with InceptionResNetV2 ---
# ImageNet-pretrained convolutional base without its classification top.
base_model = InceptionResNetV2(weights='imagenet', include_top=False)

# Same head as for InceptionV3: global average pooling -> 1024-unit ReLU
# layer -> 257-way softmax.
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(257, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Freeze the whole pretrained base; only the new head is trained in this cell.
for layer in base_model.layers:
    layer.trainable = False

adam = Adam(lr=0.0001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

# steps_per_epoch must be an integer number of batches. Rounding up keeps the
# final, partially filled batch.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

model.fit_generator(
    generator.flow(X_train, y_train.values, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=20,
    verbose=1,
    validation_data=(X_test, y_test.values),
)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2bcd3265ba8>

In [22]:

# `time` is not imported by any import cell of this notebook.
import time

# Print layer indices and names to help choose where to unfreeze.
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# Fine-tuning: keep the first 249 layers frozen, train the rest.
# NOTE(review): 249 is the cut-off that was used for InceptionV3 above.
# InceptionResNetV2 has a different layer layout (see the printed list), so
# confirm this index is the intended boundary for this architecture.
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

# NOTE(review): presumably a pause to let memory be released; confirm.
time.sleep(100)

# SGD is imported but not used: the existing `adam` optimizer is reused below.
from keras.optimizers import SGD
# Recompile so the new `trainable` flags take effect.
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

# steps_per_epoch must be an integer; round up to keep the last partial batch.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

model.fit_generator(
    generator.flow(X_train, y_train.values, batch_size=batch_size),
    steps_per_epoch=steps_per_epoch,
    epochs=30,
    verbose=1,
    validation_data=(X_test, y_test.values),
)

0 input_3
1 conv2d_95
2 batch_normalization_95
3 activation_95
4 conv2d_96
5 batch_normalization_96
6 activation_96
7 conv2d_97
8 batch_normalization_97
9 activation_97
10 max_pooling2d_5
11 conv2d_98
12 batch_normalization_98
13 activation_98
14 conv2d_99
15 batch_normalization_99
16 activation_99
17 max_pooling2d_6
18 conv2d_103
19 batch_normalization_103
20 activation_103
21 conv2d_101
22 conv2d_104
23 batch_normalization_101
24 batch_normalization_104
25 activation_101
26 activation_104
27 average_pooling2d_10
28 conv2d_100
29 conv2d_102
30 conv2d_105
31 conv2d_106
32 batch_normalization_100
33 batch_normalization_102
34 batch_normalization_105
35 batch_normalization_106
36 activation_100
37 activation_102
38 activation_105
39 activation_106
40 mixed_5b
41 conv2d_110
42 batch_normalization_110
43 activation_110
44 conv2d_108
45 conv2d_111
46 batch_normalization_108
47 batch_normalization_111
48 activation_108
49 activation_111
50 conv2d_107
51 conv2d_109
52 conv2d_112
53 batch_norm

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x2bcd4a91e10>

In [None]:
from keras.models import load_model
# Round-trip the trained network through disk: write it to an HDF5 file, drop
# the in-memory object, then restore it from that file.
inception_resnet_checkpoint = 'inceptionresnet_model.h5'
model.save(inception_resnet_checkpoint)
del model
model = load_model(inception_resnet_checkpoint)

In [29]:
model.evaluate(x = X_test, y = y_test.values)



[1.655475185697777, 0.71498054474708173]

Model accuracy (InceptionResNetV2): 71.5% on the held-out test set (test loss 1.655).