# Environment setup: add the virtualenv directories to `sys.path`

In [5]:
import sys

def config_paths(user, env_name):
    """Append the virtualenv and system Python 3.5 directories to ``sys.path``.

    Args:
        user: Name of the home directory under ``/home``.
        env_name: Name of the project directory that contains the ``.env``
            virtualenv.

    Entries that are already on ``sys.path`` are not appended again, so the
    cell can be re-run without growing the list.
    """
    env_root = '/home/{0}/{1}/.env'.format(user, env_name)
    site_packages = env_root + '/lib/python3.5/site-packages'
    paths = ['',
             env_root + '/bin',
             '/usr/lib/python35.zip',
             '/usr/lib/python3.5',
             '/usr/lib/python3.5/plat-x86_64-linux-gnu',
             '/usr/lib/python3.5/lib-dynload',
             site_packages,
             site_packages + '/IPython/extensions',
             # This entry used to skip .format(), which put the literal
             # '/home/{0}/.ipython' on sys.path instead of the user's directory.
             '/home/{0}/.ipython'.format(user)]

    for path in paths:
        if path not in sys.path:
            sys.path.append(path)

config_paths('omri', 'my_gpu')

# Imports

In [6]:
import os
import numpy as np
import cv2
import pickle
import matplotlib.pyplot as plt
import pandas as pd

from keras.models import Sequential
from keras import Model
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import MaxPooling2D
from keras.utils import np_utils

# Configuration

In [7]:
TRAIN_DIR = '../'             # directory that holds one sub-folder per category, named "1" .. "128"
NUM_CLASSES = 128             # number of categories
K = 40                        # images taken from each category
SAMPLE_NUM = NUM_CLASSES * K  # total number of images loaded for training + validation
IMG_SIZE = 197                # every image is resized to IMG_SIZE x IMG_SIZE pixels

# Read Data

In [4]:
# X_total receives RGB images scaled to [0, 1]; Y_total receives the
# zero-based class index of each image.
X_total = np.zeros(shape = (SAMPLE_NUM, IMG_SIZE,IMG_SIZE,3))
Y_total = np.zeros(shape = (SAMPLE_NUM,))

for category_ind in range(1,129):
    category_dir = os.path.join(TRAIN_DIR, str(category_ind))

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # choice of the first K images the same on every run.
    cur_image_list = sorted(os.listdir(category_dir))
    if len(cur_image_list) < K:
        # Rows that are never written stay all-zero and would silently be
        # treated as (black) samples of class 0.
        raise ValueError('category {0} has only {1} images, expected at least {2}'.format(
            category_ind, len(cur_image_list), K))

    for im_ind, im_name in enumerate(cur_image_list[:K]):
        im_path = os.path.join(category_dir, im_name)
        im = cv2.imread(im_path)
        if im is None:
            # cv2.imread signals an unreadable file by returning None
            # rather than raising.
            raise IOError('could not read image: {0}'.format(im_path))
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = cv2.resize(im, (IMG_SIZE, IMG_SIZE))
        im = im/255.0

        # Categories are laid out contiguously: K rows per category.
        row = (category_ind-1)*K + im_ind
        X_total[row] = im
        Y_total[row] = category_ind - 1

# Train-validation split

In [5]:
# A dedicated, seeded generator makes the split reproducible across kernel
# restarts without touching NumPy's global random state.
# NOTE: the shuffled arrays are stored back into X_total / Y_total, so
# re-running this cell shuffles the already shuffled data again.
random_indices = np.random.RandomState(42).permutation(SAMPLE_NUM)
X_total = X_total[random_indices]
Y_total = Y_total[random_indices]

# First 70% of the shuffled samples for training, the rest for validation.
train_num = int(SAMPLE_NUM * 0.7)
X_train = X_total[:train_num]
Y_train = Y_total[:train_num]

X_val = X_total[train_num:]
Y_val = Y_total[train_num:]

# One-hot encode the class indices (128 classes).
Y_val = np_utils.to_categorical(Y_val,128)
Y_train = np_utils.to_categorical(Y_train,128)

# Models

### My Alexnet

In [6]:
# model = Sequential()
# model.add(Convolution2D(32, 3, 3,
#                     border_mode='valid',
#                     input_shape=(IMG_SIZE, IMG_SIZE ,3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Convolution2D(32, 3, 3))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Convolution2D(32, 3, 3))
# model.add(Activation('relu'))

# model.add(Convolution2D(32, 3, 3))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Flatten())
# model.add(Dense(256))
# model.add(Activation('relu'))
# #model.add(Dropout(0.5))

# model.add(Dense(256))
# model.add(Activation('relu'))

# model.add(Dense(128))
# model.add(Activation('softmax'))

### VGG

In [7]:
from keras.applications.vgg16 import VGG16
# model = VGG16(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE ,3), 
#               weights='imagenet', input_tensor=None, pooling=None, classes=128)

# last = model.output

# x = Flatten()(last)
# x = Dense(256, activation='relu')(x)
# preds = Dense(128, activation='softmax')(x)

# model = Model(model.input, preds)

### Resnet (note: the cell below currently builds its model on the VGG16 base)

In [25]:
from keras.applications.resnet50 import ResNet50
from keras import regularizers

# Convolutional part of VGG16 with ImageNet weights; the original classifier
# layers are left out (include_top=False).
model = VGG16(weights='imagenet',
              include_top=False,
              input_shape=(IMG_SIZE, IMG_SIZE, 3),
              input_tensor=None,
              pooling=None,
              classes=128)

# Freeze every layer of the pre-trained base.
for layer in model.layers:
    layer.trainable = False

last = model.output

# New classification head: flatten -> dropout -> 512-unit dense layer with an
# L2 weight penalty -> 128-way softmax.
x = last
for head_layer in (Flatten(),
                   Dropout(0.5),
                   Dense(512, activation='sigmoid', kernel_initializer='he_normal',
                         kernel_regularizer=regularizers.l2(0.001))):
    x = head_layer(x)
preds = Dense(128, activation='softmax')(x)

# Full network: base input -> softmax predictions.
model = Model(model.input, preds)

In [26]:
# Print the layers of the model with their output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 197, 197, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 197, 197, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 197, 197, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 98, 98, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 98, 98, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 98, 98, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 49, 49, 128)       0         
__________

In [10]:
# Y_train.shape

In [11]:
# x = x.reshape( (-1,SIZE,SIZE,1))
# input_shape = x[0].shape
# x_train = x.astype("float32")
# y_train = y_cat
# from keras.preprocessing.image import ImageDataGenerator

# image_gen = ImageDataGenerator(
#     #featurewise_center=True,
#     #featurewise_std_normalization=True,
# #     rescale=1./255,
#     rotation_range=2,
# #     width_shift_range=.15,
# #     height_shift_range=.15,
#     horizontal_flip=True)

# #training the image preprocessing
# image_gen.fit(X_train, augment=True)

# Train

In [29]:
from keras import optimizers
from sklearn.utils import class_weight
# class_weight = class_weight.compute_class_weight('balanced',
#                                              np.unique( Y_total[:train_num]),
#                                               Y_total[:train_num])

# Stochastic gradient descent with Nesterov momentum and learning-rate decay.
sgd = optimizers.SGD(
    lr=0.001,
    momentum=0.9,
    decay=1e-6,
    nesterov=True,
)
# adam = optimizers.Adam(lr=0.01)

# Categorical cross-entropy loss; accuracy is reported as an extra metric.
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# model.fit_generator(image_gen.flow(X_train, Y_train, batch_size=32),
#           epochs=40,
#           verbose=1,
#           class_weight=class_weight,
#             validation_data=(X_val, Y_val))


# Fit for 40 epochs in mini-batches of 32, evaluating on the validation
# split after every epoch.
model.fit(
    x=X_train,
    y=Y_train,
    validation_data=(X_val, Y_val),
    batch_size=32,
    epochs=40,
    verbose=1,
)

Train on 3584 samples, validate on 1536 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f0f1bc5f588>

In [20]:
# NOTE(review): X_val / Y_val are the validation split, so the values printed
# below under "Test" labels are the validation loss and validation accuracy.
score, accuracy = model.evaluate(X_val, Y_val, verbose=1)
# Model outputs for the validation images; not used by the prints below.
predictions = model.predict(X_val)
print('Test score:', score)
print('Test accuracy:', accuracy)

Test score: 8.391303896903992
Test accuracy: 0.06510416666666667


# Save model

In [31]:
# Persist the model in two parts: the weights as HDF5 and the architecture
# as a JSON description.
model.save_weights("my_vgg.h5")

model_json = model.to_json()
with open("my_vgg.json", "w") as architecture_file:
    architecture_file.write(model_json)


# Load model if required

In [8]:
from keras.models import model_from_json

# Rebuild the architecture from its JSON description. The context manager
# closes the file even if reading raises.
with open('my_vgg.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# load weights into new model
loaded_model.load_weights("my_vgg.h5")

In [35]:
# Drop the training/validation image arrays — presumably to free memory
# before the test images are loaded (TODO confirm).
del X_total, X_val

In [36]:
# Drop the matching label arrays as well.
del Y_total, Y_val

# Read test data

In [9]:
# One row per test image. `indices` records the numeric id parsed from each
# file name; rows that are never written keep the value 0.
X_test = np.zeros(shape = (12800, IMG_SIZE,IMG_SIZE,3))
indices = np.zeros(12800)
test_path = '../test'
files_list = os.listdir(test_path)
if len(files_list) > len(indices):
    # Without this check the loop below would fail with a bare IndexError.
    raise ValueError('{0} contains {1} files, but only {2} are expected'.format(
        test_path, len(files_list), len(indices)))

for im_ind, im_name in enumerate(files_list):
    im_path = os.path.join(test_path, im_name)
    im = cv2.imread(im_path)
    if im is None:
        # cv2.imread signals an unreadable file by returning None rather
        # than raising.
        raise IOError('could not read image: {0}'.format(im_path))
    # Same preprocessing as the training images: RGB, resized, scaled to [0, 1].
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (IMG_SIZE, IMG_SIZE))
    im = im/255.0
    # The file name without its extension is the image id.
    img_ind = int(im_name.split('.')[0])
    indices[im_ind] = img_ind
    X_test[im_ind] = im

# Fill in missing data

In [10]:
# Ids in range(12800) for which no test file was read.
# NOTE(review): range(12800) covers 0..12799 — if the ids are 1-based, id 12800
# can never be reported as missing; confirm against the data set.
missing_imgs = set(range(12800)) - set(indices)

# Rows the loader never wrote still hold 0.
# NOTE(review): an image whose real id is 0 is indistinguishable from an
# empty row here; confirm ids start at 1.
empty_slots = np.where(indices==0)[0]

# Sort once instead of rebuilding list(missing_imgs) on every iteration, and
# pair with zip so a count mismatch cannot raise IndexError.
for loc, img_id in zip(empty_slots, sorted(missing_imgs)):
    indices[loc] = img_id

# Predict

In [11]:
# One row of class scores per test image; the position of the largest score
# along axis 1 is the zero-based predicted class.
predictions = loaded_model.predict(X_test)
y_test = predictions.argmax(axis=1)

# Create Submission

In [12]:
filename = 'my_vgg.csv'

# Ids are stored as floats and predictions are zero-based: cast the ids to
# integers and shift the predictions by one.
ans = pd.DataFrame({'id': indices.astype(int),
                    'predicted': (y_test + 1).astype(int)})

# Write the rows ordered by image id, without the DataFrame index column.
ans.sort_values(by='id').to_csv(filename, index=False)