# Environment setup: add the virtualenv paths to `sys.path`

In [1]:
import sys

def config_paths(user, env_name):
    """Append the virtualenv and system Python 3.5 directories to ``sys.path``.

    Args:
        user: Name of the user's directory under ``/home``.
        env_name: Name of the directory (inside the user's home) that
            contains the ``.env`` virtualenv.

    Entries that are already on ``sys.path`` are skipped, so re-running the
    cell does not keep growing the list.
    """
    home_dir = '/home/{0}'.format(user)
    env_dir = '{0}/{1}/.env'.format(home_dir, env_name)
    site_packages = env_dir + '/lib/python3.5/site-packages'

    paths = ['',
             env_dir + '/bin',
             '/usr/lib/python35.zip',
             '/usr/lib/python3.5',
             '/usr/lib/python3.5/plat-x86_64-linux-gnu',
             '/usr/lib/python3.5/lib-dynload',
             site_packages,
             site_packages + '/IPython/extensions',
             # Previously this entry was never formatted and kept a literal "{0}".
             home_dir + '/.ipython']

    for path in paths:
        if path not in sys.path:
            sys.path.append(path)
        
# Register the paths for user 'omri' and the 'my_gpu' environment directory.
config_paths('omri', 'my_gpu')

# Imports

In [2]:
import os
import numpy as np
import cv2
import pickle
import matplotlib.pyplot as plt
import pandas as pd

from keras.models import Sequential
from keras import Model
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D
from keras.layers.pooling import MaxPooling2D
from keras.utils import np_utils

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Configuration

In [3]:
# Directory that holds one sub-directory per category, named "1" .. "128".
TRAIN_DIR = '../'
# Number of categories (previously a bare 128 inside SAMPLE_NUM).
NUM_CLASSES = 128
# Maximum number of images loaded from each category.
K = 20
# Total number of pre-allocated training samples.
SAMPLE_NUM = NUM_CLASSES * K
# Images are resized to IMG_SIZE x IMG_SIZE pixels.
IMG_SIZE = 197

# Read Data

In [4]:
# Pre-allocate K slots per category for the images and their labels.
X_total = np.zeros(shape=(SAMPLE_NUM, IMG_SIZE, IMG_SIZE, 3))
Y_total = np.zeros(shape=(SAMPLE_NUM,))

for category_ind in range(1, 129):
    category_dir = os.path.join(TRAIN_DIR, str(category_ind))

    # os.listdir returns entries in arbitrary order; sorting makes the
    # choice of the first K images reproducible between runs.
    cur_image_list = sorted(os.listdir(category_dir))
    if len(cur_image_list) < K:
        # The unused slots stay all-zero images with label 0, so make it visible.
        print('Warning: category {0} has only {1} of {2} images; '
              'the remaining slots stay blank'.format(category_ind, len(cur_image_list), K))

    for im_ind, im_name in enumerate(cur_image_list[:K]):
        im_path = os.path.join(category_dir, im_name)
        im = cv2.imread(im_path)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError('Could not read image: {0}'.format(im_path))
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = cv2.resize(im, (IMG_SIZE, IMG_SIZE))
        im = im / 255.0  # scale pixel values to [0, 1]

        # Categories are numbered from 1 on disk; labels are stored 0-based.
        slot = (category_ind - 1) * K + im_ind
        X_total[slot] = im
        Y_total[slot] = category_ind - 1

# Train-validation split

In [6]:
# Shuffle with a fixed seed so the train/validation split is reproducible.
# A local RandomState is used so the global NumPy random state is untouched.
SPLIT_SEED = 42
random_indices = np.random.RandomState(SPLIT_SEED).permutation(SAMPLE_NUM)
X_total = X_total[random_indices]
Y_total = Y_total[random_indices]

# First 70% of the shuffled samples are used for training, the rest for validation.
train_num = int(SAMPLE_NUM * 0.7)
X_train = X_total[:train_num]
Y_train = Y_total[:train_num]

X_val = X_total[train_num:]
Y_val = Y_total[train_num:]

# One-hot encode the integer labels over the 128 categories.
Y_val = np_utils.to_categorical(Y_val, 128)
Y_train = np_utils.to_categorical(Y_train, 128)

# Models

### My Alexnet

In [None]:
# model = Sequential()
# model.add(Convolution2D(32, 3, 3,
#                     border_mode='valid',
#                     input_shape=(IMG_SIZE, IMG_SIZE ,3)))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Convolution2D(32, 3, 3))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Convolution2D(32, 3, 3))
# model.add(Activation('relu'))

# model.add(Convolution2D(32, 3, 3))
# model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))

# model.add(Flatten())
# model.add(Dense(256))
# model.add(Activation('relu'))
# #model.add(Dropout(0.5))

# model.add(Dense(256))
# model.add(Activation('relu'))

# model.add(Dense(128))
# model.add(Activation('softmax'))

### VGG

In [None]:
# from keras.applications.vgg16 import VGG16
# model = VGG16(include_top=False, input_shape=(IMG_SIZE, IMG_SIZE ,3), 
#               weights='imagenet', input_tensor=None, pooling=None, classes=128)

# last = model.output

# x = Flatten()(last)
# x = Dense(256, activation='relu')(x)
# preds = Dense(128, activation='softmax')(x)

# model = Model(model.input, preds)

### Resnet

In [None]:
from keras.applications.resnet50 import ResNet50

# ResNet50 backbone with ImageNet weights and without its original top layers.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
    input_tensor=None,
    pooling=None,
    classes=128,
)

# New head: flatten the backbone output, one hidden layer, 128-way softmax.
last = model.output
x = Dense(256, activation='relu')(Flatten()(last))
preds = Dense(128, activation='softmax')(x)

# Rebind `model` to the full network (backbone input -> new predictions).
model = Model(model.input, preds)


# Train

In [None]:
from keras import optimizers

# Two candidate optimizers; only `adam` is passed to compile() below.
sgd = optimizers.SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
adam = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])

# `epochs` replaces the deprecated Keras 1 keyword `nb_epoch`.
model.fit(X_train, Y_train, batch_size=32,
          epochs=30, verbose=1,
          validation_data=(X_val, Y_val))

In [None]:
# Loss and accuracy on the held-out validation split.
score, accuracy = model.evaluate(X_val, Y_val, verbose=1)
predictions = model.predict(X_val)
# These metrics come from X_val / Y_val, so label them as validation, not test.
print('Validation score:', score)
print('Validation accuracy:', accuracy)

# Save model

In [None]:
# The architecture goes to a JSON file, the weights to a separate HDF5 file.
model_json = model.to_json()
with open("model.json", "w") as arch_file:
    arch_file.write(model_json)

model.save_weights("model.h5")


# Load model if required

In [None]:
from keras.models import model_from_json

# Read the saved architecture; the context manager closes the file even if
# reading fails.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# load weights into new model
loaded_model.load_weights("model.h5")

# Read test data

In [None]:
# One slot per expected test image; an id of 0 marks a slot that was never filled.
X_test = np.zeros(shape=(12800, IMG_SIZE, IMG_SIZE, 3))
indices = np.zeros(12800)
test_path = '../test'
files_list = os.listdir(test_path)
for im_ind, im_name in enumerate(files_list):
    # The image id is the file name up to the first dot. Record it before
    # decoding so an unreadable file still keeps its id in the submission.
    img_ind = int(im_name.split('.')[0])
    indices[im_ind] = img_ind

    im = cv2.imread(os.path.join(test_path, im_name))
    if im is None:
        # cv2.imread returns None on failure; leave this slot as a blank image
        # instead of crashing inside cvtColor.
        print('Could not read {0}; leaving a blank image for id {1}'.format(im_name, img_ind))
        continue
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (IMG_SIZE, IMG_SIZE))
    im = im / 255.0  # scale pixel values to [0, 1]
    X_test[im_ind] = im

# Fill in missing data

In [None]:
# Slots whose id is still 0 were never filled from a file; give each of them
# one of the ids that no file provided, so every id appears exactly once.
# NOTE(review): assumes image ids are 1-based (1..len(indices)), which is what
# using 0 as the "empty" sentinel implies - confirm against the dataset.
expected_ids = set(range(1, len(indices) + 1))
missing_imgs = expected_ids - set(int(x) for x in indices)
# Sort once up front: deterministic assignment, and no list rebuild per iteration.
missing_ids = sorted(missing_imgs)
for loc, img_id in zip(np.where(indices == 0)[0], missing_ids):
    indices[loc] = img_id

# Predict

In [None]:
# Per-class scores for every test slot, then the index of the best class per row.
predictions = loaded_model.predict(X_test)
y_test = predictions.argmax(axis=1)

# Create Submission

In [None]:
filename = 'resnet_big.csv'

# Labels were stored 0-based for training; add 1 to get back the 1-based
# category numbers.
ans = pd.DataFrame({'id': indices.astype(int).tolist(),
                    'predicted': (y_test + 1).astype(int).tolist()})

# Write the rows ordered by image id, without the DataFrame index column.
ans.sort_values(by='id').to_csv(filename, index=False)