# AIA
AIA mid-term

In [1]:
import os
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
%matplotlib inline

# Dataset root. Defaults to the original shared-drive location; set the
# TWAIA_ROOT environment variable to point the notebook at another copy
# of the data without editing this cell.
root_path = Path(os.environ.get('TWAIA_ROOT', '/media/share/data/kaggle/TWaia'))
train = root_path / 'train'  # walked as <category>/<image file> by the loading cell
test = root_path / 'test'    # globbed for *.jpg files by the testing cell

## image processing
- convert grayscale images to RGB (3 channels)
- resize to 224 × 224

In [2]:
from skimage import io
from skimage.transform import resize
from skimage.color import gray2rgb
from random import shuffle

def load_image(path, size=(224, 224)):
    """Read one image file and resize it to ``size`` with 3 colour channels.

    Parameters
    ----------
    path : path-like
        Image file, passed straight to ``skimage.io.imread``.
    size : tuple of int, optional
        Target (height, width); defaults to (224, 224).

    Returns
    -------
    The output of ``skimage.transform.resize`` for the target shape
    ``size + (3,)``.
    """
    pixels = io.imread(path)
    if pixels.ndim == 2:
        # Only true grayscale needs expanding; calling gray2rgb on an image
        # that already has a channel axis is version-dependent in skimage.
        pixels = gray2rgb(pixels)
    elif pixels.shape[-1] == 4:
        # Drop the alpha channel instead of letting `resize` interpolate
        # four channels down to three.
        pixels = pixels[..., :3]
    return resize(pixels, tuple(size) + (3,), mode='edge')


label = []
img = []

# Sorted listings make the sample order (and therefore the un-shuffled
# StratifiedKFold split further down) reproducible across runs and machines.
for cate in sorted(os.listdir(train)):
    cate_dir = train / cate
    # Skip notebook checkpoints and stray files sitting next to the
    # category directories.
    if cate == '.ipynb_checkpoints' or not cate_dir.is_dir():
        continue
    for idx in sorted(os.listdir(cate_dir)):
        if idx != '.ipynb_checkpoints':
            label.append(cate)
            img.append(load_image(cate_dir / idx))

## mapping categories and codes

In [3]:
from sklearn.preprocessing import LabelEncoder

# Category-name / numeric-code table shipped with the dataset, ordered by code.
# `sort_values` replaces `sort_index(by=...)`, which pandas deprecated and
# later removed.
mapping = (
    pd.read_csv(root_path / 'mid_term_mapping.txt', header=None, names=['cate', 'code'])
    .sort_values('code')
)

# NOTE(review): LabelEncoder numbers classes by sorted category name and never
# reads the `code` column, so `labels` only equals the official codes if the
# mapping file happens to be in that same order — confirm before submitting.
le = LabelEncoder().fit(mapping.cate)
labels = le.transform(label)

  after removing the cwd from sys.path.


## train-validation split

In [4]:
from sklearn.model_selection import StratifiedKFold

# Build a 5-fold stratified splitter and keep only the first fold's
# (train, validation) index arrays.
skf = StratifiedKFold(n_splits=5)
train_idx, val_idx = next(iter(skf.split(img, labels)))

# Report how many samples landed on each side of the split.
for caption, fold in (('training size:', train_idx), ('validation size:', val_idx)):
    print(caption, len(fold))

training size: 2383
validation size: 602


In [5]:
# Shuffle each index array in place, then collect the matching images and
# encoded labels in that shuffled order.

# train
shuffle(train_idx)
train_img = [img[i] for i in train_idx]
train_lab = [labels[i] for i in train_idx]

# val
shuffle(val_idx)
val_img = [img[i] for i in val_idx]
val_lab = [labels[i] for i in val_idx]

## CNN model - inception-resnet-v2

In [6]:
import keras.backend as K
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D

# Reset the Keras backend session before building the graph for this cell.
K.clear_session()

# Inception-ResNet-v2 backbone with ImageNet weights and without its
# original top layers, fed 224x224 RGB inputs.
basenet = InceptionResNetV2(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# New head: global average pooling over the backbone output, followed by a
# 15-way softmax layer.
pooled = GlobalAveragePooling2D()(basenet.output)
x = Dense(15, activation='softmax')(pooled)

# NOTE(review): no input rescaling is applied anywhere in this notebook;
# check whether the pretrained backbone expects `preprocess_input` — confirm.
model = Model(inputs=basenet.input, outputs=x)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.7/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5


## model training

In [7]:
from keras.utils import to_categorical

batch_size = 16

# Pin the one-hot width to the size of the model's output layer. Without
# `num_classes`, `to_categorical` infers the width from the largest label it
# sees, so a fold that lacks the highest class would produce targets whose
# shape does not match the model output (or the other fold).
n_classes = model.output_shape[-1]

train_x = np.array(train_img)
train_y = to_categorical(train_lab, num_classes=n_classes)
val_x = np.array(val_img)
val_y = to_categorical(val_lab, num_classes=n_classes)

# Fit for 16 epochs, evaluating on the held-out fold after each one;
# verbose=2 prints one summary line per epoch.
train_hist = model.fit(train_x, train_y,
                       batch_size=batch_size,
                       epochs=16,
                       validation_data=(val_x, val_y),
                       verbose=2)

Train on 2383 samples, validate on 602 samples
Epoch 1/16
 - 59s - loss: 0.9544 - acc: 0.6844 - val_loss: 1.1549 - val_acc: 0.6478
Epoch 2/16
 - 40s - loss: 0.4964 - acc: 0.8422 - val_loss: 1.2857 - val_acc: 0.6246
Epoch 3/16
 - 39s - loss: 0.3183 - acc: 0.8993 - val_loss: 1.6608 - val_acc: 0.5731
Epoch 4/16
 - 39s - loss: 0.2650 - acc: 0.9152 - val_loss: 2.2332 - val_acc: 0.5498
Epoch 5/16
 - 39s - loss: 0.1768 - acc: 0.9433 - val_loss: 1.6246 - val_acc: 0.6694
Epoch 6/16
 - 39s - loss: 0.1728 - acc: 0.9425 - val_loss: 1.6879 - val_acc: 0.6611
Epoch 7/16
 - 39s - loss: 0.1798 - acc: 0.9366 - val_loss: 1.1016 - val_acc: 0.7126
Epoch 8/16
 - 39s - loss: 0.1028 - acc: 0.9681 - val_loss: 1.1499 - val_acc: 0.7193
Epoch 9/16
 - 39s - loss: 0.1852 - acc: 0.9417 - val_loss: 3.0312 - val_acc: 0.4402
Epoch 10/16
 - 39s - loss: 0.2096 - acc: 0.9308 - val_loss: 1.6161 - val_acc: 0.6744
Epoch 11/16
 - 39s - loss: 0.0959 - acc: 0.9723 - val_loss: 1.7712 - val_acc: 0.6894
Epoch 12/16
 - 39s - loss: 

## testing

In [18]:
# Enumerate the test files exactly once, in a fixed order, so that the images
# fed to the model and the ids written next to the predictions are guaranteed
# to line up (two separate glob() calls carry no ordering guarantee).
test_paths = sorted(test.glob('*.jpg'))

test_img = []
for path in test_paths:
    # glob() already yields complete paths, so no re-joining with `test`.
    pixels = io.imread(path)
    if pixels.ndim == 2:
        # Only true grayscale needs expanding to three channels.
        pixels = gray2rgb(pixels)
    elif pixels.shape[-1] == 4:
        # Drop alpha rather than letting `resize` squeeze 4 channels into 3.
        pixels = pixels[..., :3]
    test_img.append(resize(pixels, (224, 224, 3), mode='edge'))

# model prediction
pred = model.predict(np.array(test_img))
pred_cate = pred.argmax(-1)

# NOTE(review): `pred_cate` holds the index of the largest model output, i.e.
# the LabelEncoder index used for training — confirm it matches the `code`
# column of the mapping file before submitting.
fid = [path.stem for path in test_paths]  # file name without the .jpg suffix
pred_final = pd.DataFrame({'id': fid, 'class': pred_cate})
pred_final[['id', 'class']].sample(10)

Unnamed: 0,id,class
1126,ba641dfb1a6bbf7c82798290736f642ae8442c5f9df830...,0
423,4582995ab6e9661427a11cdaee85731e7cb93fe82b4af0...,7
982,a3c7858370a6ac878007f121899878042faf8bbd7ceb92...,9
514,546d7ed7ade8a5120f2cf2442d744f477ee2e9521ed611...,11
838,8c74247803c18f017a817e15a09b2e03a21bfb780589fc...,14
1255,d38c03f857af07bc3fa762a357c785eae6a822ea907e09...,1
827,8af0089e6b065da9d498bb333115d74e7ca6b6eeebfe28...,11
295,31be4c248c8f77d711d1fcab19545a897724a316c16225...,3
511,54233915b3bbaef5124de45868afc281f5fe9d75bd5b8e...,11
899,97fcb5be46159454216744960118a0ff718b313d7e6207...,13


## submission

In [20]:
# Write the id/class columns (in that order, without the index) to the
# submission file under the dataset root.
submission_path = root_path / 'submit01.csv'
pred_final.loc[:, ['id', 'class']].to_csv(submission_path, index=False)