### Imports

In [None]:
# Shell escape: run nvidia-smi so the notebook output records the GPU state.
!nvidia-smi

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import zipfile
import csv
import sys
import os


import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.regularizers import l2
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.layers import *

from sklearn.model_selection import train_test_split, StratifiedKFold

import PIL
from PIL import ImageOps, ImageFilter
#увеличим дефолтный размер графиков
from pylab import rcParams
rcParams['figure.figsize'] = 10, 5
#графики в svg выглядят более четкими
%config InlineBackend.figure_format = 'svg' 
%matplotlib inline

print(os.listdir("../input"))
print('Python       :', sys.version.split('\n')[0])
print('Numpy        :', np.__version__)
print('Tensorflow   :', tf.__version__)
print('Keras        :', tf.keras.__version__)

### Global variables

In [None]:
# --- Batching / data split ---------------------------------------------
BATCH_SIZE = 8        # batch size passed to every data generator
VAL_SPLIT = 0.15      # fraction of the training images held out for validation

# --- Image and label geometry ------------------------------------------
CLASS_NUM = 10        # width of the final softmax layer
IMG_SIZE = 400        # images are resized to IMG_SIZE x IMG_SIZE
IMG_CHANNELS = 3
input_shape = (IMG_SIZE, IMG_SIZE, IMG_CHANNELS)

# --- Filesystem layout --------------------------------------------------
DATA_PATH = '../input/'      # location of the input CSV files
PATH = "../working/car/"     # working directory

In [None]:
# exist_ok=True keeps this cell re-runnable: with exist_ok=False a second run
# (or a pre-existing working directory) raised FileExistsError.
os.makedirs(PATH, exist_ok=True)

RANDOM_SEED = 42
PYTHONHASHSEED = 0

# Seed NumPy and TensorFlow so sampling and weight initialisation are repeatable.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

# A plain Python variable called PYTHONHASHSEED does nothing on its own, so
# export it as an environment variable. The running interpreter has already
# fixed its hash seed at start-up; this only reaches processes started later.
os.environ['PYTHONHASHSEED'] = str(PYTHONHASHSEED)

### Basic EDA and data unpacking

In [None]:
# Load the label table and the submission template from DATA_PATH.
train_df, sample_submission = (
    pd.read_csv(f"{DATA_PATH}{csv_name}")
    for csv_name in ("train.csv", "sample-submission.csv")
)
# Preview the first rows of the label table.
train_df.head()

In [None]:
# Summarise the label table: columns, dtypes and non-null counts.
train_df.info()

In [None]:
# Number of images per class, to check how balanced the classes are.
train_df['Category'].value_counts()

In [None]:
print('Unpacking images')

# Read the archives from DATA_PATH rather than a second hard-coded "../input/",
# so the input location is configured in exactly one place.
for data_zip in ['train.zip', 'test.zip']:
    with zipfile.ZipFile(os.path.join(DATA_PATH, data_zip), "r") as archive:
        archive.extractall(PATH)

# List what ended up in the working directory.
print(os.listdir(PATH))

In [None]:
print('Image samples (random sample)')
plt.figure(figsize=(12,8))

# Draw nine random rows and keep their file names and class labels side by side.
random_image = train_df.sample(n=9)
random_image_paths = random_image['Id'].values
random_image_cat = random_image['Category'].values

# Lay the nine pictures out on a 3x3 grid, one subplot per image.
for position, (file_name, category) in enumerate(
        zip(random_image_paths, random_image_cat), start=1):
    picture = PIL.Image.open(PATH + f'train/{category}/{file_name}')
    plt.subplot(3, 3, position)
    plt.imshow(picture)
    plt.title('Class: ' + str(category))
    plt.axis('off')
plt.show()

Посмотрим на примеры картинок и их размеры, чтобы понимать, как их лучше обрабатывать и сжимать.

In [None]:
# Open one training image and show it together with its native size.
# PATH already ends with '/', so the relative part must not start with one
# (the original built ".../car//train/...").
# NOTE(review): `image` rebinds the name imported from
# tensorflow.keras.preprocessing in the imports cell; rename this variable if
# that module is needed later.
image = PIL.Image.open(PATH+'train/0/100380.jpg')
imgplot = plt.imshow(image)
plt.show()
# (width, height) of the image in pixels.
image.size

### Data augmentation and albumentation

In [None]:
!pip install git+https://github.com/mjkvaak/ImageDataAugmentor

In [None]:
!pip install albumentations

In [None]:
from ImageDataAugmentor.image_data_augmentor import *
import albumentations

    
# Photometric jitter: when this group fires, one of a strong or a mild
# brightness/contrast change is picked.
_brightness_contrast = albumentations.OneOf(
    [
        albumentations.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3),
        albumentations.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1),
    ],
    p=0.5,
)

# Affine jitter: shift, scale and rotate in one transform.
_shift_scale_rotate = albumentations.ShiftScaleRotate(
    shift_limit=0.5,
    scale_limit=0.5,
    rotate_limit=40,
    p=0.2,
)

# Mild elastic warp.
_elastic = albumentations.ElasticTransform(
    alpha=0.1,
    sigma=5,
    alpha_affine=2,
    p=0.4,
)

# Full training-time augmentation pipeline; each entry carries its own
# probability `p`, and the order below is the order given to Compose.
AUGMENTATIONS = albumentations.Compose([
    albumentations.Transpose(p=0.1),
    albumentations.Flip(p=0.1),
    _brightness_contrast,
    albumentations.GaussianBlur(p=0.05),
    albumentations.HueSaturationValue(p=0.1),
    albumentations.RGBShift(p=0.1),
    albumentations.IAAPerspective(p=0.1),
    albumentations.GridDistortion(p=0.5),
    _shift_scale_rotate,
    _elastic,
])

# Training pipeline: multiply pixels by 1/255, apply the albumentations
# pipeline and reserve VAL_SPLIT of the images for validation.
train_datagen = ImageDataAugmentor(
        rescale=1./255,
        augment = AUGMENTATIONS,
        augment_seed=RANDOM_SEED,
        preprocess_input=None,
        validation_split=VAL_SPLIT
)

# Validation pipeline: same rescaling and same split fraction, but NO
# augmentation. The original drew the validation subset from `train_datagen`,
# so validation metrics (and the callbacks that monitor them) were computed on
# randomly distorted images.
# NOTE(review): this relies on flow_from_directory assigning files to the
# 'training'/'validation' subsets deterministically from validation_split, as
# Keras' ImageDataGenerator does; confirm for ImageDataAugmentor.
val_datagen = ImageDataAugmentor(
        rescale=1./255,
        validation_split=VAL_SPLIT
)

# Submission pipeline: rescaling only.
test_datagen = ImageDataAugmentor(
        rescale=1./255
)

# Augmented training batches.
train_generator = train_datagen.flow_from_directory(
        PATH+'train/',
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=True,
        seed=RANDOM_SEED,
        subset='training')

# Held-out validation batches. The name `test_generator` is kept because the
# training and evaluation cells refer to it.
test_generator = val_datagen.flow_from_directory(
        PATH+'train/',
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        shuffle=True,
        seed=RANDOM_SEED,
        subset='validation')

# Unlabelled images for the submission; shuffle=False keeps the predictions
# in the same order as the generator's file list.
test_sub_generator = test_datagen.flow_from_dataframe(
        dataframe=sample_submission,
        directory=PATH+'test_upload/',
        x_col="Id",
        y_col=None,
        shuffle=False,
        class_mode=None,
        seed=RANDOM_SEED,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,)

### Building a classifier

In [None]:
!pip install -U efficientnet

In [None]:
# Imported here rather than at the top because the package is installed by
# the preceding cell.
from efficientnet import tfkeras as efn

# Backbone: EfficientNet-B6 with ImageNet weights and without its
# classification top, built for the notebook's input_shape.
base_model = efn.EfficientNetB6(
    include_top=False,
    weights='imagenet',
    input_shape=input_shape,
)

In [None]:
# New classification head stacked on the backbone: global average pooling,
# a 1000-unit ReLU layer with dropout, then a CLASS_NUM-way softmax.
model = tf.keras.models.Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(1000, activation='relu'),
    Dropout(0.2),
    Dense(CLASS_NUM, activation='softmax'),
])

### Training

In [None]:
# Keep only the weights with the best validation accuracy.
# Two fixes over the original: `monitor` is a metric name (a string, not a
# list), and save_best_only=True is set. Without it the file was overwritten
# after every epoch, so 'best_model.hdf5' held the last epoch, not the best.
checkpoint = ModelCheckpoint('best_model.hdf5',
                             monitor = 'val_accuracy',
                             save_best_only = True,
                             verbose = 1,
                             mode = 'max')
# Halve the learning rate after 3 epochs without val_loss improvement,
# never going below 1e-6.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
                            monitor = 'val_loss',
                            factor = 0.5,
                            patience = 3,
                            min_lr=0.000001,
                            verbose=1,
                            mode='min')
# Shared by every training stage below.
callbacks_list = [checkpoint, reduce_lr]

# https://towardsdatascience.com/finding-good-learning-rate-and-the-one-cycle-policy-7159fe1db5d6
# TODO: https://github.com/titu1994/keras-one-cycle

In [None]:
# Reset Keras' global state.
# NOTE(review): this runs after `model` has already been built; presumably it
# is meant to release leftovers from earlier runs. Confirm it is still wanted here.
tf.keras.backend.clear_session()

#### Fitting head

In [None]:
EPOCHS = 5
LR = 0.001

# Stage 1: freeze the whole backbone so only the new head is trained.
base_model.trainable = False

# Recompile so the changed `trainable` flag takes effect.
model.compile(
              loss='categorical_crossentropy',
              optimizer=tf.optimizers.Adam(learning_rate=LR) ,
              metrics=['accuracy']
)

# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from recent Keras releases.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

#### First unfreeze

In [None]:
LR = 0.001
EPOCHS = 15


# Stage 2: unfreeze the backbone, then re-freeze its first half so only the
# upper half of the layers (plus the head) is trained.
base_model.trainable = True
fine_tune_at = len(base_model.layers)//2
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable =  False
# The printed number is len(trainable_variables), i.e. weight tensors, not
# layers; the label now says so.
print('Number of trainable variables in base model:', len(base_model.trainable_variables))

# Recompile so the changed `trainable` flags take effect.
model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.optimizers.Adam(learning_rate=LR) ,
        metrics=['accuracy']
)

# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from recent Keras releases.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

#### Second unfreeze

In [None]:
LR = 0.0001
EPOCHS = 3


# Stage 3: unfreeze the backbone, then re-freeze only its first quarter, at a
# lower learning rate than the previous stage.
base_model.trainable = True
fine_tune_at = len(base_model.layers)//4
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable =  False
# The printed number is len(trainable_variables), i.e. weight tensors, not
# layers; the label now says so.
print('Number of trainable variables in base model:', len(base_model.trainable_variables))

# Recompile so the changed `trainable` flags take effect.
model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.optimizers.Adam(learning_rate=LR) ,
        metrics=['accuracy']
)

# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from recent Keras releases.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

#### Full unfreeze

In [None]:
LR = 0.0001
EPOCHS = 10


# Stage 4: train every layer of the backbone together with the head.
base_model.trainable = True


# Recompile so the changed `trainable` flag takes effect.
model.compile(
        loss='categorical_crossentropy',
        optimizer=tf.optimizers.Adam(learning_rate=LR) ,
        metrics=['accuracy']
)

# Model.fit accepts generators directly; fit_generator is deprecated and has
# been removed from recent Keras releases.
history = model.fit(
        train_generator,
        steps_per_epoch = len(train_generator),
        validation_data = test_generator,
        validation_steps = len(test_generator),
        epochs = EPOCHS,
        callbacks = callbacks_list
)

#### Evaluation

In [None]:
# Score the in-memory model (the weights left by the last training stage) on
# the validation generator. Model.evaluate accepts generators directly;
# evaluate_generator is deprecated and has been removed from recent Keras releases.
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
# scores[0] is the loss, scores[1] the 'accuracy' metric set in compile().
print("Accuracy: %.2f%%" % (scores[1]*100))

### Making a submission

In [None]:
# Sanity check: presumably the number of images the submission generator
# found. Verify it matches the number of rows in sample_submission.
test_sub_generator.samples

In [None]:
# Rewind the generator so the predictions line up with its file list.
test_sub_generator.reset()

# Model.predict accepts generators directly; predict_generator is deprecated
# and has been removed from recent Keras releases.
predictions = model.predict(test_sub_generator, steps=len(test_sub_generator), verbose=1)
# TTA result was somehow worse, so predicting on a simple generator

# Most probable class index for every image.
predictions = np.argmax(predictions, axis=-1) #multiple categories
# class_indices maps class name -> index; invert it to turn indices back into names.
label_map = (train_generator.class_indices)
label_map = {index: name for name, index in label_map.items()} #flip k,v
predictions = [label_map[k] for k in predictions]

In [None]:
# Pair every file name from the submission generator with its predicted label.
filenames_with_dir=test_sub_generator.filenames
submission = pd.DataFrame({'Id':filenames_with_dir, 'Category':predictions}, columns=['Id', 'Category'])
# Strip a leading directory component if the generator reports one.
# Series.replace only swaps values that are equal to 'test_upload/' as a whole,
# so the original call could never remove the prefix; .str.replace works on
# substrings (regex=False: treat the pattern literally).
submission['Id'] = submission['Id'].str.replace('test_upload/', '', regex=False)
submission.to_csv('submission_v5.csv', index=False)
print('Save submit')

In [None]:
# Preview the first rows of the submission table written above.
submission.head()