In [96]:
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import tensorflow as tf

from tensorflow.keras.applications.efficientnet import EfficientNetB7, EfficientNetB1, preprocess_input

In [97]:
TRAIN = False

In [98]:
# CHANGE THIS TO YOUR LOCAL DIR.
data_dir = '/home/kaggle/data/train'
test_dir = '/home/kaggle/data/test/'
model_dir = 'model.h5'

CLASSES = sorted(os.listdir(data_dir))

epochs = 10
num_classes = 10
batch_size = 32
target_size_x = 244
target_size_y = 244
size = 96
channels = 3
input_shape = (target_size_x, target_size_y, channels)
seed = 111

In [99]:
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(preprocessing_function=preprocess_input, 
                                                                validation_split=0.25,
                                                                rotation_range=2, 
                                                                horizontal_flip=True,
                                                                zoom_range=.1,
                                                                #rescale=1/255.
                                                                )


In [100]:
train_flow = train_datagen.flow_from_directory(
    directory=data_dir,
    subset="training",
    target_size=(target_size_x, target_size_y),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True,
    seed=seed
)

validation_flow = train_datagen.flow_from_directory(
    directory=data_dir,
    subset="validation",
    target_size=(target_size_x, target_size_y),
    color_mode="rgb",
    batch_size=batch_size,
    class_mode="categorical",
    shuffle=True,
    seed=seed,
)

Found 3750 images belonging to 10 classes.
Found 1250 images belonging to 10 classes.


## PREPARE TEST DATA

In [101]:
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator()

test_flow = test_datagen.flow_from_directory(
    directory=test_dir,
    target_size=(target_size_x, target_size_y),
    color_mode="rgb",
    batch_size=1,
    class_mode=None,
    shuffle=False,
    seed=None,
)

Found 8000 images belonging to 1 classes.


In [102]:
from glob import glob
test_files = sorted(glob(test_dir+'/test/' + '*.bmp'))
test_idx = list(map(lambda x: x.split('/')[-1].split('.')[0], test_files))
#test_files

In [103]:
test_idx

['0',
 '1',
 '10',
 '100',
 '1000',
 '1001',
 '1002',
 '1003',
 '1004',
 '1005',
 '1006',
 '1007',
 '1008',
 '1009',
 '101',
 '1010',
 '1011',
 '1012',
 '1013',
 '1014',
 '1015',
 '1016',
 '1017',
 '1018',
 '1019',
 '102',
 '1020',
 '1021',
 '1022',
 '1023',
 '1024',
 '1025',
 '1026',
 '1027',
 '1028',
 '1029',
 '103',
 '1030',
 '1031',
 '1032',
 '1033',
 '1034',
 '1035',
 '1036',
 '1037',
 '1038',
 '1039',
 '104',
 '1040',
 '1041',
 '1042',
 '1043',
 '1044',
 '1045',
 '1046',
 '1047',
 '1048',
 '1049',
 '105',
 '1050',
 '1051',
 '1052',
 '1053',
 '1054',
 '1055',
 '1056',
 '1057',
 '1058',
 '1059',
 '106',
 '1060',
 '1061',
 '1062',
 '1063',
 '1064',
 '1065',
 '1066',
 '1067',
 '1068',
 '1069',
 '107',
 '1070',
 '1071',
 '1072',
 '1073',
 '1074',
 '1075',
 '1076',
 '1077',
 '1078',
 '1079',
 '108',
 '1080',
 '1081',
 '1082',
 '1083',
 '1084',
 '1085',
 '1086',
 '1087',
 '1088',
 '1089',
 '109',
 '1090',
 '1091',
 '1092',
 '1093',
 '1094',
 '1095',
 '1096',
 '1097',
 '1098',
 '1099',
 

In [104]:
effnet = EfficientNetB7(weights='imagenet', 
                        include_top=False, 
                        input_shape=input_shape, 
                        classes=num_classes)

In [105]:
from tensorflow.keras.models import Model

In [106]:
def bulit_model(base_model, input_shape=input_shape):

    inputs = tf.keras.layers.Input(shape=input_shape)
    
    x = base_model(inputs)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    #x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Dense(1024, activation='relu')(x) # we add dense layers so that the model can learn more complex functions and classify for better results.
    x = tf.keras.layers.Dropout(0.25)(x) # to avoid overfitting
    x = tf.keras.layers.Dense(512, activation='relu')(x) # dense layer 2
    #x = tf.keras.layers.BatchNormalization()(x)

    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)

    model.layers[1].trainable = False

    #model.compile(optimizer= 'sgd', loss= 'categorical_crossentropy', metrics= ['accuracy'])

    return model

In [107]:
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# parameters for optimizers
lr = 1e-3

# Parameters for training
epochs = 25
#batch_size = 8

# parameters for callback functions
es_patience = 10
rlrop_patience = 5
decay_rate = 0.5

sgd = SGD(lr = lr, momentum = 0.9, nesterov = True)
#sgd = Adam(lr = lr,)
es = EarlyStopping(monitor = 'val_loss', mode = 'min', patience = es_patience, restore_best_weights = True, verbose = 1)
rlrop = ReduceLROnPlateau(monitor = 'val_loss', mode = 'min', patience = rlrop_patience, 
                        factor = decay_rate, min_lr = 1e-6, verbose = 1)

cp = tf.keras.callbacks.ModelCheckpoint(
        'bst_model.h5',
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="min",
        save_freq="epoch",
        options=None,
    )

In [108]:
model = bulit_model(effnet,)
model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_8 (InputLayer)         [(None, 244, 244, 3)]     0         
_________________________________________________________________
efficientnetb7 (Functional)  (None, 8, 8, 2560)        64097687  
_________________________________________________________________
global_average_pooling2d_3 ( (None, 2560)              0         
_________________________________________________________________
dense_9 (Dense)              (None, 1024)              2622464   
_________________________________________________________________
dropout_3 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_10 (Dense)             (None, 512)               524800    
_________________________________________________________________
dense_11 (Dense)             (None, 10)                5130

In [109]:
model.compile(optimizer= sgd, loss= 'categorical_crossentropy', metrics= ['accuracy'])

In [110]:
step_size_train = train_flow.n//train_flow.batch_size
step_size_valid = validation_flow.n//validation_flow.batch_size

step_size_train, step_size_valid

(117, 39)

In [111]:
#history = model.fit(train_images, train_labels, epochs=20, validation_data=(test_images, test_labels))

In [112]:
if TRAIN:
    history = model.fit(train_flow, 
                        steps_per_epoch=step_size_train, 
                        epochs=epochs, 
                        validation_data=validation_flow, 
                        validation_steps=step_size_valid,
                        callbacks=[es, rlrop, cp]
                        )
    model.save('all_epochs.h5')

In [113]:
model.load_weights(model_dir)
#model_eval = model.evaluate(validation_flow, verbose=1)

In [114]:
y_prob = model.predict(test_flow, )
y_prob.shape

(8000, 10)

In [115]:
test_idx

['0',
 '1',
 '10',
 '100',
 '1000',
 '1001',
 '1002',
 '1003',
 '1004',
 '1005',
 '1006',
 '1007',
 '1008',
 '1009',
 '101',
 '1010',
 '1011',
 '1012',
 '1013',
 '1014',
 '1015',
 '1016',
 '1017',
 '1018',
 '1019',
 '102',
 '1020',
 '1021',
 '1022',
 '1023',
 '1024',
 '1025',
 '1026',
 '1027',
 '1028',
 '1029',
 '103',
 '1030',
 '1031',
 '1032',
 '1033',
 '1034',
 '1035',
 '1036',
 '1037',
 '1038',
 '1039',
 '104',
 '1040',
 '1041',
 '1042',
 '1043',
 '1044',
 '1045',
 '1046',
 '1047',
 '1048',
 '1049',
 '105',
 '1050',
 '1051',
 '1052',
 '1053',
 '1054',
 '1055',
 '1056',
 '1057',
 '1058',
 '1059',
 '106',
 '1060',
 '1061',
 '1062',
 '1063',
 '1064',
 '1065',
 '1066',
 '1067',
 '1068',
 '1069',
 '107',
 '1070',
 '1071',
 '1072',
 '1073',
 '1074',
 '1075',
 '1076',
 '1077',
 '1078',
 '1079',
 '108',
 '1080',
 '1081',
 '1082',
 '1083',
 '1084',
 '1085',
 '1086',
 '1087',
 '1088',
 '1089',
 '109',
 '1090',
 '1091',
 '1092',
 '1093',
 '1094',
 '1095',
 '1096',
 '1097',
 '1098',
 '1099',
 

In [116]:
prob_df = pd.DataFrame(y_prob, index=test_idx, columns=CLASSES)

prob_df.head()

Unnamed: 0,airplane,bird,car,cat,deer,dog,horse,monkey,ship,truck
0,2.5e-05,2e-06,4e-06,6e-06,1.510693e-05,2.5e-05,0.9999169,2e-06,1e-06,3e-06
1,8e-06,5.5e-05,2e-06,1e-06,7.128413e-07,2e-05,3.830031e-07,0.999908,2e-06,3e-06
10,0.998698,0.000273,2.9e-05,2.8e-05,6.172017e-05,6.1e-05,0.0002592907,0.000279,0.000242,6.9e-05
100,0.000271,1.5e-05,6e-06,7e-06,1.339701e-06,5.8e-05,2.061961e-06,1.1e-05,0.999593,3.6e-05
1000,0.000305,0.001197,0.000142,0.001118,4.930392e-05,0.040457,0.0001172845,0.956234,0.00012,0.00026


In [117]:
y_pred = y_prob.argmax(1)
y_pred.shape

(8000,)

In [118]:
pred_df = pd.DataFrame(y_pred, index=test_idx, columns=['Predictions'])
pred_df.head()

Unnamed: 0,Predictions
0,6
1,7
10,0
100,8
1000,7


In [119]:
pred_df

Unnamed: 0,Predictions
0,6
1,7
10,0
100,8
1000,7
...,...
995,2
996,6
997,2
998,8


In [120]:
prob_df.to_csv('probability_submissions.csv', index=True, index_label=['FileName'])
pred_df.to_csv('predictions_submissions.csv', index=True, index_label=['FileName'])

## THANK YOU