In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.style as style
from PIL import Image
import seaborn as sns

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
np.random.seed(115)
from multiprocessing import cpu_count
# Number of CPU cores reported for this machine.
nCores = cpu_count()

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Install/upgrade the `efficientnet` package; it is imported further down as `efficientnet.tfkeras`.
!pip install -U efficientnet


In [None]:
import tensorflow as tf
import cv2
from tqdm.notebook import tqdm
import tensorflow as tf
print(tf.__version__)

# import our model, different layers and activation function 
from tensorflow.keras.callbacks import CSVLogger, TensorBoard, ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import logging
import warnings
import gc

from tensorflow.keras import regularizers
from tensorflow.keras.layers import Conv2D,MaxPool2D,Activation,GlobalAveragePooling2D,BatchNormalization,Dropout,MaxPooling2D
from tensorflow.keras.layers import Flatten,Dense,Dropout,GlobalAveragePooling2D
from tensorflow.keras import Sequential
from tensorflow.keras.applications.xception import preprocess_input

# Seed TensorFlow's global random generator (same value as the NumPy seed set earlier).
tf.random.set_seed(115)
# Suppress all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')
# Set the 'tensorflow' logger to INFO level.
logging.getLogger('tensorflow').setLevel(logging.INFO)
import numpy as np
# from tensorflow.keras.mixed_precision import experimental as mixed_precision
import tensorflow_addons as tfa
import efficientnet.tfkeras as efn

# from tensorflow.keras.mixed_precision import experimental as mixed_precision
# policy = mixed_precision.Policy('float32')
# mixed_precision.set_policy(policy)


In [None]:
# Locate the competition data: the dataset root and the nested folder of training images.
root_path = '../input/shopee-product-detection-open'
train_folder_path = os.path.join(root_path, 'train/train/train')
print(train_folder_path)


In [None]:
# Load the training metadata and show its schema.
# DataFrame.info() prints its report itself and returns None, so it is called directly
# instead of being wrapped in print() (which would append a stray "None" line).
train = pd.read_csv(os.path.join(root_path, 'train.csv'))
train.info()


In [None]:
# Directory the notebook runs from; used further down as the output folder for the saved model and the submission file.
working_path = os.getcwd()

In [None]:
from sklearn.model_selection import train_test_split
# Switch between the full training set and a 5% stratified sample (useful for quick experiments).
full_train = True
if full_train:
    dataset = train
else:
    # Keep only the 5% side of a stratified split as a small working sample.
    _, dataset = train_test_split(train, test_size=0.05, random_state=45,
                                  stratify=train['category'])

# Drop the extra reference and reclaim memory before the heavier steps.
del train
gc.collect()

# info() prints its own report and returns None, so it is not wrapped in print()
# (doing so would print a stray "None" after the report).
dataset.info()
print(dataset.head(5))

In [None]:
# CountStatus = dataset['category'].value_counts()
# CountStatus.plot(figsize=(10,10));
# CountStatus.plot.barh()

In [None]:
from sklearn.utils import class_weight

# Per-class weights from scikit-learn's 'balanced' heuristic, used to counter class imbalance.
# `classes` and `y` are passed by keyword: recent scikit-learn releases no longer accept
# them positionally, while keyword arguments work on old and new releases alike.
classes = np.unique(dataset['category'])
weights = class_weight.compute_class_weight(class_weight='balanced',
                                            classes=classes,
                                            y=dataset['category'])

# Build the dictionary expected by `class_weight` in model.fit. It is keyed by the actual
# class label rather than by position, so the mapping stays correct even if the labels
# are not exactly 0..N-1.
class_weights = {int(label): float(weight) for label, weight in zip(classes, weights)}
print(class_weights)

In [None]:
# Expand each bare file name to <train folder>/<two-digit category>/<file name>.
category_dirs = dataset['category'].astype(str).str.zfill(2)
dataset['filename'] = [
    os.path.join(os.path.join(train_folder_path, folder), name)
    for folder, name in zip(category_dirs, dataset['filename'])
]
dataset.head(5)

In [None]:
# import matplotlib.pyplot as plt
# image_path = dataset.sample(1)['filename']
# print(image_path.iloc[0])
# img = cv2.imread(image_path.iloc[0])
# plt.imshow(img)

In [None]:
# Hold out 10% of the rows, stratified by category, as the validation split.
train_df, test_df = train_test_split(
    dataset,
    test_size=0.1,
    random_state=45,
    stratify=dataset['category'],
)
# The combined frame is no longer needed: release it and reclaim memory.
del dataset
gc.collect()


In [None]:
def dataset_from_dataframe(df, shuffle=True):
    """Build a `tf.data.Dataset` of (filename, category) pairs from a DataFrame.

    Args:
        df: Frame providing a 'filename' column and a 'category' column.
        shuffle: When True (the default, matching the previous behaviour) the pairs
            are shuffled with a buffer as large as `df`. Pass False when the element
            order must line up with the rows of `df`, e.g. when predictions are
            written back against those rows.

    Returns:
        A dataset yielding one (filename, category) pair per row of `df`.
    """
    ds = tf.data.Dataset.from_tensor_slices((df['filename'], df['category']))
    if shuffle:
        # A buffer covering every row gives a uniform shuffle over the whole frame.
        ds = ds.shuffle(buffer_size=len(df))
    return ds
# (path, label) datasets for the training and validation splits; images are decoded in a later cell.
train_ds = dataset_from_dataframe(train_df)
val_ds = dataset_from_dataframe(test_df)

In [None]:
CHANNELS = 3
# BATCH_SIZE = 48

# Configuration
BATCH_SIZE = 64

AUTOTUNE = tf.data.experimental.AUTOTUNE
MAX_EPOCHS = 10
LR = 1e-5
img_size = 300
buffer_size = 2048
NUM_CLASSES = 42

# Number of batches needed to cover each split once (integer ceiling division).
# Kept as plain Python ints, which is what Keras documents for `steps_per_epoch` and
# `validation_steps`; the previous `tf.math.ceil` produced float tensors instead.
STEPS_PER_TRAIN_EPOCH = -(-train_df.shape[0] // BATCH_SIZE)
STEPS_PER_TEST_EPOCH = -(-test_df.shape[0] // BATCH_SIZE)


In [None]:
import random
def preprocess(path):
    """Read the JPEG at `path` and return it as a standardized image tensor.

    The file is decoded to 3 channels, resized to `img_size` x `img_size`, and
    finally passed through `tf.image.per_image_standardization`.
    """
    image = tf.io.read_file(path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [img_size,img_size])
    # NOTE(review): `tf.image.resize` is documented to return float32 with its default
    # method, and `convert_image_dtype` does not rescale float -> float, so this call
    # likely leaves the values on the 0-255 scale rather than in [0,1] — confirm.
    image = tf.image.convert_image_dtype(image, tf.float32)    
    # NOTE(review): only maps to [-1,1] if the values really are in [0,1] at this point;
    # the per-image standardization below rescales the image afterwards in any case.
    image = (image*2) - 1  # intended to map [0,1] to [-1,1]
    image = tf.image.per_image_standardization(image)
    return image


def random_brightness(image):
    """Apply `tf.image.random_brightness` with a max delta of 0.4."""
    return tf.image.random_brightness(image, .4)

def random_contrast(image):
    """Apply `tf.image.random_contrast` with bounds 0.3 and 1.7."""
    return tf.image.random_contrast(image, .3,1.7)

def random_flip_left_right(image):
    """Apply `tf.image.random_flip_left_right` to the image."""
    return tf.image.random_flip_left_right(image)

def random_crop(image):
    """Crop-or-pad to (img_size+28) per axis, then take a random img_size x img_size x 3 crop."""
    image = tf.image.resize_with_crop_or_pad(image, img_size+28, img_size+28) # Target is img_size+28 on each axis (28 extra pixels, not 6)
    image = tf.image.random_crop(image,[img_size,img_size,3])
    return image

def random_hue(image):
    """Apply `tf.image.random_hue` with a max delta of 0.05."""
    return tf.image.random_hue(image, 0.05)
def random_flip_up_down(image):
    """Apply `tf.image.random_flip_up_down` to the image."""
    return tf.image.random_flip_up_down(image)
def random_sataration(image):
    """Apply `tf.image.random_saturation` with bounds 0.6 and 1.6."""
    return tf.image.random_saturation(image, 0.6, 1.6)


# Two pools of augmentation functions: colour adjustments, and flips/crop
# (`augs_str` presumably stands for "structural" — confirm).
augs_color = [random_brightness, random_contrast, random_hue,random_sataration]
augs_str = [random_flip_left_right, random_flip_up_down, random_crop]

def augmentation(image, label, n_expected=4):
    """Randomly augment one image, leaving its label untouched.

    Every function in `augs_color + augs_str` is applied independently with
    probability `n_expected / len(augs)` (capped at 1), so on average `n_expected`
    of them run per image.

    The selection is drawn with TensorFlow random ops so that it is re-drawn for
    every element. The previous version picked the functions with Python's `random`
    module; inside `Dataset.map` the function is traced into a graph once, so that
    selection was frozen at trace time and every image received the very same
    augmentations (it also computed an unused `k`).

    Args:
        image: Image tensor of shape (img_size, img_size, 3).
        label: Label paired with the image; returned unchanged.
        n_expected: Average number of augmentations to apply per image.

    Returns:
        The (possibly augmented image, label) pair.
    """
    augs = augs_color + augs_str
    apply_prob = min(1.0, n_expected / len(augs))
    for aug in augs:
        # Default arguments bind the current `aug`/`image` for the two branches.
        image = tf.cond(
            tf.random.uniform([]) < apply_prob,
            lambda aug=aug, image=image: aug(image),
            lambda image=image: image,
        )
    return image, label

def load_and_preprocess_from_path_and_label(path,label):
    """Preprocess the image stored at `path`; the label is passed through unchanged."""
    image = preprocess(path)
    return image, label

def prepare_for_training(ds, cache=True,shuffle_buffer_size=100,augment=False):
    """Turn a dataset of (image, label) elements into a repeating, batched input pipeline.

    Args:
        ds: Dataset of individual (image, label) elements.
        cache: False disables caching, a string caches to that file path, any other
            truthy value caches in memory.
        shuffle_buffer_size: Shuffle buffer size; values <= 0 disable shuffling.
        augment: When True, `augmentation` is mapped over the elements before batching.

    Returns:
        The dataset after cache -> shuffle -> repeat -> (augment) -> batch -> prefetch.
        It repeats forever, so callers must bound iteration themselves
        (e.g. with `steps_per_epoch`).
    """
    if cache:
        # A string selects a file-backed cache; otherwise cache in memory.
        ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()
    if shuffle_buffer_size > 0:
        ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    # Repeat forever.
    ds = ds.repeat()
    if augment:
        # Dataset transformations return a NEW dataset; the result has to be assigned
        # back. Previously it was discarded, so augmentation was silently never applied.
        ds = ds.map(augmentation, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE)

    # `prefetch` lets the dataset fetch batches in the background while the model
    # is training.
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds



In [None]:
# Replace each (path, label) pair with (preprocessed image, label), decoding files in parallel.
train_ds = train_ds.map(load_and_preprocess_from_path_and_label,
                        num_parallel_calls=AUTOTUNE)
val_ds = val_ds.map(load_and_preprocess_from_path_and_label,
                    num_parallel_calls=AUTOTUNE)

In [None]:
# Build the final input pipelines (no caching); only the training stream requests augmentation.
train_ds = prepare_for_training(train_ds, cache=False, shuffle_buffer_size=buffer_size, augment=True)
val_ds = prepare_for_training(val_ds, cache=False, shuffle_buffer_size=buffer_size)


In [None]:
def create_model(load_from_path=None,hparams=None):
    """Return a compiled classifier, either loaded from disk or freshly built.

    Args:
        load_from_path: Path of a saved Keras model to load. When None, a new model
            is built from an EfficientNetB3 backbone plus a dense classification head.
        hparams: Currently unused; kept so existing callers keep working.

    Returns:
        The loaded model, or a newly built model compiled with the 'nadam' optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    # Identity check rather than `== None`: the comparison must not depend on how the
    # path object implements equality.
    if load_from_path is not None:
        return tf.keras.models.load_model(load_from_path)

    base_model = efn.EfficientNetB3(weights='noisy-student',
                                    include_top=False,
                                    pooling='max',
                                    classes=NUM_CLASSES,
                                    input_shape=(img_size, img_size, 3))
    base_model.trainable = True

    print("Number of layers in the base model:", len(base_model.layers))

    # Freeze the first 90% of the backbone layers; only the remaining 10% are fine-tuned.
    fine_tune_at = int(0.9 * len(base_model.layers))
    for layer in base_model.layers[:fine_tune_at]:
        layer.trainable = False

    l2_penalty = regularizers.l2(1e-2)
    model = tf.keras.Sequential([
        base_model,
        BatchNormalization(),
        Dropout(0.25),
        Dense(units=512, activation='selu', kernel_initializer='lecun_normal',
              kernel_regularizer=l2_penalty),
        Dropout(0.5),
        Dense(units=512, activation='selu', kernel_initializer='lecun_normal',
              kernel_regularizer=l2_penalty),
        Dropout(0.5),
        Dense(NUM_CLASSES, activation='softmax'),
    ])
    # NOTE: the string 'nadam' selects the optimizer with its default settings; the
    # notebook-level `LR` constant is not applied here.
    model.compile(optimizer='nadam',
                  loss='sparse_categorical_crossentropy',
                  metrics=["accuracy"])
    return model


In [None]:
# Load a previously saved model from the read-only input directory instead of building a new one.
model = create_model('../input/model-efnb3-noisy-3drop-2selu-ndam/model_efnb3_noisy_3drop_2selu_ndam.h5')
model.summary()

In [None]:
# Optionally resume from the checkpoint written by an earlier training run.
# On a fresh session that checkpoint does not exist yet (it is only produced by the
# training cell below), so loading it unconditionally would abort a top-to-bottom run.
resume_ckpt = 'training/cp.ckpt'
# TensorFlow-format weight checkpoints are stored as "<prefix>.index" plus data shards.
if os.path.exists(resume_ckpt + '.index'):
    model.load_weights(resume_ckpt)
else:
    print('No checkpoint found at', resume_ckpt, '- keeping the weights of the loaded model.')

In [None]:
checkpoint_path = "training/cp.ckpt"

# Stop once val_loss has gone 10 epochs without improving.
# NOTE(review): with MAX_EPOCHS set to 10 in the configuration cell, a patience of 10
# cannot trigger within a single run — confirm this is intended.
earlystop = EarlyStopping(monitor='val_loss', mode='min', patience=10, verbose=1)

# Multiply the learning rate by 0.25 whenever val_loss fails to improve for one epoch.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.25, patience=1, min_lr=1e-13)

# Write weights only, and only when the monitored quantity improves on the best so far.
cp_callback = ModelCheckpoint(filepath=checkpoint_path,
                              save_best_only=True,
                              save_weights_only=True,
                              verbose=1)

# The step counts set the length of a training epoch and of a validation pass.
history = model.fit(
    train_ds,
    epochs=MAX_EPOCHS,
    steps_per_epoch=STEPS_PER_TRAIN_EPOCH,
    validation_data=val_ds,
    validation_steps=STEPS_PER_TEST_EPOCH,
    class_weight=class_weights,
    callbacks=[cp_callback, reduce_lr, earlystop],
    verbose=1,
)

In [None]:
# Save the trained model under the working directory, using an .h5 file name.
model.save(os.path.join(working_path,'model_efnb3_noisy_3drop_2selu_ndam.h5'))

In [None]:
# Nested folder holding the test images.
test_folder_path = os.path.join(root_path, 'test/test/test')
print(test_folder_path)


In [None]:
# Load the test metadata and show its schema.
# DataFrame.info() prints its own report and returns None, so it is called directly
# instead of being wrapped in print() (which would append a stray "None" line).
test = pd.read_csv(os.path.join(root_path, 'test.csv'))
test.info()

In [None]:
# Prefix every test file name with the test image folder.
# The previous version routed this through `dd`, a Dask alias that is never imported in
# this notebook (NameError on a fresh kernel); a plain pandas map does the same path join
# without any extra dependency or process pool.
test['filename'] = test['filename'].map(lambda name: os.path.join(test_folder_path, name))
test.head(5)

In [None]:
# Build the inference pipeline straight from the file names, in the same order as `test`.
# It deliberately does not go through `dataset_from_dataframe(test)`, which shuffles its
# elements: the predictions are later written back positionally against the rows of
# test.csv, so a shuffled stream would pair each prediction with the wrong file.
test_ds = tf.data.Dataset.from_tensor_slices(test['filename'].values)
test_ds = test_ds.map(preprocess, num_parallel_calls=AUTOTUNE)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

In [None]:
# `test_ds` is already batched, so no `batch_size` is passed: Keras documents that it must
# not be specified for dataset inputs (some TF 2.x releases raise a ValueError if it is).
res = model.predict(test_ds, verbose=1)


In [None]:
# Compare the number of test rows with the number of prediction rows, then keep the
# index of the highest-scoring class for every image.
print(len(test))
print(len(res))
max_res = res.argmax(axis=1)
print(len(max_res))
print(max_res)

In [None]:
# Start from a fresh copy of test.csv so the submission keeps the original file names.
output = pd.read_csv(os.path.join(root_path, 'test.csv'))
print(output.shape[0])
# Assumes `max_res` is in the same row order as test.csv.
# Categories are written as zero-padded two-character strings ("00", "01", ...).
output['category'] = pd.Series(max_res, index=output.index).astype(str).str.zfill(2)
path = os.path.join(working_path, 'submission.csv')
# info() prints its own report and returns None, so it is not wrapped in print().
output.info()
print(path)
output.to_csv(path, index=False)