In [None]:
# Install the efficientnet package straight from its GitHub repository.
# No tag or commit is pinned, so the installed revision is whatever the
# repository's default branch points to at install time.
!pip install git+https://github.com/qubvel/efficientnet

In [None]:
from efficientnet.keras import EfficientNetB7

In [None]:
from efficientnet.keras import EfficientNetB3

In [None]:
import tensorflow as tf
import zipfile
import pandas as pd
import keras
import os
import cv2
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.convolutional import *
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
from tensorflow.keras import datasets, layers, models
import matplotlib.pyplot as plt

In [None]:
# Directories that hold the resized training and test images.
train_path, test_path = (
    '../input/shopee-code-league-2020-product-detection/resized/train/',
    '../input/shopee-code-league-2020-product-detection/resized/test/',
)

In [None]:
# Read the training labels and shuffle the rows in one step.
# The shuffle is unseeded, so the row order differs from run to run.
df = pd.read_csv(
    "../input/shopee-code-league-2020-product-detection/train.csv"
).sample(frac=1)
df.head()

In [None]:
# Read the list of test images that predictions are needed for.
test_df = pd.read_csv(
    filepath_or_buffer="../input/shopee-code-league-2020-product-detection/test.csv"
)
test_df.head()

In [None]:
# Turn the numeric label into a two-character, zero-padded string (3 -> "03").
# Later cells use this string both as a folder name under train_path and as the
# class label handed to the generators.
df['category'] = df['category'].astype(int).map("{:02d}".format).astype(str)
cats = df['category']

In [None]:
# Relative image path of the form "<category>/<filename>", resolved against
# train_path by the generators further down.
df['combined_filename'] = (df['category'] + "/").astype(str) + df['filename']
df.head()

In [None]:
# Working copy for the generators: everything except the bare filename column,
# which combined_filename supersedes.
mod_df = df.drop("filename", axis="columns")
mod_df.head()

In [None]:
# Shuffle the rows once more. The shuffle is unseeded, so the resulting order
# differs between runs.
# NOTE(review): row order presumably decides which rows the generators below
# place in their "validation" subset -- confirm before relying on run-to-run
# comparisons.
mod_df = mod_df.sample(frac=1)
mod_df.head()

In [None]:
# Sanity check: load and display the first file found in the folder that belongs
# to the first category in `cats`.
cat = cats.iloc[0]
path = os.path.join(train_path, cat)
img = next(iter(os.listdir(path)), None)
if img is None:
    raise FileNotFoundError("no images found in " + path)

img_array = cv2.imread(os.path.join(path, img))
if img_array is None:
    # cv2.imread reports an unreadable file by returning None rather than raising.
    raise ValueError("could not decode image " + os.path.join(path, img))

# OpenCV stores pixels in BGR order while matplotlib expects RGB. Convert a copy
# for display only; img_array itself is left exactly as read so that later cells
# see the same data as before.
plt.imshow(cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB))
plt.show()

In [None]:
# Resize the sample image loaded above to 256x256 pixels.
new_array = cv2.resize(img_array, (256,256))
# The array comes from cv2.imread and is therefore in BGR channel order, while
# matplotlib expects RGB. Convert for display only so the colours are not
# swapped; new_array itself stays in BGR.
plt.imshow(cv2.cvtColor(new_array, cv2.COLOR_BGR2RGB))
plt.show()

In [None]:
# Side length (in pixels) that every generator resizes images to.
IMG_SIZE = 300

# Generator factory for the labelled data: scales pixel values by 1/255 and
# reserves 15% of the rows for the "validation" subset.
datagen = ImageDataGenerator(
    rescale=1./255.,
    validation_split=0.15,
)

In [None]:
# Training batches: the "training" subset of mod_df, one-hot labels, shuffled.
train_generator = datagen.flow_from_dataframe(
    dataframe=mod_df,
    directory=train_path,
    x_col="combined_filename",
    y_col="category",
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode="categorical",
    subset="training",
    batch_size=32,
    shuffle=True,
    seed=42,
)

In [None]:
# Validation batches: the "validation" subset of the same dataframe, built with
# the same settings as the training generator apart from the seed.
valid_generator = datagen.flow_from_dataframe(
    dataframe=mod_df,
    directory=train_path,
    x_col="combined_filename",
    y_col="category",
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode="categorical",
    subset="validation",
    batch_size=32,
    shuffle=True,
    seed=40,
)

In [None]:
# Test images get the same 1/255 rescaling but no validation split.
test_datagen = ImageDataGenerator(rescale=1./255.)

# Unlabelled batches in dataframe order (shuffle=False), so that predictions can
# be matched back to the rows of test_df.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=test_path,
    x_col="filename",
    y_col=None,
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode=None,
    batch_size=32,
    shuffle=False,
    seed=42,
)

In [None]:
# Input shape of the network: square RGB images of side IMG_SIZE.
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# EfficientNetB7 with ImageNet weights and without its classification head,
# used as the feature extractor of the model assembled below.
base_model = EfficientNetB7(
    weights='imagenet',
    include_top=False,
    input_shape=IMG_SHAPE,
)

In [None]:
# Freeze the pre-trained base so that the first training run only updates the
# layers stacked on top of it.
base_model.trainable = False

In [None]:
# Pooling layer that follows the base model in new_model; it averages each
# feature map over its spatial dimensions.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

In [None]:
# Output layer: 42 units and no activation, so the model emits raw logits. The
# compile() calls in this notebook pass from_logits=True to the loss for that
# reason.
# NOTE(review): 42 is presumably the number of product categories -- confirm
# against len(train_generator.class_indices).
prediction_layer = keras.layers.Dense(42)

In [None]:
from keras.optimizers import Adam

# Stack the classifier: frozen backbone -> global average pooling -> logits.
new_model = tf.keras.Sequential()
for stage in (base_model, global_average_layer, prediction_layer):
    new_model.add(stage)

In [None]:
# First training phase: Adam at 3e-4. The loss takes logits because the final
# Dense layer has no activation.
new_model.compile(
    optimizer=Adam(lr=3e-4),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [None]:
def _batches_needed(generator):
    """Return the number of batches that covers every sample of `generator` once.

    Uses ceiling division. The previous `n // batch_size + 1` asked for one batch
    too many whenever `n` was an exact multiple of `batch_size`, which makes
    predict return more rows than there are samples.
    """
    return (generator.n + generator.batch_size - 1) // generator.batch_size


STEP_SIZE_TRAIN = _batches_needed(train_generator)
STEP_SIZE_VALID = _batches_needed(valid_generator)
STEP_SIZE_TEST = _batches_needed(test_generator)
# Phase 1: train the layers on top of the frozen backbone for 4 epochs.
new_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=4,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
)

In [None]:
# Checkpoint the model after the first (frozen-base) training run.
# NOTE(review): the name says 256x256, but the generators in this notebook
# resize to IMG_SIZE (300) -- confirm which is intended.
new_model.save("256x256_efficientnet_4epochs_0.15valid")

In [None]:
# Bare `ls` relies on IPython automagic; lists the working directory so the
# model saved above can be seen.
ls

In [None]:
# Make the backbone trainable again for fine-tuning. The loop further down
# re-freezes its first `fine_tune_at` layers.
base_model.trainable = True

In [None]:
# Report how deep the backbone is, to put the cut-off below into context.
print("Number of layers in the base model: ", len(base_model.layers))

# Index of the first backbone layer that stays trainable.
fine_tune_at = 600

# Everything in front of that index is frozen again.
for frozen_layer in base_model.layers[:fine_tune_at]:
  frozen_layer.trainable = False

In [None]:
# Inspect the model after partially unfreezing the backbone (before it is
# re-compiled).
new_model.summary()

In [None]:
# Re-compile for fine-tuning with a lower learning rate (5e-5); loss and metric
# are the same as in the first phase.
new_model.compile(
    optimizer=Adam(lr=5e-5),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [None]:
# Same summary again, this time after re-compiling.
new_model.summary()

In [None]:
# Deliberately NO reshuffle of mod_df at this point.
# With validation_split, flow_from_dataframe selects the "validation" subset as a
# positional slice of the dataframe. Re-ordering the rows between the two
# training phases would therefore move images the model has already been trained
# on into the validation subset and inflate val_accuracy. The generators rebuilt
# below keep the original train/validation split; batch order is still
# randomised by their shuffle=True.

In [None]:
# Rebuild the training generator for the fine-tuning phase (same settings as in
# the first phase).
train_generator = datagen.flow_from_dataframe(
    dataframe=mod_df,
    directory=train_path,
    x_col="combined_filename",
    y_col="category",
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode="categorical",
    subset="training",
    batch_size=32,
    shuffle=True,
    seed=42,
)

In [None]:
# Rebuild the validation generator for the fine-tuning phase (same settings as
# in the first phase).
valid_generator = datagen.flow_from_dataframe(
    dataframe=mod_df,
    directory=train_path,
    x_col="combined_filename",
    y_col="category",
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode="categorical",
    subset="validation",
    batch_size=32,
    shuffle=True,
    seed=40,
)

In [None]:
# Phase 2: fine-tune the partially unfrozen model for another 4 epochs.
new_model.fit_generator(
    generator=train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=4,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
)

In [None]:
# Rewind the test generator so that prediction starts at the first row of
# test_df, then score every test batch.
test_generator.reset()
pred = new_model.predict_generator(
    test_generator,
    steps=STEP_SIZE_TEST,
    verbose=1,
)

In [None]:
import numpy as np

# Index of the highest-scoring class for every test image.
predicted_class_indices = pred.argmax(axis=1)

In [None]:
# class_indices maps label -> index; invert it so that the predicted indices can
# be translated back into category labels.
labels = {index: name for name, index in train_generator.class_indices.items()}
predictions = [labels[index] for index in predicted_class_indices]

In [None]:
# Predicted labels as integers, in the order the test generator produced them.
predicted_categories = pd.DataFrame(predictions).astype(int)

# Submission frame: test_df with its category column replaced by the predictions.
res = test_df.drop(columns='category')
res['category'] = predicted_categories

In [None]:
# Preview the first rows of the submission frame.
res.head()

In [None]:
# Write the submission file; index=False keeps the dataframe index out of the CSV.
res.to_csv("256x256_EfficientNet_0.15validation.csv", index=False)

In [None]:
# Bare `ls` relies on IPython automagic; lists the working directory so the CSV
# written above can be seen.
ls

In [None]:
# Alias only: final_model refers to the same object as new_model, no copy is made.
final_model = new_model

In [None]:
# Save the fine-tuned model.
final_model.save("256x256EfficientNetB7")

In [None]:
# Additionally save only the weights. "8epochs" corresponds to the two
# fit_generator runs of 4 epochs each.
final_model.save_weights("256x256_8epochs_0.15valid_weights")

In [None]:
# Bare `ls` relies on IPython automagic; lists the working directory so the
# saved model and weights can be seen.
ls

In [None]:
# Shuffled copy of the full training frame. sample() returns a new frame, so df
# keeps its own order; the copy retains df's index labels in shuffled order.
corr_df = df.sample(frac=1)
corr_df

In [None]:
# Rescale-only generator factory (no validation split) for re-scoring the
# training images.
corr_datagen = ImageDataGenerator(rescale=1./255.)

# Unlabelled batches in the row order of corr_df (shuffle=False).
corr_generator = corr_datagen.flow_from_dataframe(
    dataframe=corr_df,
    directory=train_path,
    x_col="combined_filename",
    y_col=None,
    target_size=(IMG_SIZE, IMG_SIZE),
    class_mode=None,
    batch_size=32,
    shuffle=False,
    seed=42,
)

In [None]:
# Number of batches that covers every row of corr_df exactly once (ceiling
# division). The previous `n // batch_size + 1` requested one batch too many
# whenever n was an exact multiple of the batch size, which yields more
# predictions than there are rows.
STEP_SIZE_CORR = (
    corr_generator.n + corr_generator.batch_size - 1
) // corr_generator.batch_size

# Rewind so that prediction starts at the first row, then score every batch.
corr_generator.reset()
corr_pred = new_model.predict_generator(
    corr_generator,
    steps=STEP_SIZE_CORR,
    verbose=1,
)

In [None]:
# Index of the highest-scoring class for every re-scored training image.
corr_predicted_class_indices = corr_pred.argmax(axis=1)

In [None]:
# class_indices maps label -> index; invert it to translate predicted indices
# back into category labels.
corr_labels = {index: name for name, index in train_generator.class_indices.items()}

# Look the labels up in corr_labels, the mapping built in this cell. The earlier
# version indexed `labels`, which only exists if an earlier cell has been run
# and left corr_labels unused.
corr_predictions = [corr_labels[index] for index in corr_predicted_class_indices]

In [None]:
# Number of predicted labels; this is expected to equal the number of rows in
# corr_df.
print(len(corr_predictions))

In [None]:
# Class distribution of the labels in df, as a reference for the distribution of
# the predicted labels shown in the next cell.
df['category'].value_counts()

In [None]:
corr_df['category'] = pd.DataFrame(corr_predictions).astype(int)
corr_df['category'].value_counts()

In [None]:
corr_df.drop(columns=['category'])
corr_df['category'] = pd.DataFrame(corr_predictions).astype(int)
corr_df

In [None]:

# Write corr_df to corr.csv; index=False keeps the dataframe index out of the file.
corr_df.to_csv("corr.csv", index=False)

In [None]:
# Bare `ls` relies on IPython automagic; lists the working directory so corr.csv
# can be seen.
ls