In [None]:
import numpy as np
import pandas as pd
from PIL import Image
import os
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.utils import class_weight
from sklearn.preprocessing import minmax_scale
import random
import cv2
from imgaug import augmenters as iaa
import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Activation, Input, BatchNormalization, GlobalAveragePooling2D,MaxPooling2D
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.experimental import CosineDecay
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import EfficientNetB3
from tensorflow.keras.layers.experimental.preprocessing import RandomCrop,CenterCrop, RandomRotation

In [None]:
# Load the training label table; later cells read its `file_name` and `label` columns.
df = pd.read_csv("../input/fashionimageclassificationdataset/train_label.csv")

In [None]:
def dataset(data, label_num, random_state=None):
    """Return the rows of ``data`` whose ``label`` equals ``label_num``, shuffled.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing a ``label`` column.
    label_num : scalar
        Label value to keep.
    random_state : int, numpy RandomState or None, optional
        Seed for the shuffle. ``None`` (the default) draws from NumPy's
        global random state, so the row order differs between runs.

    Returns
    -------
    pandas.DataFrame
        The matching rows in random order with their original index labels.
        The result is empty when no row carries ``label_num``.
    """
    # Filter once and shuffle once: repeating this per row cannot change the
    # outcome and only makes the call quadratic in the table size.
    matching = data[data['label'] == label_num]
    return matching.sample(frac=1, random_state=random_state)

In [None]:
# One shuffled per-class subset for each class id 0..10, evaluated in ascending
# order and unpacked into label0 ... label10.
(label0, label1, label2, label3, label4, label5,
 label6, label7, label8, label9, label10) = (dataset(df, class_id) for class_id in range(11))

In [None]:
training_folder = "../input/fashionimageclassificationdataset/train/"

# Stack the per-class frames back into a single table.
data = pd.concat(
    [label0, label1, label2, label3, label4, label5,
     label6, label7, label8, label9, label10],
    axis=0,
)

# Keep only the two needed columns, then apply a seeded shuffle to the rows.
samples_df = shuffle(pd.DataFrame(data, columns=['file_name', 'label']), random_state=42)

# Full path of every image = training folder + file name.
samples_df = samples_df.assign(filepath=training_folder + samples_df["file_name"])
samples_df.head()

In [None]:
#samples_df=pd.DataFrame(data,columns=['file_name','label'])
#samples_df = pd.read_csv("../input/fashionimageclassificationdataset/train_label.csv")
#from sklearn.utils import shuffle
#samples_df=shuffle(samples_df,random_state=42)
#samples_df["filepath"] = training_folder+samples_df["file_name"]
#samples_df.head()

In [None]:
# Sanity check: (number of samples, number of columns) of the assembled table.
samples_df.shape

In [None]:
# Positional 90/10 split of the already-shuffled sample table.
training_percentage = 0.9
training_item_count = int(training_percentage * len(samples_df))
validation_item_count = len(samples_df) - training_item_count
training_df = samples_df.iloc[:training_item_count]
validation_df = samples_df.iloc[training_item_count:]

In [None]:
# Training hyper-parameters shared by the cells below.
batch_size = 8
image_size = 480
input_shape = (image_size, image_size, 3)  # square, three-channel model input
dropout_rate = 0.5
# Distinct label values present in the training split, in ascending order.
classes_to_predict = sorted(training_df["label"].unique())

In [None]:
# Display the sorted list of distinct training labels.
classes_to_predict

In [None]:
# Datasets of (file path, label) pairs; the images are decoded later by a map step.
training_data = tf.data.Dataset.from_tensor_slices(
    (training_df["filepath"].values, training_df["label"].values)
)
validation_data = tf.data.Dataset.from_tensor_slices(
    (validation_df["filepath"].values, validation_df["label"].values)
)

In [None]:
def load_image_and_label_from_path(image_path, label):
    """Read a JPEG file, decode it to three channels and resize it to 480x480.

    Returns the resized image together with ``label``, which is passed
    through unchanged.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize(decoded, (480, 480)), label

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Replace each (path, label) dataset with its decoded (image, label) counterpart.
training_data, validation_data = (
    split.map(load_image_and_label_from_path, num_parallel_calls=AUTOTUNE)
    for split in (training_data, validation_data)
)

In [None]:
# Training batches: shuffle, then batch and prefetch.
# NOTE(review): this shuffle runs after the decode/resize map, so its buffer
# holds up to 1000 decoded 480x480x3 images. Shuffling the file paths before
# the map would be much lighter - confirm memory headroom.
training_data_batches = training_data.shuffle(buffer_size=1000).batch(batch_size).prefetch(buffer_size=AUTOTUNE)

# Validation batches are NOT shuffled: validation loss and accuracy are
# aggregated over the whole set, so sample order cannot change them, and a
# shuffle here would only buffer decoded images for nothing.
validation_data_batches = validation_data.batch(batch_size).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Image-only view of the training files, used to adapt a normalization layer.
adapt_data = tf.data.Dataset.from_tensor_slices(training_df.filepath.values)
def adapt_mode(image_path):
    """Load one training image for normalization statistics.

    Reads the JPEG at ``image_path``, decodes it to three channels, resizes it
    to 480x480 and rescales the pixel values by 1/255.
    """
    img = tf.io.read_file(image_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, (480, 480))
    img = layers.experimental.preprocessing.Rescaling(1.0 / 255)(img)
    return img

adapt_data = adapt_data.map(adapt_mode, num_parallel_calls=AUTOTUNE)
# No shuffle: the statistics gathered from this dataset do not depend on
# sample order, so shuffling would only buffer decoded images in memory.
adapt_data_batches = adapt_data.batch(batch_size).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Random augmentations placed in front of the network:
# crop to the model input size, flip, rotate, zoom and contrast change.
data_augmentation_layers = Sequential(
    [
        RandomCrop(image_size, image_size),
        layers.experimental.preprocessing.RandomFlip(mode="horizontal_and_vertical"),
        RandomRotation(factor=0.25),
        layers.experimental.preprocessing.RandomZoom(height_factor=(-0.2, 0)),
        layers.experimental.preprocessing.RandomContrast(factor=(0.2, 0.2)),
    ]
)

In [None]:
# Open one training image with PIL and display it as a reference for the
# augmentation preview that follows.
image = Image.open("../input/fashionimageclassificationdataset/train/Image_000000.jpg")
plt.imshow(image)
plt.show()

In [None]:
# Convert the PIL image to an array and add a leading axis of size 1.
# NOTE: `image` is overwritten in place, so re-running this cell adds yet
# another leading axis - re-run the cell that opens the image first.
image = tf.expand_dims(np.array(image), 0)

In [None]:
# Preview nine independent passes of the sample image through the augmentation
# pipeline on a 3x3 grid.
plt.figure(figsize=(10, 10))
for i in range(9):
    augmented_image = data_augmentation_layers(image)
    ax = plt.subplot(3, 3, i + 1)
    # Cast to uint8 before plotting: if the layers return a float tensor in
    # the 0..255 range, imshow would clip it to [0, 1] and draw a washed-out
    # picture. The cast leaves an already-uint8 result unchanged.
    plt.imshow(np.array(augmented_image[0]).astype("uint8"))
    plt.axis("off")

In [None]:
# EfficientNetB3 without its classification head, initialised from a local
# weights file.
efficientnet = EfficientNetB3(
    include_top=False,
    weights="../input/effib3/efficientnetb3_notop.h5",
    input_shape=input_shape,
    drop_connect_rate=dropout_rate,
)

# input -> augmentation -> EfficientNet -> global average pooling -> dropout -> softmax
inputs = Input(shape=input_shape)
augmented = data_augmentation_layers(inputs)
efficientnet = efficientnet(augmented)  # from here on the name holds the output tensor
pooling = GlobalAveragePooling2D()(efficientnet)
dropout = Dropout(dropout_rate)(pooling)
outputs = layers.Dense(len(classes_to_predict), activation="softmax")(dropout)
model = Model(inputs=inputs, outputs=outputs)

model.summary()

In [None]:
%%time
# Adapt the 'normalization' layer inside the 'efficientnetb3' sub-model on the
# rescaled training images produced by the adapt pipeline.
model.get_layer('efficientnetb3').get_layer('normalization').adapt(adapt_data_batches)

In [None]:
epochs = 40
# The training dataset is batched without dropping the final partial batch, so
# one epoch takes ceil(n / batch_size) optimizer steps. Rounding instead would
# make the schedule end before training does.
steps_per_epoch = int(np.ceil(len(training_df) / batch_size))
decay_steps = steps_per_epoch * epochs
# Cosine schedule starting at 1e-4; alpha=0.3 is the final fraction of that rate.
cosine_decay = CosineDecay(initial_learning_rate=1e-4, decay_steps=decay_steps, alpha=0.3)

# Keep only the checkpoint with the lowest validation loss.
callbacks = [ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)]

# Labels are integer class ids, hence the sparse categorical cross-entropy.
model.compile(loss="sparse_categorical_crossentropy", optimizer=tf.keras.optimizers.Adam(cosine_decay), metrics=["accuracy"])

In [None]:
# Train for `epochs` epochs, validating after each one; the checkpoint callback
# writes the model to disk when the monitored validation loss improves.
history = model.fit(
    x=training_data_batches,
    validation_data=validation_data_batches,
    epochs=epochs,
    callbacks=callbacks,
)

In [None]:
# Training vs. validation loss for every epoch.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Train')
ax.plot(history.history['val_loss'], label='Validation')
ax.set(title='Loss over epochs', xlabel='Epoch', ylabel='Loss')
ax.legend(loc='best')
plt.show()

In [None]:
# Load the sample submission; its `file_name` column lists the test images to predict.
submission_csv=pd.read_csv("../input/fashionimageclassificationdataset/sample_submission.csv")
submission_csv.head()

In [None]:
# Full path of every test image = test folder + file name.
submission_csv['filepath']="../input/fashionimageclassificationdataset/test/"+submission_csv['file_name']

In [None]:
# Check that the new `filepath` column looks right.
submission_csv.head()

In [None]:
# Imported here rather than in the top import cell because the name `image`
# is used earlier in the notebook for the preview picture.
from tensorflow.keras.preprocessing import image

# Predict one label per test image.
# The model was built for image_size x image_size inputs and trained on images
# resized with tf.image.resize (bilinear by default), so test images are loaded
# at that same resolution and interpolation rather than at 300x300.
# NOTE(review): this uses the weights from the last epoch, not the
# 'best_model.h5' checkpoint - confirm which one is intended.
li = []
for img_path in tqdm(submission_csv['filepath']):
    img = image.load_img(img_path, target_size=(image_size, image_size), interpolation='bilinear')
    # Add the leading batch axis that model.predict expects.
    images = np.expand_dims(image.img_to_array(img), axis=0)
    prediction = model.predict(images)
    # The index of the highest softmax score is used as the predicted label.
    li.append(np.argmax(prediction))

In [None]:
# Store the predicted class indices, one per test image in file order.
submission_csv['label']=li

In [None]:
# Check the predictions before export.
submission_csv.head()

In [None]:
# Drop the helper `filepath` column before writing the submission file.
submission=submission_csv.drop('filepath',axis=1)

In [None]:
# Write the submission without the DataFrame index column.
submission.to_csv('submission1.csv',index=False)