In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import tensorflow as tf
import keras
from keras import backend as K
from keras.layers import Input, Dense, Concatenate, Reshape, Dropout,Flatten,BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import ResNet50,preprocess_input
# from keras.applications.vgg19 import VGG19,preprocess_input
# from keras.applications.inception_resnet_v2 import InceptionResNetV2,preprocess_input

from keras.models import model_from_json
from keras.models import load_model,Model, Sequential
from keras.preprocessing import image
from keras.callbacks import ModelCheckpoint,CSVLogger,History,ReduceLROnPlateau,EarlyStopping,LearningRateScheduler
import math
 

# Seed NumPy's global random generator so NumPy-driven randomness is repeatable.
# NOTE(review): only NumPy is seeded in this cell; TensorFlow/Keras randomness
# (weight initialisation, dropout) presumably uses its own generator -- confirm
# whether that also needs seeding for reproducible training runs.
np.random.seed(42)

In [15]:
import pandas as pd
import os
import shutil
from sklearn.model_selection import train_test_split
from tqdm import tqdm
# Home directory
import matplotlib.pyplot as plt
# Root data directory. Kept relative so it agrees with the 'data/...' paths used
# by every other cell instead of pointing at one machine's desktop.
home_path = 'data'

# Create train and validation directories.
# exist_ok=True keeps the cell re-runnable (os.mkdir raised on the second run).
train_path = os.path.join(home_path, 'train_fold')
val_path = os.path.join(home_path, 'val_fold')
os.makedirs(train_path, exist_ok=True)
os.makedirs(val_path, exist_ok=True)

# Original df
df = pd.read_csv('data/train.csv')
df['image'] = df['image_id'] + '.jpg'
# train.csv is rewritten in place (here and by the prediction cell, which adds
# an integer 'breed' column). Only rename while 'breed_name' is still missing,
# otherwise a re-run would produce two 'breed_name' columns.
if 'breed_name' not in df.columns:
    df = df.rename({'breed': 'breed_name'}, axis=1)
df.to_csv('data/train.csv', index=False)
subclass = df['breed_name'].unique()

# Create one sub-directory per class inside each fold
for fold_path in (train_path, val_path):
    for classs in subclass:
        os.makedirs(os.path.join(fold_path, f'{classs}'), exist_ok=True)

# Images and Labels
X = df['image']
y = df['breed_name']

# Stratified train/validation split of the labelled images
train_x, val_x, train_y, val_y = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

# Train / validation frames with a clean 0..n-1 index
df_train = pd.DataFrame({'image': train_x, 'breed_name': train_y}).reset_index(drop=True)
df_valid = pd.DataFrame({'image': val_x, 'breed_name': val_y}).reset_index(drop=True)

# Copy every image into the folder of its class.
# The loop variables are deliberately NOT called `image`: that name is the
# keras.preprocessing.image module imported in the first cell and is needed
# again by the prediction cell.
source_dir = os.path.join(home_path, 'train')
for fold_df, fold_path in ((df_train, train_path), (df_valid, val_path)):
    labelled_files = zip(fold_df['image'], fold_df['breed_name'])
    for file_name, breed in tqdm(labelled_files, total=len(fold_df)):
        shutil.copy(os.path.join(source_dir, file_name),
                    os.path.join(fold_path, f'{breed}'))

df_train.to_csv("data/train_fold.csv")
df_valid.to_csv("data/valid_fold.csv")

100%|██████████████████████████████████████████████████████████████████████████████| 4681/4681 [02:56<00:00, 26.48it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 827/827 [00:26<00:00, 30.73it/s]


In [32]:
# Random geometric augmentation, applied to the training stream only.
augmentation = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    shear_range=0.2,
    horizontal_flip=True,
)

# Settings shared by both directory iterators.
flow_options = dict(
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical',
)

# Both generators run the backbone's own preprocess_input on every image.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, **augmentation)
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

training_set = train_datagen.flow_from_directory('data/train_fold', **flow_options)

valid_set = valid_datagen.flow_from_directory('data/val_fold', **flow_options)

Found 4681 images belonging to 35 classes.
Found 827 images belonging to 35 classes.


In [33]:
# pretrained_model=VGG19(weights='imagenet',
#                       input_shape=(128,128,3),
#                       include_top=False,
#                       pooling='max'
#                       )

# ResNet50 backbone with ImageNet weights and no classification top; the
# input shape matches the 128x128 RGB images produced by the generators.
backbone_options = {
    'weights': 'imagenet',
    'input_shape': (128, 128, 3),
    'include_top': False,
    'pooling': 'max',
}
pretrained_model = ResNet50(**backbone_options)

In [34]:
#149,
# Layers before `split_at` stay frozen; layers from `split_at` onwards are trained.
split_at = 149
for position, layer in enumerate(pretrained_model.layers):
    layer.trainable = position >= split_at
pretrained_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 134, 134, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 64, 64, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 64, 64, 64)   256         conv1[0][0]                      
___________________________________________________________________________________________

In [35]:
# for layers in pretrained_model.layers:
#     layers.trainable=False

# Classification head stacked on the backbone output:
# Dense(1024) -> Dropout -> BatchNorm -> 35-way softmax.
classifier_head = Sequential([
    Dense(1024, activation='relu'),
    Dropout(0.3),
    BatchNormalization(),
    Dense(35, activation='softmax'),
])

# Wire backbone and head into a single end-to-end model.
pretrainedInput, pretrainedOutput = pretrained_model.input, pretrained_model.output
output = classifier_head(pretrainedOutput)
model = Model(inputs=pretrainedInput, outputs=output)

def scheduler(epoch):
    """Return the learning rate to use for a given (0-based) epoch.

    The rate is held at 1e-5 for epochs 0-9 and then decays exponentially
    (factor exp(-0.15) per epoch), rounded to 7 decimal places.
    """
    base_rate = 0.00001
    if epoch >= 10:
        decay = math.exp(0.15 * (9 - epoch))
        return round(base_rate * decay, 7)
    return base_rate


def get_f1(y_true, y_pred): #taken from old keras source code
    """F1 metric computed on the tensors Keras passes for one batch.

    Values are clipped to [0, 1] and rounded before counting, so a
    prediction counts as positive once it rounds up to 1. K.epsilon() is
    added to every denominator to avoid division by zero.
    """
    def _count_positives(tensor):
        # Number of entries that round to 1 after clipping to [0, 1].
        return K.sum(K.round(K.clip(tensor, 0, 1)))

    hits = _count_positives(y_true * y_pred)
    actual = _count_positives(y_true)
    predicted = _count_positives(y_pred)

    precision = hits / (predicted + K.epsilon())
    recall = hits / (actual + K.epsilon())
    return 2 * (precision * recall) / (precision + recall + K.epsilon())

# def scheduler(epoch):
#     if epoch <10:
#         return 0.0001
#     elif epoch<15:
#         return 0.0005
#     elif epoch< 22:
#         return 0.0001
#     elif epoch< 30:
#         return 0.00005
#     else:
#         return 0.00001



# The checkpoints and the architecture JSON are written into these folders;
# create them up front so a fresh checkout does not fail when the first
# checkpoint is saved or when the JSON file is opened.
os.makedirs("finalswap", exist_ok=True)
os.makedirs("model", exist_ok=True)

# Validation-loss driven callbacks.
reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=5, mode='min')
early =  EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=10, mode='min')

loss_history=History()
model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy',get_f1])

# Per-epoch learning rate taken from scheduler().
lr_schedular = LearningRateScheduler(scheduler)

# cp1 keeps the weights with the lowest validation loss.
cp1= ModelCheckpoint(filepath="finalswap/save_best.h5", monitor='val_loss',save_best_only=True,verbose=1,mode='min',save_weights_only=True)
# NOTE(review): despite the file name, save_best_only=True means cp2 keeps only
# the weights with the lowest *training* loss, not every epoch -- confirm intent.
cp2= ModelCheckpoint(filepath='finalswap/save_all.h5', monitor='loss',save_best_only=True ,verbose=2,save_weights_only=True,mode='min')

# Store the architecture separately because the checkpoints hold weights only.
model_json = model.to_json()
with open("model/model.json", "w") as json_file:
    json_file.write(model_json)

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line.
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 128, 128, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 134, 134, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 64, 64, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 64, 64, 64)   256         conv1[0][0]                      
____________________________________________________________________________________________

In [94]:
# def scheduler(epoch):
#     if epoch <8:
#         return 0.001
#     else:
#         return round(0.001 * math.exp(0.15*(7-epoch)),6)
# ep=[]
# lr=[]
# for epoch in range(35):
#     ep.append(epoch)
#     lr.append(scheduler(epoch))
#     print(str(epoch)+'  '+str(scheduler(epoch)))
# sns.lineplot(ep,lr);

In [36]:
# K.set_value(model.optimizer.learning_rate, 0.001)
# NOTE(review): lr_schedular assigns the learning rate from scheduler() every
# epoch, so reductions made by `reduce` are presumably overwritten at the start
# of the next epoch -- confirm that running both callbacks is intended.
callbacks_list = [lr_schedular, cp1, cp2, early, reduce, loss_history]

fit_options = dict(
    generator=training_set,
    validation_data=valid_set,
    epochs=1,
    callbacks=callbacks_list,
)
model_history = model.fit_generator(**fit_options)

Epoch 1/1
 13/147 [=>............................] - ETA: 11:34 - loss: 4.4400 - accuracy: 0.0168 - get_f1: 0.0000e+00

KeyboardInterrupt: 

In [98]:
# json_file = open('model/model.json', 'r')
# loaded_model_json = json_file.read()
# json_file.close()
# loaded_model = model_from_json(loaded_model_json)

In [99]:
# loaded_model.load_weights('finalswap/save_best.h5')

In [100]:
# loaded_model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

In [102]:
# K.set_value(loaded_model.optimizer.learning_rate, 0.0001)
# callbacks_list = [cp1,cp2,early,reduce,loss_history]
# model_history=loaded_model.fit_generator(generator=training_set,
#                    epochs=5,callbacks=callbacks_list)

In [8]:
# # lr=loaded_model.optimizer.learning_rate.numpy()
# # def scheduler(epoch,lr):
# #     if epoch<1:
# #         return lr
# #     return round(lr * math.exp(-0.02*epoch),6)
# lr_schedular=keras.callbacks.LearningRateScheduler(scheduler)
# callbacks_list = [cp1,cp2,early,reduce,early]
# K.set_value(model.optimizer.learning_rate, 0.00011)
# print("Learning rate before second fit:", model.optimizer.learning_rate.numpy())

# loaded_model.fit_generator(generator=training_set,
#                    epochs=10,callbacks=callbacks_list)

In [101]:
# Imported under explicit names: the module alias `image` from the first cell
# can be overwritten by a loop variable in the data-preparation cell.
from keras.preprocessing.image import img_to_array, load_img

# Store the integer class index used by the generators next to each breed name.
df = pd.read_csv('data/train.csv')
df['breed'] = df['breed_name'].map(training_set.class_indices)
df.to_csv('data/train.csv', index=False)

# Rebuild the network from the saved architecture and the best checkpoint so
# this cell does not depend on a `loaded_model` left over in the kernel.
with open('model/model.json', 'r') as json_file:
    loaded_model = model_from_json(json_file.read())
loaded_model.load_weights('finalswap/save_best.h5')

# Inverse of class_indices: softmax position -> breed (class folder) name.
index_to_breed = {index: breed for breed, index in training_set.class_indices.items()}

filename = pd.read_csv('data/test.csv')

# Predict one image at a time; the first column of test.csv holds the file stem.
predicted_breeds = []
for file in filename.values:
    test_image = load_img('data/test/' + file[0] + '.jpg', target_size=(128, 128))
    test_image = img_to_array(test_image)
    test_image = np.expand_dims(test_image, axis=0)
    test_image = preprocess_input(test_image)
    output = loaded_model.predict(test_image)
    classs = int(output.argmax())
    predicted_breeds.append(index_to_breed[classs])

# One row per test image: index = image_id, value = predicted breed name.
result = pd.Series(data=predicted_breeds, index=filename['image_id'], name='breed')

os.makedirs('result', exist_ok=True)
result.to_csv('result/rough.csv')