In [None]:
# Mount Google Drive at /content/drive (Colab only); the dataset paths used
# later in this notebook all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

내가 만들어 보기

In [None]:
import pandas as pd
import numpy as np

# handle os specific
import os

# randomization
import random

# visualization imports 
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.image import imread
%matplotlib inline

# consistent plot size
from pylab import rcParams
# Apply all plot defaults in one call: figure size plus label/tick/title font sizes.
rcParams.update({
    'figure.figsize': (17, 7),
    'axes.labelsize': 14,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'axes.titlesize': 16,
})
 
# ignore deprecated and future warnings
import warnings
warnings.filterwarnings(action='ignore',category=DeprecationWarning)
warnings.filterwarnings(action='ignore',category=FutureWarning)



In [None]:
# List the top-level entries of the mounted Drive to confirm the mount worked
# and to see which dataset folders are available.
os.listdir('/content/drive/MyDrive/')

In [None]:
# Base locations on the mounted Drive:
#   FILE_PATH  -> 'Term_Dataset' folder (train/val images and train_data.csv are read from here)
#   FILE_PATH1 -> 'Term_Test' folder (test images and test_data_emp.csv are read from here)
ROOT_DIR = '/content/drive/MyDrive/'
FILE_PATH = os.path.join(ROOT_DIR,'Term_Dataset')
FILE_PATH1 = os.path.join(ROOT_DIR,'Term_Test')

In [None]:
# List the entries inside the training dataset folder
os.listdir(FILE_PATH)

In [None]:
# Image directories: train and val live under the dataset folder,
# test lives under the separate test folder.
TRAIN_IMAGES = os.path.join(FILE_PATH,'train')
VAL_IMAGES = os.path.join(FILE_PATH,'val')
TEST_IMAGES = os.path.join(FILE_PATH1,'test')

In [None]:
# Number of entries in the training image directory
len(os.listdir(TRAIN_IMAGES))

In [None]:
# Number of entries in the validation image directory
len(os.listdir(VAL_IMAGES))

In [None]:
# Number of entries in the test image directory
len(os.listdir(TEST_IMAGES))

In [None]:
# Load the training labels CSV and preview the first 10 rows.
# Later cells rely on its 'file_name' and 'class' columns.
train_df = pd.read_csv(os.path.join(FILE_PATH,'train_data.csv'))
train_df.head(10)

In [None]:
# Load the test CSV and preview its last rows.
# NOTE(review): the file name suggests the 'class' column is empty and meant
# to be filled with predictions at the end of the notebook - confirm.
test_df = pd.read_csv(os.path.join(FILE_PATH1,'test_data_emp.csv'))
test_df.tail()

In [None]:
# Number of distinct labels in the training set (a missing value counts as its own label)
train_df['class'].nunique(dropna=False)

In [None]:
# Collect the distinct class labels, in order of first appearance in train_df
sports_labels = train_df['class'].unique()
# Display the label names included in the dataset
sports_labels

In [None]:
# Preview a single training image (img00008.png); the trailing ';' hides the AxesImage repr
plt.imshow(imread(os.path.join(TRAIN_IMAGES, 'img00008.png')));

In [None]:
# Show the label of the previewed image (img00008.png).
# The label is looked up by file name instead of a hard-coded row number, so the
# answer stays correct regardless of how the CSV rows are ordered.
train_df.loc[train_df['file_name'] == 'img00008.png', 'class']

In [None]:
# Number of training rows per class label, most frequent first
train_df['class'].value_counts()

In [None]:
# Bar chart of the number of training images per class.
# The column is passed by keyword (x=/data=): recent seaborn releases no longer
# accept the data vector as a positional argument and would otherwise treat the
# Series as `data`.
sns.countplot(x='class', data=train_df, palette='viridis')
plt.title('Images per type',)
plt.ylabel('Number of images')
plt.xlabel('Tools Name')
plt.tight_layout()

In [None]:
# Pick one class at random and keep only its rows for the image grid below.
# random.randrange excludes the upper bound; random.randint(0, n) includes n and
# would occasionally index one past the end of sports_labels.
x = random.randrange(len(sports_labels))
random_sport = sports_labels[x]

sports_show = train_df[train_df['class']==random_sport]
print(f'The selected sport to display various images is {random_sport.upper()}')
sports_show.head()

In [None]:
# Show a 3x4 grid of randomly chosen images (sampled with replacement) of the
# selected class, to illustrate the variation within a single class.
n_rows = 3
n_cols = 4

# Loop-invariant setting, applied once instead of on every iteration
rcParams['axes.titlesize'] = 12

for row in range(n_rows):
    for col in range(n_cols):
        index = n_cols * row + col
        plt.subplot(n_rows, n_cols, index + 1)
        # randrange excludes the upper bound, so the position is always valid for
        # .iloc (randint(0, len) could return len and raise IndexError)
        sport_img = random.randrange(len(sports_show))
        image_ = plt.imread(os.path.join(TRAIN_IMAGES, sports_show.iloc[sport_img]['file_name']))
        plt.imshow(image_, cmap='binary', interpolation='nearest')

In [None]:
# Input shape handed to the hand-built CNNs (trial 1 and 2) as `input_shape`;
# it matches the 224x224 target_size and 'rgb' color mode used by the generators.
IMAGE_SHAPE = (224,224,3)

In [None]:
# IMPORT THE REQUIRED KERAS LIBRARIES FOR IMAGE AUGMENTATION
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
#from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.applications.xception import preprocess_input

In [None]:
# Augmenting generator shared by the training, validation and test iterators below.
# Random shifts, shear, zoom and horizontal flips are enabled; 20% of the
# dataframe rows are reserved for the 'validation' subset.
#
# NOTE(review): both `rescale=1./255` and the Xception `preprocess_input` are
# set. preprocess_input presumably already maps 0-255 pixels to [-1, 1], so the
# two combined look like a double normalisation - confirm against the Keras
# docs. If one is removed, `data_generator` further down must be changed the
# same way so training and inference keep seeing identically scaled inputs.
# NOTE(review): rotation_range is presumably in degrees, so 0.1 is almost no
# rotation - confirm this is intended.
image_generator = ImageDataGenerator(    
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    zca_epsilon=1e-06,
    rotation_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=None,
    shear_range=0.5,
    zoom_range=[0.5,1.8],
    channel_shift_range=0.0,
    fill_mode='nearest',
    cval=0.0,
    horizontal_flip=True,
    vertical_flip=False,
    rescale=1./255,
    preprocessing_function=preprocess_input,
    data_format=None,
    validation_split=0.2,
    dtype= 'float32'
)

In [None]:
# Load one training image and display it unmodified; the next cell shows an
# augmented version of the same image for comparison.
image = imread(os.path.join(TRAIN_IMAGES,'img00011.png'))
plt.imshow(image)

In [None]:
# Apply one random augmentation drawn from image_generator's settings to the
# image loaded above and display the result.
gen_image = image_generator.random_transform(image)
plt.imshow(gen_image)

In [None]:
# Batch size shared by every generator; it also determines the number of steps per epoch
BATCH_SIZE = 8 

In [None]:
# Training iterator: shuffled, one-hot labelled batches drawn from the
# 'training' subset of train_df, with images resized to the model input size.
train_generator = image_generator.flow_from_dataframe(
    dataframe=train_df,
    directory=TRAIN_IMAGES,
    x_col='file_name',
    y_col='class',
    class_mode='categorical',
    subset='training',
    target_size=IMAGE_SHAPE[:2],
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
)

train_generator

In [None]:
# Validation iterator: the held-out 'validation' subset (validation_split=0.2 on
# image_generator) of the SAME labelled dataframe and directory that the
# training iterator uses.
# The previous version read test_df, which is the unlabeled test CSV and does
# not describe the files in VAL_IMAGES, so almost no usable validation samples
# were found and the class mapping could not match the training one.
# NOTE(review): if a labelled CSV for the images in VAL_IMAGES exists, a
# dedicated non-augmenting generator over that CSV would be preferable.
# NOTE(review): the split is presumably taken by row position, so if train_df
# is sorted by class the subsets will not be class-balanced - confirm.
validation_generator = image_generator.flow_from_dataframe(dataframe=train_df,
                                                           directory=TRAIN_IMAGES,
                                                           x_col='file_name',
                                                           y_col='class',
                                                           subset='validation',
                                                           color_mode='rgb',
                                                           batch_size=BATCH_SIZE,
                                                           seed=42,
                                                           shuffle=False,
                                                           class_mode='categorical',
                                                           target_size=(224,224))

validation_generator

In [None]:
# Test iterator: no labels, no shuffling, and no random augmentation.
# Test images must not be randomly zoomed, sheared or flipped, so a separate
# generator is used. It copies only the normalisation settings from
# image_generator, which keeps the test inputs on the same scale as the inputs
# the model was trained on.
test_image_generator = ImageDataGenerator(rescale=image_generator.rescale,
                                          preprocessing_function=image_generator.preprocessing_function,
                                          dtype='float32')

test_generator = test_image_generator.flow_from_dataframe(dataframe=test_df,
                                                          directory=TEST_IMAGES,
                                                          x_col='file_name',
                                                          y_col=None,
                                                          batch_size=BATCH_SIZE,
                                                          color_mode='rgb',
                                                          seed=42,
                                                          shuffle=False,
                                                          class_mode=None,
                                                          target_size=(224,224))

test_generator

In [None]:
## IMPORT THE LIBRARIES

from tensorflow import keras
from tensorflow.keras.utils import Sequence
from tensorflow.keras.utils  import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import SGD,Adam
from tensorflow.keras.layers import Dense,Dropout,Conv2D,MaxPool2D,AvgPool2D,GlobalMaxPool2D,Flatten,MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.callbacks import CSVLogger

In [None]:
# Selects which model the next cell builds:
#   1 = basic CNN, 2 = CNN with a batch-normalised dense head,
#   3 = VGG16 transfer learning, any other value = Xception transfer learning
trial = 4

In [None]:
def _build_transfer_model(backbone_cls, n_classes=11, learning_rate=0.00001):
    """Build and compile a transfer-learning classifier on a frozen ImageNet backbone.

    Args:
        backbone_cls: Keras application constructor (e.g. VGG16 or Xception).
        n_classes: number of units in the softmax output layer.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        (backbone, model): the frozen backbone and the compiled full model.
    """
    backbone = backbone_cls(include_top=False, input_shape=(224,224,3), weights='imagenet')
    # Mark the pre-trained layers as not trainable so only the new head learns
    for layer in backbone.layers:
        layer.trainable = False
    # New classification head: flatten, three dense layers, softmax output
    features = Flatten()(backbone.layers[-1].output)
    for units in (512, 256, 128):
        features = Dense(units, activation='relu', kernel_initializer='he_normal')(features)
    probabilities = Dense(n_classes, activation='softmax')(features)
    transfer_model = Model(inputs=backbone.inputs, outputs=probabilities)
    # `learning_rate` replaces the deprecated `lr` argument, and metrics is
    # passed as a list, which every Keras version accepts
    transfer_model.compile(optimizer=Adam(learning_rate=learning_rate),
                           loss='categorical_crossentropy',
                           metrics=['accuracy'])
    return backbone, transfer_model


if trial==1: # Basic model: a typical CNN of convolution + max-pooling stages
    model = Sequential()

    model.add(Conv2D(filters=32,kernel_size=(7,7),input_shape=IMAGE_SHAPE,strides=1,padding='same',activation='relu'))
    model.add(MaxPool2D(pool_size=(2,2)))

    model.add(Conv2D(filters=64,kernel_size=(3,3),strides=1,padding='same',activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Conv2D(filters=128,kernel_size=(3,3),strides=1,padding='same',activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

    model.add(Conv2D(filters=256,kernel_size=(3,3),strides=1,padding='same',activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

    # Dense classifier head
    model.add(Flatten())
    model.add(Dense(units=512,activation='relu'))
    model.add(Dense(units=128,activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(units=11,activation='softmax'))

    model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])
    print('BASIC MODEL')
    model.summary()

elif trial==2: # Batch normalization plus selu activation and lecun-normal kernel initializer
    model = Sequential()

    # Convolutional feature extractor
    model.add(Conv2D(filters=32,input_shape=IMAGE_SHAPE,padding='same',kernel_size=(3,3),activation='relu'))
    model.add(MaxPooling2D(2,2))

    model.add(Conv2D(filters=64,padding='same',kernel_size=(3,3),activation='relu'))
    model.add(MaxPooling2D(2,2))
    model.add(Conv2D(filters=128,padding='same',kernel_size=(3,3),activation='relu'))
    model.add(MaxPooling2D(2,2))

    # Dense head; each dense layer is followed by batch normalization
    model.add(Flatten()) # author's open question: what does this step do?
    model.add(BatchNormalization())

    model.add(Dense(units=256,activation='selu',kernel_initializer='lecun_normal',use_bias=False))
    model.add(BatchNormalization())

    model.add(Dense(units=128,activation='selu',kernel_initializer='lecun_normal',use_bias=False))
    model.add(BatchNormalization())

    model.add(Dense(units=64,activation='selu',kernel_initializer='lecun_normal',use_bias=False))
    model.add(BatchNormalization())
    model.add(Dense(units=32,activation='selu',kernel_initializer='lecun_normal',use_bias=False))
    model.add(BatchNormalization())

    model.add(Dense(units=11,activation='softmax'))

    model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

    print('Model with Batch Normalization')
    model.summary()

elif trial==3: # VGG16 pre-trained model (author's note: Adam trains faster and scores better than SGD)
    base_model, model = _build_transfer_model(VGG16)
    print('Transfer Learning based on VGG16')
    model.summary()

else: # Xception pre-trained model (author's note: the learning rate still needs tuning)
    base_model, model = _build_transfer_model(Xception)
    print('Transfer Learning based on Xception Module')
    model.summary()

In [None]:
# Early-stopping callback: watches 'val_loss' with a patience of 15 epochs and
# restores the best weights. It only has an effect in fits that supply
# validation data. No ReduceLROnPlateau callback is created in this notebook.
early_stop = EarlyStopping(patience=15,monitor='val_loss',restore_best_weights=True)

In [None]:
def _steps_per_epoch(generator):
    """Return the number of batches needed to cover every sample of `generator` once.

    Uses ceiling division so the last partial batch is included, and a
    generator holding fewer samples than one batch still yields 1 step instead
    of 0.
    """
    return (generator.n + generator.batch_size - 1) // generator.batch_size


# Steps per epoch for each generator.
# Author's note (translated): plain floor division gave 0 validation steps when
# the generator held fewer samples than one batch, so validation was skipped
# and Keras printed a warning.
STEP_SIZE_TRAIN = _steps_per_epoch(train_generator)
STEP_SIZE_VALIDATION = _steps_per_epoch(validation_generator)
STEP_SIZE_TEST = _steps_per_epoch(test_generator)

In [None]:
 # TRAIN THE MODEL 

# First training stage: up to 40 epochs on the training generator, validating
# each epoch and stopping early through the early_stop callback.
fit_options = dict(
    epochs=40,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=validation_generator,
    validation_steps=STEP_SIZE_VALIDATION,
    callbacks=[early_stop],
)
model.fit(train_generator, **fit_options)



In [None]:
# Fine-tuning stage: unfreeze the pre-trained backbone and continue training
# with a much smaller learning rate. The model is re-compiled so that the
# trainable change takes effect.
# Requires `base_model`, which only the transfer-learning trials (3 and above) define.
base_model.trainable = True
# `learning_rate` replaces the deprecated `lr` argument; metrics is passed as a list
opt = Adam(learning_rate=0.000001)
model.compile(optimizer=opt,loss='categorical_crossentropy',metrics=['accuracy'])


# Model.fit accepts generators directly; fit_generator is deprecated
model.fit(train_generator,
          steps_per_epoch=STEP_SIZE_TRAIN,
          validation_data=validation_generator,
          validation_steps=STEP_SIZE_VALIDATION,
          epochs=16,
          callbacks=[early_stop])

In [None]:
# Evaluate the model on the validation generator.
# Model.evaluate accepts generators directly; evaluate_generator is deprecated.
model.evaluate(validation_generator)

In [None]:
# Same augmentation settings as image_generator but WITHOUT a validation split,
# so the final training run can use every labelled row.
# The normalisation settings (rescale / preprocessing_function) are read from
# image_generator instead of being copy-pasted, so the two pipelines cannot
# drift apart and the model always sees inputs on the scale it was first
# trained with.
data_generator = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    zca_epsilon=1e-06,
    rotation_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    brightness_range=None,
    shear_range=0.5,
    zoom_range=[0.5,1.8],
    channel_shift_range=0.0,
    fill_mode='nearest',
    cval=0.0,
    horizontal_flip=True,
    vertical_flip=False,
    rescale=image_generator.rescale,
    preprocessing_function=image_generator.preprocessing_function,
    data_format=None,
    validation_split=0.0,
    dtype= 'float32'
)

In [None]:
# Training iterator over train_df built from data_generator, whose
# validation_split is 0.0 - so the 'training' subset is presumably every
# labelled row (confirm with the "Found N validated image filenames" output).
train_generator_full = data_generator.flow_from_dataframe(dataframe=train_df,
                                                      directory=TRAIN_IMAGES,
                                                      x_col='file_name',
                                                      y_col='class',
                                                      subset='training',
                                                      color_mode='rgb',
                                                      batch_size=BATCH_SIZE,
                                                      seed=42,
                                                      shuffle=True, 
                                                      class_mode='categorical',
                                                      target_size=(224,224))

# Display the iterator object
train_generator_full

In [None]:
# Final training run on the full labelled set.
# There is no validation data in this fit, so a callback that monitors
# 'val_loss' would never trigger; this one monitors the training loss instead.
early_stop_full = EarlyStopping(patience=15, monitor='loss', restore_best_weights=True)

model.fit(train_generator_full,
          steps_per_epoch=train_generator_full.n//train_generator_full.batch_size,
          epochs=20,
          callbacks=[early_stop_full]
          )

In [None]:
# Persist the trained model as an HDF5 file in the Drive root
model.save(os.path.join(ROOT_DIR, 'JAVIS.h5'))

In [None]:
# Rewind the test generator to its first batch so that the predictions come
# out in the same order as test_generator.filenames.
test_generator.reset()


In [None]:
# Generate class-probability predictions for every test image.
# Model.predict accepts generators directly; predict_generator is deprecated.
predictions = model.predict(test_generator)

In [None]:
# For each test image keep the index of the class with the highest predicted probability
predictions_class_index = predictions.argmax(axis=1)

In [None]:
# Display the predicted class index of every test image
predictions_class_index

In [None]:
# Invert the training generator's name -> index mapping, then translate each
# predicted index back into its class name.
label = {class_idx: class_name
         for class_name, class_idx in train_generator.class_indices.items()}
final_predictions = [label[class_idx] for class_idx in predictions_class_index]

In [None]:
# Display the predicted class name of every test image
final_predictions

In [None]:
# Number of predictions produced (one per test image yielded by the generator)
len(final_predictions)

In [None]:
# Pair each test file name (in generator order) with its predicted class and
# write the result to submission.csv in the current working directory.
filenames=test_generator.filenames
results=pd.DataFrame({"file_name":filenames,
                      "class":final_predictions})
results.to_csv("submission.csv",index=False)

In [None]:
# Read the submission file back from the same relative path it was written to
# (the previous hard-coded '/content/' prefix only worked when the working
# directory happened to be /content).
final_df = pd.read_csv("submission.csv")
final_df

In [None]:
# Reload the test CSV, using its 'Unnamed: 0' column as the index, to serve as
# the template that the predictions are written into.
compare_df = pd.read_csv(
    os.path.join(FILE_PATH1, 'test_data_emp.csv'),
    index_col='Unnamed: 0',
)
compare_df

In [None]:
# Copy each predicted class into compare_df by matching on file name.
# This replaces an O(n^2) double loop that relied on chained assignment
# (compare_df['class'][j] = ...), which pandas may apply to a temporary copy,
# and on label-based [j] lookups that assume the index is exactly 0..n-1.
predicted_class_by_file = dict(zip(final_df['file_name'], final_df['class']))
matched_class = compare_df['file_name'].map(predicted_class_by_file)
# Rows without a matching prediction keep their existing 'class' value
compare_df['class'] = matched_class.where(matched_class.notna(), compare_df['class'])

In [None]:
# Display the template after the predicted classes have been filled in
compare_df

In [None]:
# Write the filled-in template, index included, to JAVIS.csv in the current working directory
compare_df.to_csv("JAVIS.csv",index=True)