In [1]:
import tensorflow as tf
import json
import os
from distutils.dir_util import copy_tree
import shutil
import pandas as pd
import matplotlib.pyplot as plt

# now import TensorFlow and tf.keras

from tensorflow.keras import backend as K
from keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras import applications
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
!cp '/content/gdrive/MyDrive/Hackathon/DLHackathonFiles/DLHack_RFNN.zip' 'DLHack_RFNN.zip'

In [5]:
!unzip -q DLHack_RFNN.zip

In [6]:
dataset_inpt = '/content/DLHack_RFNN/dataset/'
input_size = 256
batch_size = 64
train_inpt = os.path.join(dataset_inpt,'train')
test_inpt = os.path.join(dataset_inpt, 'test')

In [7]:
#Read training and test Dataframe
rf_train = pd.read_csv('/content/DLHack_RFNN/train.csv')
rf_train.tail()

Unnamed: 0,file_id,label
1704,1704,1
1705,1705,1
1706,1706,1
1707,1707,1
1708,1708,1


In [8]:
rf_test = pd.read_csv('/content/DLHack_RFNN/test.csv')
rf_test.head()

Unnamed: 0,file_id
0,0
1,1
2,2
3,3
4,4


In [9]:
rf_train_datagen = ImageDataGenerator(
    rescale = 1/255,    
    horizontal_flip = True, 
    validation_split=0.2
)

rf_train_generator = rf_train_datagen.flow_from_directory(
    train_inpt,
    target_size = (input_size, input_size),
    color_mode = "rgb",
    class_mode = "binary",  
    batch_size = batch_size,
    shuffle = True,
    subset='training') 

Found 1368 images belonging to 2 classes.


In [10]:
validation_generator = rf_train_datagen.flow_from_directory(
    train_inpt, 
    target_size=(input_size, input_size),
    batch_size=batch_size,
    class_mode='binary',
    subset='validation') 

Found 341 images belonging to 2 classes.


In [16]:
paths = []
for i in rf_test['file_id']:   
    path = test_inpt + '/' + str(rf_test.iloc[i]['file_id'])+'.jpg'
    paths.append(path)
    
rf_test['file_path'] = paths
rf_test.tail()

Unnamed: 0,file_id,file_path
327,327,/content/DLHack_RFNN/dataset/test/327.jpg
328,328,/content/DLHack_RFNN/dataset/test/328.jpg
329,329,/content/DLHack_RFNN/dataset/test/329.jpg
330,330,/content/DLHack_RFNN/dataset/test/330.jpg
331,331,/content/DLHack_RFNN/dataset/test/331.jpg


In [17]:
test_datagen = ImageDataGenerator(
    rescale = 1/255    
)

rf_test_generator = test_datagen.flow_from_dataframe(
    rf_test,
    x_col='file_path', 
    target_size = (input_size, input_size),
    color_mode = "rgb",
    class_mode = None,
    batch_size = 1,
    shuffle = False
)

Found 332 validated image filenames.


In [33]:
tf.keras.backend.clear_session()
densenet = DenseNet121(weights='imagenet',
                           include_top=False,
                           input_shape=(input_size,input_size,3)
                          )
model = tf.keras.models.Sequential([densenet,
                                        tf.keras.layers.GlobalAveragePooling2D(),
                                        tf.keras.layers.Dense(512, activation='relu'),
                                        tf.keras.layers.BatchNormalization(),
                                        tf.keras.layers.Dropout(0.3),
                                        tf.keras.layers.Dense(256, activation='relu'),
                                        tf.keras.layers.BatchNormalization(),
                                        tf.keras.layers.Dropout(0.4),
                                        tf.keras.layers.Dense(128, activation='relu'),
                                        tf.keras.layers.BatchNormalization(),
                                        tf.keras.layers.Dropout(0.5),
                                        Dense(1, activation='sigmoid')
                                      ])
model.compile(optimizer=Adam(lr=0.001),
                loss='binary_crossentropy',
                metrics=['accuracy']
                )

  super(Adam, self).__init__(name, **kwargs)


In [34]:

custom_callbacks = [
    
    ModelCheckpoint(
        filepath = os.path.join( './best_result.h5'),
        monitor = 'val_accuracy',
        mode = 'max',
        verbose = 1,
        save_best_only = True
    )
]


In [35]:

model_result = model.fit(
    rf_train_generator,
    epochs = 10,
    steps_per_epoch = len(rf_train_generator),
    validation_data = validation_generator,
    validation_steps = len(validation_generator),
    callbacks = custom_callbacks
)
print(model_result.history)

Epoch 1/10
Epoch 1: val_accuracy improved from -inf to 0.55425, saving model to ./best_result.h5
Epoch 2/10
Epoch 2: val_accuracy did not improve from 0.55425
Epoch 3/10
Epoch 3: val_accuracy did not improve from 0.55425
Epoch 4/10
Epoch 4: val_accuracy did not improve from 0.55425
Epoch 5/10
Epoch 5: val_accuracy improved from 0.55425 to 0.60704, saving model to ./best_result.h5
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.60704
Epoch 7/10
Epoch 7: val_accuracy did not improve from 0.60704
Epoch 8/10
Epoch 8: val_accuracy did not improve from 0.60704
Epoch 9/10
Epoch 9: val_accuracy improved from 0.60704 to 0.65103, saving model to ./best_result.h5
Epoch 10/10
Epoch 10: val_accuracy did not improve from 0.65103
{'loss': [0.9623860120773315, 0.8732355237007141, 0.7805743217468262, 0.7708919644355774, 0.7563226819038391, 0.7156460881233215, 0.6963241100311279, 0.65773606300354, 0.6119428277015686, 0.5346137285232544], 'accuracy': [0.5416666865348816, 0.5592105388641357, 0.618

In [36]:
# load the saved model that is considered the best
best_result = load_model('./best_result.h5')

# Generate predictions
rf_test_generator.reset()

preds = best_result.predict(
    rf_test_generator,
    verbose = 1
)



In [37]:
rf_test['pred'] = preds.flatten()
submission = pd.read_csv('/content/DLHack_RFNN/test.csv')
submission['label'] =  rf_test['pred'].apply(lambda score : 1 if score > 0.5 else 0)
submission.to_csv("/content/gdrive/MyDrive/Hackathon/DLHackathonFiles/SubRealFake_1.csv",index=False)
submission.head(5)

Unnamed: 0,file_id,label
0,0,1
1,1,1
2,2,0
3,3,0
4,4,1
