In [1]:
import os
import glob
import csv
from shutil import copyfile

import pandas as pd
from sklearn.model_selection import KFold, StratifiedKFold

from keras.applications import mobilenet
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras import layers, models
import numpy as np
import tensorflow as tf

from csv import reader

In [None]:
# Gather the images of one class into the shared cross-validation folder and
# record each copied file as a "<filename> <label>" row in the labels CSV.
# Presumably this cell is re-run with `class_label` changed to add the other class.
csv_filename = 'data/train_cv_224/training_labels.csv'
class_label = '0'
originDir = f'data/train_224/{class_label}'
targetDir = 'data/train_cv_224'
os.makedirs(targetDir, exist_ok=True)

# NOTE: the CSV is opened in append mode and the header line is written on every
# run, so the file accumulates one header row per run; the loading cells drop
# those rows again with `train_data.label != 'label'`.
with open(csv_filename, 'a') as labels_file:
    labels_file.write('filename label\n')
    for idx, src_name in enumerate(os.listdir(originDir)):
        dst_name = f'{class_label}_{idx}.jpg'
        # Copy instead of rename so the per-class source folder is left untouched.
        copyfile(f'{originDir}/{src_name}', f'{targetDir}/{dst_name}')
        labels_file.write(f'{dst_name} {class_label}\n')

In [13]:
# Load the "<filename> <label>" table written by the copy/labelling cell and
# create the cross-validation splitters used by the training loop.
csv_filename = 'data/train_cv_224/training_labels.csv'
class_label = '0'
originDir = f'data/train_224/{class_label}'
targetDir = 'data/train_cv_224'

# Fixed seed so that every run produces the same fold assignment.
cv_seed = 42

# reading the csv file
# The label column is forced to str: flow_from_dataframe(class_mode="binary")
# requires string labels, and without an explicit dtype the parsed type would
# depend on whether repeated header rows happen to be present in the file.
train_data = pd.read_csv(csv_filename, sep=' ', header=0, dtype={'label': str})
# The labelling cell appends a header line on every run; drop the copies that
# were parsed as data rows.
train_data = train_data[train_data.label != 'label']
# No extra row shuffle is needed here: the splitters below already shuffle.
Y = train_data[['label']]

# setting up the k-folds
kf = KFold(n_splits=5, shuffle=True, random_state=cv_seed)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=cv_seed)

In [4]:
# Image generator shared by the training and validation flows; every image is
# passed through MobileNet's own input preprocessing.
idg = ImageDataGenerator(
    preprocessing_function=mobilenet.preprocess_input,
)

In [6]:
# Side length of the square images: used for the model's input_shape and as the
# generators' target_size.
img_size = 224

def create_new_model():
    """Build an uncompiled binary classifier on top of a frozen MobileNet.

    The backbone is MobileNet with ImageNet weights and no top, taking
    ``(img_size, img_size, 3)`` inputs; all of its layers are marked
    non-trainable. A new head is stacked on its output:
    global average pooling -> Dense(1024, relu) -> BatchNormalization ->
    Dense(512, relu) -> Dense(1, sigmoid).

    Returns:
        A ``models.Model`` from the MobileNet input to the single sigmoid unit.
    """
    backbone = mobilenet.MobileNet(
        weights='imagenet',
        include_top=False,
        input_shape=(img_size, img_size, 3),
    )
    # Freeze the pretrained layers so only the new head is trained.
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    # Classification head, applied in list order.
    head = [
        layers.GlobalAveragePooling2D(),
        layers.Dense(1024, activation='relu'),
        layers.BatchNormalization(),
        layers.Dense(512, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ]
    features = backbone.output
    for head_layer in head:
        features = head_layer(features)

    return models.Model(inputs=backbone.input, outputs=features)

# Early-stopping callback watching the validation loss. With patience=0 a fit
# ends at the first epoch that does not improve on the best val_loss so far, and
# restore_best_weights=True puts the best epoch's weights back on the model.
early_stopping_monitor = EarlyStopping(
    monitor='val_loss', mode='auto', baseline=None,
    min_delta=0, patience=0,
    restore_best_weights=True,
    verbose=0,
)

In [16]:
## Cross Validation
# A freshly built model is trained on each fold; the validation loss and
# accuracy of every fold are collected in `valid_loss` / `valid_accuracy`.
batch_size = 32
num_epochs = 10
num_samples = train_data.shape[0]
valid_accuracy, valid_loss = [], []

for train_index, val_index in kf.split(np.zeros(num_samples), Y):
    training_data = train_data.iloc[train_index]
    validation_data = train_data.iloc[val_index]

    # Both generators share every setting except the dataframe they read from.
    flow_options = dict(
        directory=targetDir,
        x_col="filename", y_col="label",
        class_mode="binary", shuffle=True,
        target_size=(img_size, img_size), batch_size=batch_size,
    )
    train_data_generator = idg.flow_from_dataframe(training_data, **flow_options)
    valid_data_generator = idg.flow_from_dataframe(validation_data, **flow_options)

    model = create_new_model()
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    # Whole batches only: floor division leaves a trailing partial batch out of
    # the per-epoch step counts.
    step_size = train_data_generator.n // train_data_generator.batch_size
    step_size_valid = valid_data_generator.n // valid_data_generator.batch_size

    with tf.device('/device:GPU:0'):
        history = model.fit(
            train_data_generator,
            epochs=num_epochs,
            steps_per_epoch=step_size,
            validation_data=valid_data_generator,
            validation_steps=step_size_valid,
            callbacks=[early_stopping_monitor],
        )

    # Score on the validation generator and key the values by metric name.
    results = model.evaluate(valid_data_generator)
    results = dict(zip(model.metrics_names, results))
    valid_accuracy.append(results['accuracy'])
    valid_loss.append(results['loss'])

    # Clear the Keras session before the next fold builds its own model.
    tf.keras.backend.clear_session()

Found 3217 validated image filenames belonging to 2 classes.
Found 805 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Found 3217 validated image filenames belonging to 2 classes.
Found 805 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Found 3218 validated image filenames belonging to 2 classes.
Found 804 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Found 3218 validated image filenames belonging to 2 classes.
Found 804 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10
Found 3218 validated image filenames belonging to 2 classes.
Found 804 validated image filenames belonging to 2 classes.
Epoch 1/10
Epoch 2/10


In [2]:
# Load the label table named by `csv_filename` and (re)create the
# cross-validation splitters for the single-fold run.
csv_filename = 'data/train_cv_224/training_labels_handOverFace_unique.csv'
class_label = '0'
targetDir = 'data/train_cv_224'

# Fixed seed so that every run produces the same fold assignment.
cv_seed = 42

# reading the csv file
train_data = pd.read_csv(csv_filename, sep=' ', header=0)
# Drop repeated header lines that were parsed as data rows.
train_data = train_data[train_data.label != 'label']
# The training cell feeds labels through class_mode="raw" into a sigmoid unit,
# so they must be numeric. If any header row had to be dropped above, the column
# was parsed as strings; convert explicitly (a no-op when it is already numeric,
# and an early, clear error if a label is not a number).
train_data = train_data.assign(label=pd.to_numeric(train_data['label']))
# No extra row shuffle is needed here: the splitters below already shuffle.
Y = train_data[['label']]

# setting up the k-folds
kf = KFold(n_splits=5, shuffle=True, random_state=cv_seed)
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=cv_seed)

In [5]:
## running only one of it
# Train and evaluate a single model on the first cross-validation split only.
# This cell needs `idg`, `img_size` and `create_new_model` from the cells above
# to have been run first.
valid_accuracy = []
valid_loss = []
batch_size = 32
num_epochs = 10
num_samples = train_data.shape[0]

# Take just the first (train, validation) index pair from the splitter.
train_index, val_index = next(kf.split(np.zeros(num_samples), Y))
training_data = train_data.iloc[train_index]
validation_data = train_data.iloc[val_index]

# class_mode="raw" passes the label column through unchanged as the target.
train_data_generator = idg.flow_from_dataframe(training_data, directory=targetDir,
                                              x_col="filename", y_col="label",
                                              class_mode="raw", shuffle=True,
                                              target_size=(img_size, img_size), batch_size=batch_size)
valid_data_generator = idg.flow_from_dataframe(validation_data, directory=targetDir,
                                              x_col="filename", y_col="label",
                                              class_mode="raw", shuffle=True,
                                              target_size=(img_size, img_size), batch_size=batch_size)

model = create_new_model()
# The model ends in a single sigmoid unit with one scalar label per image, so the
# matching loss is binary cross-entropy. Categorical cross-entropy over a single
# output is degenerate and makes the reported loss/accuracy meaningless.
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Whole batches only: floor division leaves a trailing partial batch out of the
# per-epoch step counts.
step_size = train_data_generator.n // train_data_generator.batch_size
step_size_valid = valid_data_generator.n // valid_data_generator.batch_size

with tf.device('/device:GPU:0'):
    # No callbacks here: training always runs for the full `num_epochs`.
    history = model.fit(train_data_generator, epochs=num_epochs, steps_per_epoch=step_size,
                        validation_data=valid_data_generator,
                        validation_steps=step_size_valid,
                        callbacks=[])

# Score on the validation generator and key the values by metric name.
results = model.evaluate(valid_data_generator)
results = dict(zip(model.metrics_names, results))

valid_accuracy.append(results['accuracy'])
valid_loss.append(results['loss'])

NameError: name 'img_size' is not defined