In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa
import os

In [2]:
# List every physical device (CPU and GPU) that TensorFlow can see.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Hide the first physical GPU from TensorFlow and keep any others visible.
# With a single GPU this leaves no GPU visible at all.
physical_devices = tf.config.list_physical_devices('GPU')
remaining_gpus = physical_devices[1:]
try:
    tf.config.set_visible_devices(remaining_gpus, 'GPU')
except (ValueError, RuntimeError) as err:
    # Invalid device, or visibility cannot be modified any more because the
    # devices were already initialized. Report it instead of hiding it.
    print('Could not change the visible GPUs:', err)
else:
    logical_devices = tf.config.list_logical_devices('GPU')
    # No logical device may have been created for the hidden GPU. Comparing
    # against the slice length (not len(physical_devices) - 1) also holds
    # when the machine has no GPU at all.
    assert len(logical_devices) == len(remaining_gpus)

In [4]:
# Confirm which logical devices remain after the visibility change above.
tf.config.list_logical_devices()

[LogicalDevice(name='/device:CPU:0', device_type='CPU')]

In [5]:
def resize_fun(x):
    """Crop or pad ``x`` to 640 x 640 using ``tf.image.resize_with_crop_or_pad``."""
    side = 640
    return tf.image.resize_with_crop_or_pad(
        x,
        target_height=side,
        target_width=side,
    )

In [6]:
def create_gen(df, directory=os.path.join(os.path.dirname(os.getcwd()), 'ava-data', 'AVA_dataset','images_ext','images')):
    """Build a Keras batch iterator over the images listed in ``df``.

    Args:
        df: DataFrame with a ``data`` column (image file names, resolved
            against ``directory``) and a ``score`` column (used as the raw
            target value).
        directory: Folder containing the image files. The default is
            computed once, when this function is defined, from the parent of
            the current working directory.

    Returns:
        The iterator produced by ``ImageDataGenerator.flow_from_dataframe``,
        configured for batches of 32 images at 640 x 640 with pixel values
        rescaled by 1/255.
    """
    flow_options = dict(
        directory=directory,
        batch_size=32,
        x_col='data',
        y_col='score',
        class_mode='raw',
        target_size=(640, 640),
        # NOTE(review): `resizing_function` is not among the documented
        # flow_from_dataframe parameters; it is presumably absorbed by
        # **kwargs and ignored, so resize_fun may never run -- confirm.
        resizing_function=resize_fun,
    )
    rescaler = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255)
    return rescaler.flow_from_dataframe(df, **flow_options)

In [7]:
# Load the AVA metadata table. The file is space-separated without a header
# row, so column names are supplied here: a row index, the image ID, ten
# rating columns r1..r10, two tag columns and a CID column.
rating_columns = ['r' + str(x) for x in range(1, 11)]
ava_columns = ['index', 'ID', *rating_columns, 't1', 't2', 'CID']
ava = pd.read_csv('../ava-data/AVA_dataset/AVA.txt', sep=' ', names=ava_columns)

In [8]:
# Load the table of image file names and scores that feeds the validation
# generator from the 'test_df' CSV cache, or rebuild it from the AVA metadata
# when the cache is missing or unreadable.
# NOTE(review): this "test" frame is built from generic_ss_train.jpgl while
# train_df is built from generic_test.jpgl -- confirm the swap is intended.
# NOTE(review): nothing in this cell writes the 'test_df' cache file.
try:
    test_df = pd.read_csv('test_df', index_col=0)
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
    score_values = np.arange(1, 11)
    records = []
    with open('../ava-data/AVA_dataset/aesthetics_image_lists/generic_ss_train.jpgl') as file:
        for line in file:
            fID = line.strip()
            try:
                image_id = int(fID)
            except ValueError:
                # Blank or non-numeric line in the list file.
                continue
            raw_scores = ava.loc[ava.ID == image_id, 'r1':'r10'].values
            if raw_scores.shape[0] == 0:
                # The ID has no row in the AVA metadata.
                continue
            # Rating columns weighted by 1..10 and divided by their total.
            score = raw_scores.dot(score_values) / raw_scores.sum()
            records.append({'data': str(fID) + '.jpg', 'score': np.float32(score[0])})
    # Build the frame once; appending row by row copies the frame on every
    # iteration and DataFrame.append no longer exists in pandas 2.x.
    test_df = pd.DataFrame(records, columns=['data', 'score'])

In [9]:
# Load the table of image file names and scores that feeds the training
# generator from the 'train_df' CSV cache, or rebuild it from the AVA metadata
# when the cache is missing or unreadable.
# NOTE(review): this "train" frame is built from generic_test.jpgl while
# test_df is built from generic_ss_train.jpgl -- confirm the swap is intended.
# NOTE(review): nothing in this cell writes the 'train_df' cache file.
try:
    train_df = pd.read_csv('train_df', index_col=0)
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
    score_values = np.arange(1, 11)
    records = []
    with open('../ava-data/AVA_dataset/aesthetics_image_lists/generic_test.jpgl') as file:
        for line in file:
            fID = line.strip()
            try:
                image_id = int(fID)
            except ValueError:
                # Blank or non-numeric line in the list file.
                continue
            raw_scores = ava.loc[ava.ID == image_id, 'r1':'r10'].values
            if raw_scores.shape[0] == 0:
                # The ID has no row in the AVA metadata.
                continue
            # Rating columns weighted by 1..10 and divided by their total.
            score = raw_scores.dot(score_values) / raw_scores.sum()
            records.append({'data': str(fID) + '.jpg', 'score': np.float32(score[0])})
    # Build the frame once; appending row by row copies the frame on every
    # iteration and DataFrame.append no longer exists in pandas 2.x.
    train_df = pd.DataFrame(records, columns=['data', 'score'])

In [10]:
# Batch iterator over the images and scores listed in train_df.
training_gen = create_gen(train_df)

Found 19929 validated image filenames.


  .format(n_invalid, x_col)


In [11]:
# Batch iterator over the images and scores listed in test_df; used as the
# validation data.
val_gen = create_gen(test_df)

Found 2494 validated image filenames.


In [12]:
# Pull a single batch (images, labels) to push through the layers below.
im_batch, label_batch = next(training_gen)

In [13]:
# MobileNetV2 backbone for 640 x 640 RGB input, without the classification
# head (include_top=False) and without pretrained weights (weights=None).
MNv2_base = tf.keras.applications.MobileNetV2(
    input_shape=(640, 640, 3),
    alpha=1,
    include_top=False,
    weights=None,
)

In [14]:
# Run the sample batch through the backbone.
MNv2_base_batch = MNv2_base(im_batch)

In [15]:
# Global average pooling layer, applied to the backbone output of the sample
# batch.
MNv2_avg_pool = tf.keras.layers.GlobalAveragePooling2D()
MNv2_avg_pool_batch = MNv2_avg_pool(MNv2_base_batch)

In [16]:
# Single-unit dense layer (one output value per image), applied to the pooled
# features of the sample batch.
MNv2_dense = tf.keras.layers.Dense(1)
MNv2_dense_batch = MNv2_dense(MNv2_avg_pool_batch)

In [17]:
# Chain backbone -> global average pooling -> dense output into one model.
MNv2_model = tf.keras.models.Sequential([MNv2_base, MNv2_avg_pool, MNv2_dense])

In [18]:
# Optimizer, progress bar and per-epoch weight checkpointing for training.
# NOTE(review): 0.45 is a very large RMSprop learning rate -- confirm it is
# not a typo for 0.045. With decay_steps=100000 and staircase=True the rate
# also stays constant for the first 100000 optimizer steps.
initial_learning_rate = 0.45
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=100000,
    decay_rate=0.98,
    staircase=True)

optimizer = tf.keras.optimizers.RMSprop(learning_rate=lr_schedule, momentum=0.9)
tqdm_callback = tfa.callbacks.TQDMProgressBar()

# Weights are written to checkpoints/trial/<epoch>.hdf5 under the current
# working directory. The directory is created up front so saving cannot fail
# on a missing folder, and the file name no longer starts with a stray space.
checkpoint_dir = os.path.join(os.getcwd(), 'checkpoints', 'trial')
os.makedirs(checkpoint_dir, exist_ok=True)
checkpoint_filepath = os.path.join(checkpoint_dir, '{epoch:02d}.hdf5')
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=False)

In [19]:
# Optimize mean squared error and additionally report mean absolute error.
MNv2_model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['MAE'])

In [99]:
# Evaluate on 20 validation batches. evaluate() returns the loss followed by
# the compiled metrics; the model is compiled with a single metric (MAE), so
# unpacking three values would raise ValueError. Indexing first/last works
# whether or not an extra MSE metric is compiled in.
eval_results = MNv2_model.evaluate(val_gen, steps=20)
loss, MAE = eval_results[0], eval_results[-1]
# The loss is the mean squared error, so MSE stays available under its old name.
MSE = loss



In [20]:
# Train for 5 epochs, validating on val_gen, with the checkpoint and progress
# bar callbacks attached.
# NOTE(review): the recorded run of this cell ended in KeyError: 'val_loss';
# the cause is not visible in this notebook.
MNv2_model.fit(x=training_gen, epochs=5, validation_data=val_gen, callbacks=[model_checkpoint_callback, tqdm_callback])

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 623 steps, validate for 78 steps


HBox(children=(FloatProgress(value=0.0, description='Training', layout=Layout(flex='2'), max=5.0, style=Progre…

Epoch 1/5


HBox(children=(FloatProgress(value=0.0, layout=Layout(flex='2'), max=623.0), HTML(value='')), layout=Layout(di…

Epoch 1/5

  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)
  " Skipping tag %s" % (size, len(data), tag)




KeyError: 'val_loss'

In [48]:
# Empty table for problem images: image ID, a "missing" flag and a "corrupt" flag.
corrupt_df = pd.DataFrame(columns=['ID', 'miss', 'corr'])

In [None]:
from PIL import Image
from tqdm import tqdm

# Scan every image ID in the AVA table and record the files that are missing
# from disk (miss=True) or that cannot be opened/decoded (corr=True).
path = os.path.join(os.path.dirname(os.getcwd()), 'ava-data', 'AVA_dataset','images_ext','images')
corrupt_records = []
try:
    for image in tqdm(ava.ID):
        image_path = os.path.join(path, str(image)+'.jpg')
        try:
            # The context manager closes the file handle; load() forces the
            # pixel data to be decoded so that broken files raise here.
            with Image.open(image_path) as im:
                im.load()
        except FileNotFoundError:
            corrupt_records.append({'ID': image, 'miss': True, 'corr': False})
        except Exception:
            # Any other failure means the file exists but is unreadable.
            # KeyboardInterrupt is deliberately not caught, so the scan can
            # be stopped without logging a bogus "corrupt" entry.
            corrupt_records.append({'ID': image, 'miss': False, 'corr': True})
finally:
    # Build the frame once (row-by-row DataFrame.append is quadratic and was
    # removed in pandas 2.x); `finally` keeps partial results if the scan is
    # interrupted.
    corrupt_df = pd.DataFrame(corrupt_records, columns=['ID', 'miss', 'corr'])

  3%|█▏                                  | 8395/255530 [00:52<41:32, 99.14it/s]

In [54]:
# Show the missing/corrupt image records collected so far.
corrupt_df

Unnamed: 0,ID,miss,corr
0,953619,True,False
1,953958,True,False
2,954184,True,False
3,954113,True,False
4,953980,True,False
5,954175,True,False
6,953349,True,False
7,953645,True,False
8,953897,True,False
9,953841,True,False


In [61]:
# Spot-check one image: open it and view its pixel data as a NumPy array.
# NOTE(review): the file opened here is never closed explicitly.
im = Image.open(os.path.join(path, str(724198)+'.jpg'))
np.array(im.getdata())

(307200, 3)