In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

img = load_img('data/0Xf5QSSF2b.jpg')  # this is a PIL image
x = img_to_array(img)  # this is a Numpy array with shape (3, 150, 150)
x = x.reshape((1,) + x.shape)  # this is a Numpy array with shape (1, 3, 150, 150)

# the .flow() command below generates batches of randomly transformed images
# and saves the results to the `preview/` directory
i = 0
for batch in datagen.flow(x, batch_size=1,
                          save_to_dir='view', save_prefix='example', save_format='jpeg'):
    i += 1
    if i > 20:
        break  # otherwise the generator would loop indefinitely

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization

model = Sequential()

model.add(Conv2D(32, (3, 3), input_shape=(3, 150, 150), data_format="channels_first"))
model.add(Activation('relu'))
model.add(BatchNormalization(axis=1))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))

model.add(Conv2D(32, (3, 3), data_format="channels_first"))
model.add(Activation('relu'))
model.add(BatchNormalization(axis=1))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(BatchNormalization(axis=1))
model.add(MaxPooling2D(pool_size=(2, 2), data_format="channels_first"))

model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])


In [3]:
# load data into dataframe with 'path' and 'target' columns
import pandas as pd
train_label_df = pd.read_csv('data.csv', delimiter=' ', header=1, names=["uid"])
train_label_df["path"] = train_label_df['uid'].apply(lambda x : x.split(',')[-1])
train_label_df["target"] = train_label_df['uid'].apply(lambda x : int(x.split(',')[1])/int(x.split(',')[2]))
train_label_df =train_label_df.drop(['uid'], axis=1)

train_label_df2 = pd.read_csv('data_temp.csv', delimiter=' ', header=1, names=["uid"])
train_label_df2["path"] = train_label_df2['uid'].apply(lambda x : "./data/" + x.split(',')[0] + ".jpg")
train_label_df2["target"] = train_label_df2['uid'].apply(lambda x : int(x.split(',')[1])/int(x.split(',')[3]))
train_label_df2 =train_label_df2.drop(['uid'], axis=1)
#uid,likes,comments,followers
#B-A-bW9l1Gm,601,43,6715

In [4]:
train_label_df

Unnamed: 0,path,target
0,./data/B9dZzI1n-EV.jpg,0.112025
1,./data/B9cQMfVnkMe.jpg,0.638448
2,./data/B9a0IQWnY9M.jpg,0.053981
3,./data/B9NnTUrHoNX.jpg,0.143885
4,./data/B9XuP83gx_j.jpg,0.030143
...,...,...
614,./data/B9eu_7jHaaN.jpg,0.030105
615,./data/B9eyOJ3gMBl.jpg,0.020260
616,./data/B9erbIMHAhH.jpg,0.195320
617,./data/B9e2JraBbFd.jpg,0.026655


In [5]:
train_label_df2

Unnamed: 0,path,target
0,./data/B-A4q2_HbZM.jpg,0.021855
1,./data/B-A67qNleZJ.jpg,0.155491
2,./data/B-AaOGUhKbv.jpg,0.040524
3,./data/B-ACyP7JemN.jpg,0.027340
4,./data/B-Ar4y3hXX2.jpg,0.067915
...,...,...
810,./data/B9M_-rNg0Er.jpg,0.289023
811,./data/B9WejSrBIsL.jpg,0.917219
812,./data/B9wPREWhq0l.jpg,0.128363
813,./data/B9Y_fqoHwUn.jpg,0.286299


In [6]:
train_df = pd.concat([train_label_df, train_label_df2], axis=0).reset_index().drop(['index'], axis=1)

In [7]:
train_df

Unnamed: 0,path,target
0,./data/B9dZzI1n-EV.jpg,0.112025
1,./data/B9cQMfVnkMe.jpg,0.638448
2,./data/B9a0IQWnY9M.jpg,0.053981
3,./data/B9NnTUrHoNX.jpg,0.143885
4,./data/B9XuP83gx_j.jpg,0.030143
...,...,...
1429,./data/B9M_-rNg0Er.jpg,0.289023
1430,./data/B9WejSrBIsL.jpg,0.917219
1431,./data/B9wPREWhq0l.jpg,0.128363
1432,./data/B9Y_fqoHwUn.jpg,0.286299


In [8]:
from os import listdir
from os.path import isfile, join
onlyfiles = [f for f in listdir('data') if isfile(join('data', f))]
onlyfiles[0:5]

['.DS_Store',
 '0Xf5QSSF2b.jpg',
 '0Xh5lwSF4_.jpg',
 '0XhcghSF4Y.jpg',
 'B-A-bW9l1Gm.jpg']

In [9]:
new_df = train_df
for i in train_df['path']:
    if i.split("/")[-1] not in onlyfiles:
        print(train_df.index[train_df['path'] == i].tolist())
# All are found

In [10]:
batch_size = 32
# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
    rotation_range=1,
    
        horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)

# this is a generator that will read pictures found in
# subfolers of 'data/train', and indefinitely generate
# batches of augmented image data
train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df,
        x_col = 'path',
        y_col = 'target',
        directory='data',
        target_size=(150, 150),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='other')  # since we use binary_crossentropy loss, we need binary labels

# this is a similar generator, for validation data
#validation_generator = test_datagen.flow_from_directory(
#        'data/validation',
#        target_size=(150, 150),
#        batch_size=batch_size,
#        class_mode='binary')

Found 0 validated image filenames.


  .format(n_invalid, x_col)


In [11]:
from tensorflow.python.client import device_lib

def get_available_devices():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos]

print(get_available_devices())  

['/device:CPU:0']


In [15]:
import tensorflow as tf
tf.test.is_gpu_available()
tf.test.is_built_with_cuda()

False

In [None]:
model.fit_generator(
        train_generator,
        steps_per_epoch=800 // batch_size,
        epochs=50,
        #validation_data=validation_generator,
        #validation_steps=800 // batch_size)
        verbose = 2,
        max_queue_size = 20,
        use_multiprocessing=False,
        shuffle=True
)
model.save_weights('first_try.h5')

Epoch 1/50


Exception in thread Thread-6:
Traceback (most recent call last):
  File "c:\users\weustis\miniconda3\lib\threading.py", line 916, in _bootstrap_inner
    self.run()
  File "c:\users\weustis\miniconda3\lib\threading.py", line 864, in run
    self._target(*self._args, **self._kwargs)
  File "c:\users\weustis\miniconda3\lib\site-packages\tensorflow_core\python\keras\utils\data_utils.py", line 748, in _run
    with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor:
  File "c:\users\weustis\miniconda3\lib\site-packages\tensorflow_core\python\keras\utils\data_utils.py", line 727, in pool_fn
    initargs=(seqs, None, get_worker_id_queue()))
  File "c:\users\weustis\miniconda3\lib\multiprocessing\context.py", line 119, in Pool
    context=self.get_context())
  File "c:\users\weustis\miniconda3\lib\multiprocessing\pool.py", line 174, in __init__
    self._repopulate_pool()
  File "c:\users\weustis\miniconda3\lib\multiprocessing\pool.py", line 239, in _repopulate_pool
    w.start()
  File