### Using the CNN model on database images

In [12]:
import tensorflow as tf
import numpy as np
import pandas as pd
import cv2
import shutil, os

In [13]:
# Directory that holds the image files and the all_images.csv listing.
DATADIR = '/Users/brynronalds/Insight/directory/data/raw/'

# Read the listing of every image into a dataframe.
csv_name = os.path.join(DATADIR, 'all_images.csv')
image_df = pd.read_csv(csv_name)

# The CSV carries an extra ID column in first position; discard it by label.
col_names = image_df.columns
image_df = image_df.drop(columns=[col_names[0]])

# Restore the best-performing Keras model from disk (path is relative to the
# notebook's working directory). Metrics noted for this model:
# loss: 0.0796 - accuracy: 0.9728 - val_loss: 0.1647 - val_accuracy: 0.9383
model = tf.keras.models.load_model('CNN_12epoch_64x3_EarlyStop3')

In [14]:
# Inspect the image table after the extra ID column has been dropped.
image_df

Unnamed: 0,filename,artist handle,studio handle
0,studioartease/2020-05-02_13-53-39_UTC.jpg,gettattooedbymae,studioartease
1,studioartease/2020-04-24_12-52-25_UTC.jpg,tattoosbyrian,studioartease
2,studioartease/2020-04-17_23-41-00_UTC.jpg,magdalena.mardini,studioartease
3,studioartease/2020-04-09_16-06-58_UTC.jpg,tattoosbyrian,studioartease
4,studioartease/2020-04-02_21-50-04_UTC.jpg,cultmtl,studioartease
...,...,...,...
8662,slickstyledsteel/2015-02-28_22-36-00_UTC.jpg,stgo_tattoo,slickstyledsteel
8663,slickstyledsteel/2015-02-28_19-25-27_UTC.jpg,anatometalinc,slickstyledsteel
8664,slickstyledsteel/2015-02-23_15-09-43_UTC.jpg,jakeboomhower,slickstyledsteel
8665,slickstyledsteel/2014-11-25_22-52-19_UTC.jpg,smashleydale,slickstyledsteel


In [15]:
# Studio account handles; these are the values matched against the
# 'studio handle' column of the image table.
STUDIO_HANDLES = ['studioartease','tatouage_dfa_tattoos','minuitdix_tattoo',
                  'paradise.montreal','lovelesstattoo','tattooabyss',
                  'tatouageroyal','lechalettattoo','tattoostudio1974','sgtattoondg',
                  'bloodlinemontreal','blackrosetattoomtl','slickstyledsteel']
# Human-readable studio names, index-aligned with STUDIO_HANDLES
# (STUDIO_NAMES[k] is the display name for STUDIO_HANDLES[k]).
# 'DFA Tattoos' previously carried a stray leading space that leaked into the
# 'studio name' column and the exported CSV files.
STUDIO_NAMES = ['Studio Artease','DFA Tattoos','Minuit Dix','Paradise',
               'Loveless','Tattoo Abyss','Tatouage Royal','Le Chalet','Tattoo 1974',
               'Saving Grace Tattoo','Bloodline Tattoo','The Black Rose','Slick Styled Steel']

# The two lists are consumed in parallel, so they must stay the same length.
assert len(STUDIO_HANDLES) == len(STUDIO_NAMES), 'handle/name lists are out of sync'

In [16]:
# clean up: replace filename for any that don't work (multiple images in post)
# A listed file that cannot be read is remapped to its '<name>_1.jpg' variant.
IMG_SIZE = 200  # not needed for the readability check; kept so the name stays defined
replace_name = {}  # original filename -> replacement filename
for img_name in image_df['filename']:
    # cv2.imread does not raise for a missing/unreadable file: it returns None.
    # Testing for None (instead of a bare `except:` around a throw-away resize)
    # keeps KeyboardInterrupt and unrelated errors from being silently
    # counted as "broken image".
    if cv2.imread(os.path.join(DATADIR, img_name)) is None:
        # splitext strips whatever extension is present rather than assuming
        # it is exactly four characters long.
        replace_name[img_name] = os.path.splitext(img_name)[0] + '_1.jpg'

# Apply the renames to the filename column only; other columns are untouched.
image_df = image_df.assign(
    filename=image_df['filename'].map(lambda name: replace_name.get(name, name))
)

In [17]:
# Sanity check: number of filenames remapped to a '_1.jpg' variant, then the
# total number of rows in the image table (one value per line).
print(len(replace_name), len(image_df), sep='\n')

818
8667


In [18]:
# Load in images and get model predictions/add to dataframe
# For every studio, each readable image is resized, scaled to [0, 1] and passed
# through the model; the raw prediction, a thresholded is_tat flag and the
# studio's display name are written back onto that image's row.
image_df['studio name'] = ''
image_df['prediction'] = ''
image_df['is_tat'] = ''
IMG_SIZE = 200        # images are resized to IMG_SIZE x IMG_SIZE before prediction
TAT_THRESHOLD = 0.5   # predictions strictly above this are flagged is_tat = 1
broken_id = []        # index labels of rows whose image file could not be read

# The two studio lists are walked in parallel, so they must line up.
assert len(STUDIO_HANDLES) == len(STUDIO_NAMES), 'handle/name lists are out of sync'

for studio, studio_name in zip(STUDIO_HANDLES, STUDIO_NAMES):
    # Select the studio's rows by column name rather than by column position.
    df_st = image_df[image_df['studio handle'] == studio]
    for index in df_st.index:
        img_name = df_st.loc[index, 'filename']
        img_array = cv2.imread(os.path.join(DATADIR, img_name))
        # cv2.imread returns None (it does not raise) when the file is missing
        # or unreadable. Checking for that explicitly replaces the bare
        # `except:`, which also swallowed KeyboardInterrupt and real bugs.
        if img_array is None:
            broken_id.append(index)
            continue
        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        # Batch of one image, pixel values scaled from 0-255 to 0-1.
        data = new_array.reshape(-1, IMG_SIZE, IMG_SIZE, 3) / 255.0
        pred = model.predict(data)
        # .item() extracts the single value (and raises if there is more than
        # one); float() on a multi-dimensional array is deprecated in NumPy.
        predval = float(np.asarray(pred).item())
        image_df.loc[index, 'prediction'] = predval
        # set is_tat threshold:
        image_df.loc[index, 'is_tat'] = 1 if predval > TAT_THRESHOLD else 0
        image_df.loc[index, 'studio name'] = studio_name

# Had some posts that only downloaded text, no jpg: delete these
image_df = image_df.drop(broken_id, axis=0)

In [19]:
# Sanity check: number of rows dropped because their image could not be read,
# then the number of rows remaining in the image table (one value per line).
print(len(broken_id), len(image_df), sep='\n')

81
8586


In [20]:
# Build the tattoo-only table: rows whose is_tat flag equals 1.
is_tattoo = image_df['is_tat'] == 1
tattoo_df = image_df[is_tattoo]

# Write both tables into DATADIR. to_csv is called with its defaults, so the
# dataframe index is written as an unnamed first column in each file.
tattoo_csv = os.path.join(DATADIR, 'tattoo_images.csv')
all_images_csv = os.path.join(DATADIR, 'all_images_cl.csv')
tattoo_df.to_csv(tattoo_csv)
image_df.to_csv(all_images_csv)

In [21]:
# Inspect the rows flagged as tattoos (is_tat == 1).
tattoo_df

Unnamed: 0,filename,artist handle,studio handle,studio name,prediction,is_tat
1,studioartease/2020-04-24_12-52-25_UTC.jpg,tattoosbyrian,studioartease,Studio Artease,0.998512,1
2,studioartease/2020-04-17_23-41-00_UTC.jpg,magdalena.mardini,studioartease,Studio Artease,0.999967,1
5,studioartease/2020-03-29_15-34-08_UTC.jpg,gettattooedbymae,studioartease,Studio Artease,0.665399,1
6,studioartease/2020-03-24_14-16-42_UTC.jpg,magdalena.mardini,studioartease,Studio Artease,0.981827,1
7,studioartease/2020-03-17_19-28-03_UTC.jpg,artease,studioartease,Studio Artease,0.93761,1
...,...,...,...,...,...,...
8657,slickstyledsteel/2015-05-27_19-01-45_UTC.jpg,stgo_tattoo,slickstyledsteel,Slick Styled Steel,0.998509,1
8658,slickstyledsteel/2015-05-24_19-46-07_UTC.jpg,jakeboomhower,slickstyledsteel,Slick Styled Steel,0.999394,1
8659,slickstyledsteel/2015-05-24_19-33-07_UTC.jpg,stgo_tattoo,slickstyledsteel,Slick Styled Steel,0.996602,1
8662,slickstyledsteel/2015-02-28_22-36-00_UTC.jpg,stgo_tattoo,slickstyledsteel,Slick Styled Steel,0.999769,1


In [22]:
# Extra section: copy tattoo images to local dir to test image_retrieve
RUNDIR = '/Users/brynronalds/Insight/ImageSim/image_retrieval/data/train/'
# Make sure the destination exists before copying; shutil.copy does not create
# missing directories and would fail on the first file otherwise.
os.makedirs(RUNDIR, exist_ok=True)
# NOTE(review): shutil.copy into a directory keeps only the file's base name,
# so the per-studio sub-folder is dropped and two studios with an identically
# named file would overwrite each other - confirm this is acceptable.
for img_name in tattoo_df['filename']:
    shutil.copy(os.path.join(DATADIR, img_name), RUNDIR)