### Using the CNN model on database images

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import cv2
import shutil, os

In [2]:
# Project layout: raw downloads and processed outputs both sit under the project root.
projdir = '/Users/brynronalds/Insight/proj_dir/'
datadir, procdatadir = (
    os.path.join(projdir, subdir) for subdir in ('data/raw/', 'data/processed/')
)

# Read the image listing; it carries an extra ID column ('Unnamed: 0') that is discarded.
csv_name = os.path.join(datadir, 'all_images.csv')
image_df = pd.read_csv(csv_name).drop(columns='Unnamed: 0')

# Load the highest performing keras model:
# loss: 0.0796 - accuracy: 0.9728 - val_loss: 0.1647 - val_accuracy: 0.9383
model_path = os.path.join(projdir, 'models/CNN_12epoch_64x3_EarlyStop3')
model = tf.keras.models.load_model(model_path)


In [3]:
# 'slickstyledsteel' was removed from the database, so its rows must leave the dataframe too.
slicks_rows = image_df.index[image_df['studio handle'] == 'slickstyledsteel']
image_df = image_df.drop(index=slicks_rows)
image_df

Unnamed: 0,filename,artist handle,studio handle
0,studioartease/2020-05-02_13-53-39_UTC.jpg,gettattooedbymae,studioartease
1,studioartease/2020-04-24_12-52-25_UTC.jpg,tattoosbyrian,studioartease
2,studioartease/2020-04-17_23-41-00_UTC.jpg,magdalena.mardini,studioartease
3,studioartease/2020-04-09_16-06-58_UTC.jpg,tattoosbyrian,studioartease
4,studioartease/2020-04-02_21-50-04_UTC.jpg,cultmtl,studioartease
...,...,...,...
8092,blackrosetattoomtl/2017-03-30_19-37-23_UTC.jpg,christinastattoos,blackrosetattoomtl
8093,blackrosetattoomtl/2017-02-24_19-44-12_UTC.jpg,emtattooo,blackrosetattoomtl
8094,blackrosetattoomtl/2017-02-24_19-32-13_UTC.jpg,sarahlangtattoos,blackrosetattoomtl
8095,blackrosetattoomtl/2017-02-10_15-49-30_UTC.jpg,christinastattoos,blackrosetattoomtl


In [4]:
# Studio handles as they appear in the 'studio handle' column of the dataframe.
STUDIO_HANDLES = ['studioartease','tatouage_dfa_tattoos','minuitdix_tattoo',
                  'paradise.montreal','lovelesstattoo','tattooabyss',
                  'tatouageroyal','lechalettattoo','tattoostudio1974','sgtattoondg',
                  'bloodlinemontreal','blackrosetattoomtl']
# Human-readable studio names, paired with STUDIO_HANDLES by position.
# ('DFA Tattoos' previously carried a stray leading space that leaked into the
# 'studio name' column and the exported CSVs.)
STUDIO_NAMES = ['Studio Artease','DFA Tattoos','Minuit Dix','Paradise',
               'Loveless','Tattoo Abyss','Tatouage Royal','Le Chalet','Tattoo 1974',
               'Saving Grace Tattoo','Bloodline Tattoo','The Black Rose']
# The lists are matched index-by-index, so a length mismatch would mislabel studios.
assert len(STUDIO_HANDLES) == len(STUDIO_NAMES), 'STUDIO_HANDLES/STUDIO_NAMES length mismatch'

In [6]:
# Clean up: filenames that cannot be read (posts with multiple images) are
# pointed at '<name>_1.jpg' instead.
IMG_SIZE = 200
replace_name = {}
for img_name in image_df['filename']:
    # cv2.imread reports a missing/unreadable file by returning None rather than
    # raising, so test for that directly instead of resizing inside a bare
    # `except:` (which would also swallow unrelated errors).
    if cv2.imread(os.path.join(datadir, img_name)) is None:
        replace_name[img_name] = img_name[:-4] + '_1.jpg'

# Rewrite only the filename column; a frame-wide replace would also alter any
# other column that happened to hold a matching value.
image_df = image_df.assign(
    filename=image_df['filename'].map(lambda name: replace_name.get(name, name))
)

In [7]:
# Number of filenames that were rewritten, then the total number of rows.
print(len(replace_name), len(image_df), sep='\n')

760
8097


In [8]:
# Load in images and get model predictions/add to dataframe
IMG_SIZE = 200        # every image is resized to IMG_SIZE x IMG_SIZE before prediction
TAT_THRESHOLD = 0.5   # predictions strictly above this are flagged as tattoos

image_df['studio name'] = ''
image_df['prediction'] = ''
image_df['is_tat'] = ''
broken_id = []

# Walk the handle/name pairs together instead of tracking a manual counter.
for studio, studio_name in zip(STUDIO_HANDLES, STUDIO_NAMES):
    # Select by column name; a positional lookup breaks if the column order changes.
    df_st = image_df[image_df['studio handle'] == studio]
    for index, img_name in df_st['filename'].items():
        img_array = cv2.imread(os.path.join(datadir, img_name))
        # cv2.imread returns None for a missing/unreadable file. Check that
        # explicitly rather than with a bare `except:`, so that genuine errors
        # in resizing or prediction are no longer silently recorded as "broken".
        if img_array is None:
            broken_id.append(index)
            continue
        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        # Single-image batch, pixel values scaled to [0, 1].
        data = np.array(new_array).reshape(-1, IMG_SIZE, IMG_SIZE, 3) / 255.0
        pred = model.predict(data)
        # .item() extracts the single score; float() on a non-0-d array is
        # deprecated in recent NumPy releases.
        predval = float(np.asarray(pred).item())
        image_df.loc[index, 'prediction'] = predval
        image_df.loc[index, 'is_tat'] = int(predval > TAT_THRESHOLD)
        image_df.loc[index, 'studio name'] = studio_name

# Had some posts that only downloaded text, no jpg: delete these
image_df = image_df.drop(index=broken_id)

In [9]:
# Number of rows dropped because their image could not be loaded, then the rows remaining.
print(len(broken_id), len(image_df), sep='\n')

103
7994


In [10]:
# Export two CSVs: the rows flagged as tattoos, and the full cleaned listing.
flagged_as_tattoo = image_df['is_tat'] == 1
tattoo_df = image_df.loc[flagged_as_tattoo]

for frame, csv_file in ((tattoo_df, 'tattoo_images.csv'), (image_df, 'all_images_cl.csv')):
    frame.to_csv(os.path.join(procdatadir, csv_file))

In [11]:
# Display the subset of rows whose is_tat flag is 1.
tattoo_df

Unnamed: 0,filename,artist handle,studio handle,studio name,prediction,is_tat
1,studioartease/2020-04-24_12-52-25_UTC.jpg,tattoosbyrian,studioartease,Studio Artease,0.998512,1
2,studioartease/2020-04-17_23-41-00_UTC.jpg,magdalena.mardini,studioartease,Studio Artease,0.999967,1
5,studioartease/2020-03-29_15-34-08_UTC.jpg,gettattooedbymae,studioartease,Studio Artease,0.665399,1
6,studioartease/2020-03-24_14-16-42_UTC.jpg,magdalena.mardini,studioartease,Studio Artease,0.981827,1
7,studioartease/2020-03-17_19-28-03_UTC.jpg,artease,studioartease,Studio Artease,0.93761,1
...,...,...,...,...,...,...
8087,blackrosetattoomtl/2017-06-09_16-20-05_UTC.jpg,sarahlangtattoos,blackrosetattoomtl,The Black Rose,0.958405,1
8089,blackrosetattoomtl/2017-04-26_22-54-12_UTC.jpg,sabootattoos,blackrosetattoomtl,The Black Rose,0.708704,1
8090,blackrosetattoomtl/2017-04-22_16-54-50_UTC.jpg,emtattooo,blackrosetattoomtl,The Black Rose,0.826406,1
8091,blackrosetattoomtl/2017-04-20_20-44-52_UTC.jpg,christinastattoos,blackrosetattoomtl,The Black Rose,0.999256,1


In [21]:
tatdir = os.path.join(procdatadir,'Inklusive_database/train/')
# shutil.copy does not create the destination folder, so make sure it exists.
os.makedirs(tatdir, exist_ok=True)

# tattoo_df comes from the cell that filters image_df on is_tat; the same rows
# are also saved as 'tattoo_images.csv' in procdatadir.

# Copy every tattoo image into the flat training folder and record the name it
# gets there. shutil.copy(src, directory) writes to directory/basename(src), so
# basename keeps the CSV in step with the files actually on disk.
# NOTE(review): the folder is flat, so two studios with an identically named
# file would overwrite each other - confirm this cannot happen.
replace_name = {}
for img_name in tattoo_df['filename']:
    shutil.copy(os.path.join(datadir, img_name), tatdir)
    replace_name[img_name] = os.path.basename(img_name)

# Rename only the filename column (a frame-wide replace could touch other
# columns); assign() returns a new frame, so the filtered view is not mutated.
tattoo_df = tattoo_df.assign(filename=tattoo_df['filename'].map(replace_name))
tattoo_df.to_csv(os.path.join(tatdir,'tattoo_info.csv'))