### Using the CNN model on database images

In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import cv2
import os

In [2]:
# Folder that holds the image files and all_images.csv.
# Defaults to the original author's location; set the TATTOO_DATADIR
# environment variable to run the notebook on another machine without
# editing this cell.
DATADIR = os.environ.get('TATTOO_DATADIR',
                         '/Users/brynronalds/Insight/directory/data/raw/')
# Saved Keras model; the path is relative to the notebook's working directory.
MODEL_PATH = 'CNN_12epoch_64x3_EarlyStop3'

csv_name = os.path.join(DATADIR, 'all_images.csv')
image_df = pd.read_csv(csv_name)
# clean the dataframe, there's an extra ID column (the first one in the file)
col_names = image_df.columns
image_df = image_df.drop(columns=[col_names[0]])

# load the highest performing keras model:
# loss: 0.0796 - accuracy: 0.9728 - val_loss: 0.1647 - val_accuracy: 0.9383
model = tf.keras.models.load_model(MODEL_PATH)

In [3]:
# (account handle, display name) pairs, kept side by side so the two lists
# derived below can never drift out of alignment.
STUDIOS = [
    ('studioartease', 'Studio Artease'),
    ('tatouage_dfa_tattoos', 'DFA Tattoos'),  # display name had a stray leading space
    ('minuitdix_tattoo', 'Minuit Dix'),
    ('paradise.montreal', 'Paradise'),
    ('lovelesstattoo', 'Loveless'),
    ('tattooabyss', 'Tattoo Abyss'),
    ('tatouageroyal', 'Tatouage Royal'),
    ('lechalettattoo', 'Le Chalet'),
    ('tattoostudio1974', 'Tattoo 1974'),
    ('sgtattoondg', 'Saving Grace Tattoo'),
]
# Parallel lists: STUDIO_NAMES[k] is the display name for STUDIO_HANDLES[k].
STUDIO_HANDLES = [handle for handle, name in STUDIOS]
STUDIO_NAMES = [name for handle, name in STUDIOS]

In [4]:
# clean up: replace filename for any that don't work (multiple images in post)
#
# An unreadable file is detected by cv2.imread returning None. The previous
# version detected it by letting cv2.resize fail inside a bare `except:`;
# that needed IMG_SIZE, which is only defined in a later cell, so on a fresh
# kernel the NameError was swallowed and *every* filename was rewritten to
# '<name>_1.jpg'.
replace_name = {}
for index in image_df.index:
    img_name = image_df.loc[index, 'filename']
    img_array = cv2.imread(os.path.join(DATADIR, img_name))
    if img_array is None:
        # drop the last 4 characters (the extension) and point at '<name>_1.jpg'
        replace_name[img_name] = img_name[:-4] + '_1.jpg'
image_df = image_df.replace(replace_name)

In [5]:
# Load in images and get model predictions/add to dataframe
#
# For each studio in STUDIO_HANDLES, every image of that studio is read from
# DATADIR, resized to IMG_SIZE x IMG_SIZE, divided by 255 and scored by `model`.
# Three columns are filled in on image_df: 'studio name', 'prediction' and
# 'is_tat' (1 or 0). Rows whose image cannot be read are dropped at the end.
IMG_SIZE = 200
# is_tat threshold: note that the model is uber sensitive
# **model prediction weighted to non-tat, especially if 2 tattoos in image
TAT_THRESHOLD = 0.001

image_df['studio name'] = ''
image_df['prediction'] = ''
image_df['is_tat'] = ''
broken_id = []
col_names = image_df.columns
# The column at position 2 is the one compared against the studio handles.
handle_col = col_names[2]

for studio, studio_name in zip(STUDIO_HANDLES, STUDIO_NAMES):
    df_st = image_df[image_df[handle_col] == studio]
    for index in df_st.index:
        img_name = df_st.loc[index, 'filename']
        img_array = cv2.imread(os.path.join(DATADIR, img_name))
        if img_array is None:
            # cv2.imread signals a missing/unreadable file by returning None;
            # checking it explicitly avoids a bare `except:` hiding other errors.
            broken_id.append(index)
            continue
        new_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        data = np.array(new_array).reshape(-1, IMG_SIZE, IMG_SIZE, 3) / 255.0
        # verbose=0: one predict call per image would otherwise print a
        # progress bar for every single row.
        pred = model.predict(data, verbose=0)
        # .item() extracts the single value; float() on an array with
        # ndim > 0 is deprecated since NumPy 1.25.
        predval = float(np.asarray(pred).item())
        image_df.loc[index, 'prediction'] = predval
        image_df.loc[index, 'is_tat'] = 1 if predval > TAT_THRESHOLD else 0
        image_df.loc[index, 'studio name'] = studio_name

# Had some posts that only downloaded text, no jpg: delete these
image_df = image_df.drop(broken_id, axis=0)

In [6]:
# Write two CSV files into DATADIR: one with only the rows flagged as tattoos
# (is_tat == 1) and one with the full cleaned dataframe.
# to_csv is left at its default index=True, so the dataframe index is written
# as the first column of each file.
is_tattoo = image_df['is_tat'] == 1
tattoo_df = image_df[is_tattoo]

for frame, csv_file in ((tattoo_df, 'tattoo_images.csv'),
                        (image_df, 'all_images_cl.csv')):
    frame.to_csv(os.path.join(DATADIR, csv_file))