# Redo Big Run: apply small color 3-class CNN to mixed patches

This file is similar to DF_Filter.020.  
Use the CNN that was trained on the obviously blue, green, or black patches from DF_Filter.021.  
Predict the 'mixed' patches, i.e. those that were not obviously blue, green, or black in DF_Filter.021.  
Generate a CSV file.

DF_Filter.021: 
* Read from 'raw' images. Make patches. 
* Write to 'cache/train' and 'cache/valid'.
* Write mixed, uncertain images to 'patches'.

DF_Filter.022: 
* Train the CNN on {black,green,blue}={3930,2946,3410}.
* Use an 80:20 train/valid split.  

DF_Filter.023: 
* Run the CNN on 36739 files in 'patches'. 
* Write labels to the 'predictions.csv' file.

In [1]:
import csv
import glob
import json
import os
import time
#  os.environ['CUDA_VISIBLE_DEVICES'] = "0"   # turns off GPU?

import cv2 # OpenCV-Python
from PIL import Image
Image.MAX_IMAGE_PIXELS = None
import numpy as np
import tensorflow as tf
from tensorflow import keras
import keras.layers as kl
import pandas as pd
import matplotlib.pyplot as plt


In [2]:
# --- Input patches and output file -------------------------------------------
DIR_IMAGES_IN = "/home/jrm/Martinez/images/patches/"
#DIR_IMAGES_IN = "/home/jrm/Martinez/images/tinytest/"   # just for testing
PATTERN_PATCHES = "*.jpg"
OUTPUT_CSV = "predictions.csv"

# --- Patch geometry ----------------------------------------------------------
PATCH_SIZE = 224  # matches VGG
IMAGE_SIZE = (PATCH_SIZE, PATCH_SIZE)

# --- Trained model -----------------------------------------------------------
DIR_MODELS = "/home/jrm/Martinez/models/"
FILE_MODEL = "CNN_SMALL_REDO"
filepath = "".join([DIR_MODELS, FILE_MODEL])
print(f"Will use model from {filepath}")
# Load the saved model once; it is reused for every prediction below.
cnn = keras.models.load_model(filepath)

Will use model from /home/jrm/Martinez/models/CNN_SMALL_REDO


In [3]:
def get_image_names(path,pattern):
    """Return the base names of the files matching ``path + pattern``.

    Args:
        path: Directory prefix, concatenated verbatim with ``pattern``
            (so it must already end with a path separator).
        pattern: Glob pattern for the file names, e.g. ``"*.jpg"``.

    Returns:
        A list of file names (directory part stripped), sorted
        alphabetically. ``glob.glob`` yields matches in arbitrary,
        filesystem-dependent order; sorting makes the processing order and
        the resulting CSV row order reproducible from run to run.
        An empty list is returned when nothing matches.
    """
    paths = glob.glob(path+pattern)
    return sorted(os.path.basename(x) for x in paths)
# Base names of all files in DIR_IMAGES_IN that match PATTERN_PATCHES.
FILENAMES_PATCHES = get_image_names(path=DIR_IMAGES_IN, pattern=PATTERN_PATCHES)

In [4]:
# Rescaling layer that multiplies its input by 1/255; applied to each image
# array before it is passed to the CNN.
normalization_layer = kl.Rescaling(scale=1.0 / 255.0)

In [None]:

# Classify every patch in FILENAMES_PATCHES with the CNN, move each file into
# a per-label subfolder of DIR_IMAGES_IN, and log one CSV row per patch.
# With keras models, augmentation and dropout are inactive during prediction.

# Minimum score required to accept each label; anything below all three is
# labelled "mixed". Green uses a stricter cut-off than black and blue (see the
# threshold notes at the end of this notebook).
BLACK_THRESHOLD = 0.90
BLUE_THRESHOLD = 0.90
GREEN_THRESHOLD = 0.99
PROGRESS_EVERY = 1000  # print running totals after this many patches
LABELS = ('black', 'blue', 'green', 'mixed')

csvpath = DIR_IMAGES_IN+OUTPUT_CSV
print("Patches to process: ",len(FILENAMES_PATCHES))
color_counts = {label: 0 for label in LABELS}
pcount = 0

# os.rename() fails if the destination folder is missing, so make sure every
# label folder exists before the first file is moved.
for label in LABELS:
    os.makedirs(DIR_IMAGES_IN+label, exist_ok=True)

# NOTE(review): mode 'w' truncates an existing predictions file. Files already
# moved into a label folder no longer match the glob on DIR_IMAGES_IN, so
# restarting after a partial run drops the rows written earlier -- confirm
# this is acceptable or back up the CSV first.
with open(csvpath, 'w', newline='') as csvfile:
    datawriter = csv.writer(csvfile, delimiter=',')
    start = time.time()
    for filename in FILENAMES_PATCHES:
        path = DIR_IMAGES_IN+filename
        img = keras.preprocessing.image.load_img(
            path, target_size=IMAGE_SIZE
        )
        img_array = keras.preprocessing.image.img_to_array(img)
        img_array = normalization_layer(img_array)
        img_array = tf.expand_dims(img_array, 0)  # add a leading batch axis
        # NOTE: predict_classes() is not available in current TensorFlow
        # releases; the arg-max of `predictions` gives the top class if a
        # hard label is ever wanted instead of thresholded scores.
        # verbose=0 suppresses the per-call progress bar that would otherwise
        # be printed once per patch on recent TensorFlow versions.
        predictions = cnn.predict(img_array, verbose=0)
        score = predictions[0]  # TODO: explore other array slices
        # assumes the model emits exactly three scores ordered
        # black, blue, green -- TODO confirm against the training notebook
        blackscore,bluescore,greenscore = score
        if blackscore >= BLACK_THRESHOLD:
            word = "black"
        elif bluescore >= BLUE_THRESHOLD:
            word = "blue"
        elif greenscore >= GREEN_THRESHOLD:
            word = "green"
        else:
            word = "mixed"
        color_counts[word] += 1
        os.rename(path,DIR_IMAGES_IN+word+"/"+filename)
        # NOTE(review): the row holds the black and blue scores only; the
        # green score is not written. Column layout kept unchanged.
        datawriter.writerow( (filename, score[0], score[1], word) )
        pcount += 1
        if pcount % PROGRESS_EVERY == 0:
            print("At",pcount,color_counts)
    end = time.time()
print("Elapsed time:",end-start)
print("Total:",pcount)
print (color_counts)

Patches to process:  30232
At 1000 {'black': 318, 'blue': 361, 'green': 108, 'mixed': 213}
At 2000 {'black': 661, 'blue': 703, 'green': 228, 'mixed': 408}
At 3000 {'black': 1002, 'blue': 1054, 'green': 343, 'mixed': 601}
At 4000 {'black': 1371, 'blue': 1392, 'green': 434, 'mixed': 803}
At 5000 {'black': 1714, 'blue': 1734, 'green': 548, 'mixed': 1004}
At 6000 {'black': 2042, 'blue': 2122, 'green': 638, 'mixed': 1198}
At 7000 {'black': 2376, 'blue': 2473, 'green': 744, 'mixed': 1407}
At 8000 {'black': 2711, 'blue': 2851, 'green': 848, 'mixed': 1590}
At 9000 {'black': 3043, 'blue': 3225, 'green': 938, 'mixed': 1794}
At 10000 {'black': 3417, 'blue': 3565, 'green': 1032, 'mixed': 1986}
At 11000 {'black': 3752, 'blue': 3930, 'green': 1138, 'mixed': 2180}
At 12000 {'black': 4091, 'blue': 4301, 'green': 1258, 'mixed': 2350}
At 13000 {'black': 4447, 'blue': 4675, 'green': 1351, 'mixed': 2527}
At 14000 {'black': 4765, 'blue': 5055, 'green': 1457, 'mixed': 2723}
At 15000 {'black': 5101, 'blue': 

## A data subset was used to set the thresholds

Same threshold applied to all three classes:

At .99 threshold: 78 black, 69 blue, 688 green, 650 uncertain  
At .95 threshold: 101 black, 90 blue, 864 green, 430 uncertain  
At .90 threshold: 108 black, 101 blue, 932 green, 344 uncertain  

Separate thresholds, written as black/blue/green:

At .9/.9/.995: 108 black, 101 blue, 585 green, 691 uncertain  
At .9/.9/.997: 108 black, 101 blue, 452 green, 824 uncertain  
At .9/.9/.998: 108 black, 101 blue, 283 green, 993 uncertain  
At .9/.9/.999: 108 black, 101 blue, 0 green, 1276 uncertain  