In [None]:
import os
from pathlib import Path
import random

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from skimage import io
from torchvision import transforms as transforms
import torchvision.transforms.functional as tf

from window.utils.tiling import create_tile_list, split_locations_array
from window.models.yolo_for_inference import YoloClass
from yolo.trch_yolonet import YoloNetMeta
from window.utils.truths import windows_truth
from window.utils.inference_windows import process_annotation_df_negative_inference, create_windows_from_yolo, windows_to_whole_im, xc_to_xmn
from window.utils.drawing import draw_results_on_image

In [None]:
valid_whole_image_dir = "/data/old_home_dir/ChrissyF/GFRC/Test/whole_images/"
truth_file = "/data/old_home_dir/ChrissyF/GFRC/Test/00_GFRC_bboxes.csv"

In [None]:
# Active experiment configuration. Exactly one configuration block should be
# live at a time; the alternative (RGB baseline) is kept commented out below.
output_base_dir = "/home/cmf21/pytorch_save/GFRC/Bin/meta_ci_end/"
# Number embedded in the saved weights / boxes file names built from output_base_dir.
nn = 149
# Experiment tag used as a prefix/suffix for every cached CSV and output image.
experiment = 'meta_ci_end'
# True: run the model and regenerate all cached results.
# False: load the previously saved CSVs instead.
predict_results = False
meta_end = True
# Extra metadata columns; their count is added to the number of input channels.
col_list = ['lowerci', 'upperci']

#output_base_dir = "/home/cmf21/pytorch_save/GFRC/Bin/rgb_baseline2/"
#nn = 163
#experiment = 'rgb_baseline2'
#predict_results = False
#meta_end = False
#col_list = []

In [None]:
predicted_object_loc = output_base_dir + 'boxes_out' + str(nn) + '_full.csv'
saveweightspath = output_base_dir + "testing_save_" + str(nn) + ".pt"
channels_in = 3
nclazz = 1
dataset = 'gfrc'
channels_in = channels_in + len(col_list)

In [None]:
output_dir = output_base_dir + "test_images_out/"
output_csv = output_dir + "test_results/results_matched_nms.csv"

In [None]:
image_files_jpg = list(Path(valid_whole_image_dir).glob("*.jpg"))
image_files_png = list(Path(valid_whole_image_dir).glob("*.png"))
image_files = image_files_jpg + image_files_png
image_files = [img.name for img in image_files]

In [None]:
# Process truth files
truths = pd.read_csv(truth_file)

truths.loc[:, 'filename'] = [strin.replace('/', '_') for strin in truths.file_loc]
truths = windows_truth(truths)


In [None]:
truths['oc'] = 0
truths

In [None]:
def split_filename(str_in):
    """Split a tile file name into (whole-image file name, tile id).

    The final four characters of ``str_in`` (the extension) are dropped and the
    rest is split on underscores. The first two fields are re-joined and given
    a ``.jpg`` extension; the third field is returned as the tile identifier.
    """
    parts = str_in[:-4].split('_')
    return parts[0] + '_' + parts[1] + '.jpg', parts[2]

In [None]:
if predict_results:
    yolo_model = YoloClass(wtpath=saveweightspath, channels=channels_in, nclazz=nclazz, meta_cols=col_list, meta_end=meta_end)

    windows_columns = ['xc', 'yc', 'wid', 'hei', 'conf', 'class', 'xmn', 'xmx', 'ymn', 'ymx', 'filename']
    # Per-image frames are collected and concatenated once after the loop;
    # concatenating inside the loop re-copies every earlier row on each pass.
    windows_per_image = []
    n_images = len(image_files)

    for idx, fl in enumerate(image_files):
        # Per image; the total is taken from the file list, not hard-coded.
        print(fl, idx, "of", n_images)

        whole_im = io.imread(valid_whole_image_dir + fl, as_gray=False)

        # create tiles
        tilez = create_tile_list(whole_im)

        # get predictions from yolo (0.005 is the confidence cut-off passed to the model)
        boxes_whole_im = yolo_model.inference_on_image(tilez, 0.005, fl)

        windows_whole_im = windows_to_whole_im(boxes_whole_im)
        windows_whole_im['filename'] = fl

        windows_per_image.append(windows_whole_im)

    # The empty template keeps the column order (and an empty result) identical
    # to the previous incremental build.
    windows_whole = pd.concat([pd.DataFrame(columns=windows_columns)] + windows_per_image, axis=0)

In [None]:
image_data = pd.read_csv('/home/cmf21/pytorch_save/GFRC/preds_for_cnn.csv')
image_data.shape

In [None]:
if predict_results:
    windows_whole

In [None]:
# NOTE(review): this rebinds `output_dir`, which an earlier cell set to
# output_base_dir + "test_images_out/"; every later cell uses this value.
output_dir = "/home/cmf21/pytorch_save/output_for_draft/"
windows_filename = experiment + '_test_windows.csv'
if predict_results:    
    # Only detections with conf > 0.01 are cached; `windows_whole` itself is
    # left unfiltered in this branch.
    windows_part = windows_whole[windows_whole.conf > 0.01]
    windows_part.to_csv(output_dir + windows_filename, index=False)
else:
    # Load the cached (already conf > 0.01 filtered) detections instead.
    windows_whole = pd.read_csv(output_dir + windows_filename)

In [None]:
def process_truths(truths, dataset):
    """Prepare a ground-truth table for matching against detections.

    Adds a ``filename`` column to the passed frame (in place) by replacing
    ``/`` with ``_`` in ``file_loc``, runs the project helper ``windows_truth``
    on it, then shifts the ``oc`` column up by one and copies it into
    ``tru_class``. ``dataset`` is accepted but not used.
    """
    flat_names = [loc.replace('/', '_') for loc in truths.file_loc]
    truths.loc[:, 'filename'] = flat_names
    processed = windows_truth(truths)
    shifted_class = processed['oc'].add(1)
    processed['oc'] = shifted_class
    processed['tru_class'] = processed['oc']
    return processed

In [None]:
gfrc_truth = pd.read_csv(truth_file)
gfrc_truth = process_truths(gfrc_truth, 'gfrc')
gfrc_truth

In [None]:
# find biggest and smallest truth
print(max(np.max(gfrc_truth.xmx - gfrc_truth.xmn), np.max(gfrc_truth.ymx - gfrc_truth.ymn)))
print(min(np.min(gfrc_truth.xmx - gfrc_truth.xmn), np.min(gfrc_truth.ymx - gfrc_truth.ymn)))
# find biggest and smallest area
print(np.max((gfrc_truth.xmx - gfrc_truth.xmn) * (gfrc_truth.ymx - gfrc_truth.ymn)))
print(np.min((gfrc_truth.xmx - gfrc_truth.xmn) * (gfrc_truth.ymx - gfrc_truth.ymn)))

In [None]:
# Size limits used to discard implausible detections: 75% of a smallest and
# 125% of a largest reference side length / area.
# NOTE(review): 11, 163, 319 and 24287 are hard-coded; presumably they are the
# min/max side and area printed by the previous cell (or from the training
# truths) - confirm, and consider deriving them from the data.
too_small = int(0.75*11)
too_big = int(1.25*163)
area_too_small = int(0.75*319)
area_too_big = int(1.25*24287)
print(too_small, too_big, area_too_small, area_too_big)

In [None]:
if predict_results:
    gfrc_windows_in = windows_whole
else:
    gfrc_windows_in = pd.read_csv(output_dir + windows_filename)
gfrc_windows_in.shape[0]

In [None]:
#gfrc_windows_in = pd.read_csv(predicted_object_loc)
#filez = np.unique(gfrc_windows_in.file)

In [None]:
#output_dir = "/home/cmf21/pytorch_save/output_for_draft/"
#windows_filename = experiment + '_test_windows.csv'
#gfrc_windows_in = pd.read_csv(output_dir + windows_filename)

In [None]:
# remove too big and too small detections
# NOTE: this is an alias, not a copy - the helper columns added below are also
# added to `gfrc_windows_in`.
gfrc_windows = gfrc_windows_in
gfrc_windows['xside'] = gfrc_windows.xmx - gfrc_windows.xmn
gfrc_windows['yside'] = gfrc_windows.ymx - gfrc_windows.ymn
gfrc_windows['minside'] = np.minimum(gfrc_windows.xside, gfrc_windows.yside)
gfrc_windows['maxside'] = np.maximum(gfrc_windows.xside, gfrc_windows.yside)
gfrc_windows['area'] = gfrc_windows.xside * gfrc_windows.yside
print(np.max(gfrc_windows.maxside), np.min(gfrc_windows.minside))
# Keep only boxes whose shortest side, longest side and area all fall strictly
# inside the limits defined earlier; from here on `gfrc_windows` is a filtered
# subset rather than the alias.
gfrc_windows = gfrc_windows[gfrc_windows.minside > too_small]
gfrc_windows = gfrc_windows[gfrc_windows.maxside < too_big]
gfrc_windows = gfrc_windows[gfrc_windows.area < area_too_big]
gfrc_windows = gfrc_windows[gfrc_windows.area > area_too_small]
gfrc_windows

In [None]:
gfrc_windows[gfrc_windows.yside == -1]

In [None]:
def nms_per_im(boxes_in, thresh, method='mean'):
    """Greedy non-maximum suppression over the detections of a single image.

    Boxes are visited in descending confidence order. The current
    highest-confidence box (the anchor) is grouped with every remaining box
    whose IoU with it exceeds ``thresh``; the group is collapsed to one output
    box and removed, and the process repeats on what is left.

    Parameters
    ----------
    boxes_in : pandas.DataFrame
        Columns ``xmn``, ``xmx``, ``ymn``, ``ymx``, ``conf``, ``class`` and
        ``filename``. All rows are expected to belong to one image; the
        ``filename`` of the top-confidence row is used for every output row.
    thresh : float
        IoU above which a box is merged into the anchor's group.
    method : str
        How a group is collapsed:
        ``'mean'``  - integer-truncated mean of the coordinates, mean confidence;
        ``'first'`` - the anchor box itself (classic NMS);
        anything else - the bounding union of the group with its max confidence.

    Returns
    -------
    pandas.DataFrame
        Columns ``xmn``, ``xmx``, ``ymn``, ``ymx``, ``conf``, ``pred_class``,
        ``filename``; empty (with those columns) when ``boxes_in`` is empty.

    Notes
    -----
    The anchor box is part of its own group. Previously the merged box was
    built only from the *other* overlapping boxes, so the highest-confidence
    detection was dropped and ``'first'`` returned the second-best box.
    """
    out_cols = ['xmn', 'xmx', 'ymn', 'ymx', 'conf', 'pred_class']
    if boxes_in.shape[0] == 0:
        # Nothing to suppress; also avoids indexing filename.iloc[0] on an empty frame.
        return pd.DataFrame(columns=out_cols + ['filename'])

    boxes_in = boxes_in.sort_values(by='conf', ascending=False)

    def _as_array(col):
        # Going through a list lets numpy infer a numeric dtype even when the
        # column is stored as object.
        return np.array(boxes_in[col].tolist())

    xmins = _as_array('xmn')
    xmaxs = _as_array('xmx')
    ymins = _as_array('ymn')
    ymaxs = _as_array('ymx')
    confs = _as_array('conf')
    clazs = _as_array('class')

    kept = []
    while len(xmins) > 0:
        xmn, xmx, ymn, ymx = xmins[0], xmaxs[0], ymins[0], ymaxs[0]

        # Intersection of the anchor with each remaining box (clipped at zero).
        ol_x = np.maximum(0, np.minimum(xmx, xmaxs[1:]) - np.maximum(xmn, xmins[1:]))
        ol_y = np.maximum(0, np.minimum(ymx, ymaxs[1:]) - np.maximum(ymn, ymins[1:]))
        ol_area = ol_x * ol_y

        anchor_area = (xmx - xmn) * (ymx - ymn)
        others_area = (xmaxs[1:] - xmins[1:]) * (ymaxs[1:] - ymins[1:])
        union = others_area + anchor_area - ol_area

        # Degenerate (zero-area) pairs get IoU 0 instead of a NaN/inf with a warning.
        ious = np.divide(ol_area, union, out=np.zeros(len(union), dtype=float), where=union > 0)

        # Group membership over the current arrays: the anchor plus its overlaps.
        group = np.concatenate(([True], ious > thresh))

        if group.sum() > 1 and method == 'mean':
            box = (int(np.mean(xmins[group])), int(np.mean(xmaxs[group])),
                   int(np.mean(ymins[group])), int(np.mean(ymaxs[group])),
                   float(np.mean(confs[group])))
        elif group.sum() > 1 and method != 'first':
            box = (np.min(xmins[group]), np.max(xmaxs[group]),
                   np.min(ymins[group]), np.max(ymaxs[group]),
                   np.max(confs[group]))
        else:
            # No overlaps, or method == 'first': keep the anchor unchanged.
            box = (xmn, xmx, ymn, ymx, confs[0])

        kept.append(box + (clazs[0],))

        # Drop the whole group and continue with the remaining boxes.
        remaining = np.logical_not(group)
        xmins = xmins[remaining]
        xmaxs = xmaxs[remaining]
        ymins = ymins[remaining]
        ymaxs = ymaxs[remaining]
        confs = confs[remaining]
        clazs = clazs[remaining]

    # Build the frame once rather than concatenating a one-row frame per box.
    boxes_ot = pd.DataFrame(kept, columns=out_cols)
    boxes_ot['filename'] = boxes_in.filename.iloc[0]

    return boxes_ot

In [None]:
def nms_for_yolo(windows_df, nms_thresh, method):
    """Apply per-image non-maximum suppression to a table of detections.

    Detections are grouped by ``filename``; each group is passed to
    ``nms_per_im`` and the per-image results are stacked into one frame with a
    fresh 0..n-1 index.

    Parameters
    ----------
    windows_df : pandas.DataFrame
        Detections for any number of images (must have a ``filename`` column
        plus whatever ``nms_per_im`` reads).
    nms_thresh : float
        IoU threshold forwarded to ``nms_per_im``.
    method : str
        Merge method forwarded to ``nms_per_im``.

    Returns
    -------
    pandas.DataFrame
        Columns ``xmn``, ``xmx``, ``ymn``, ``ymx``, ``conf``, ``pred_class``,
        ``filename``; empty with those columns when there are no detections.
    """
    out_cols = ['xmn', 'xmx', 'ymn', 'ymx', 'conf', 'pred_class', 'filename']

    # One frame per image, concatenated once. The previous stray
    # DataFrame.append call discarded its result and raises AttributeError
    # on pandas >= 2.0, where the method no longer exists.
    per_image = [
        nms_per_im(windows_df[windows_df.filename == im], nms_thresh, method)
        for im in np.unique(windows_df.filename)
    ]
    if not per_image:
        return pd.DataFrame(columns=out_cols)
    return pd.concat(per_image, axis=0, ignore_index=True, sort=False)

In [None]:
def intersection_over_union(box1, box2):
    """Intersection-over-union of two boxes with inclusive pixel edges.

    Each box must expose ``xmn``, ``xmx``, ``ymn`` and ``ymx`` attributes.
    Widths and heights are measured as ``max - min + 1``.
    """
    # Edges of the overlap rectangle.
    left = max(box1.xmn, box2.xmn)
    right = min(box1.xmx, box2.xmx)
    top = max(box1.ymn, box2.ymn)
    bottom = min(box1.ymx, box2.ymx)

    # Overlap area, clipped to zero when the boxes do not intersect.
    overlap_w = max(0, right - left + 1)
    overlap_h = max(0, bottom - top + 1)
    inter_area = overlap_w * overlap_h

    def _area(box):
        return (box.xmx - box.xmn + 1) * (box.ymx - box.ymn + 1)

    union_area = float(_area(box1) + _area(box2) - inter_area)
    return inter_area / union_area

In [None]:
def match_to_truth_im(detections_df, truth_df, iou_threshold):
    """Label the detections and truths of one image as TP / FP / FN.

    For every truth box the IoU with every detection is computed and the
    detection with the highest IoU is selected. If that IoU exceeds
    ``iou_threshold`` and the detection has not already been claimed by an
    earlier truth, the detection is emitted as a ``'TP'`` row (with the truth
    box description in ``tru_box`` and the truth's ``tru_class``).

    Detections that overlap any truth above the threshold are never reported
    as false positives, whether or not they became a TP. All remaining
    detections are emitted as ``'FP'`` rows with ``tru_class`` 0. Truths that
    were not marked as matched are emitted as ``'FN'`` rows with ``conf`` 1.0
    and ``pred_class`` 0.

    Note: a truth whose best detection was already claimed by an earlier truth
    is reported as FN, even if other detections overlap it.

    Parameters
    ----------
    detections_df : pandas.DataFrame
        Detections with columns ``xmn``, ``xmx``, ``ymn``, ``ymx``, ``conf``,
        ``filename``, ``pred_class``.
    truth_df : pandas.DataFrame
        Truth boxes with columns ``xmn``, ``xmx``, ``ymn``, ``ymx``,
        ``filename``, ``tru_class``.
    iou_threshold : float
        IoU above which a detection counts as overlapping a truth.

    Returns
    -------
    pandas.DataFrame
        One row per TP, FP and FN with a ``confmat`` column holding the label.
        When ``detections_df`` is empty every truth is returned as FN.
    """
    
    if detections_df.shape[0] > 0:
        results_out = pd.DataFrame(columns = ['xmn', 'xmx', 'ymn', 'ymx', 'conf', 'filename', 'pred_class', 
                                              'tru_class'])
        results_per_im = detections_df[['xmn', 'xmx', 'ymn', 'ymx', 'conf', 'filename', 'pred_class']]
        # Positional 0..n-1 indices so that argmax positions and iterrows labels line up.
        results_per_im = results_per_im.reset_index(drop=True)
        truths_per_im = truth_df.reset_index(drop=True)
        # best match stores the detection with the highest iou
        best_match = [False] * results_per_im.shape[0]
        # true match stores if the truth overlaps with any detections
        true_match = [False] * truth_df.shape[0]
        # matchz stores any matches that overlap but aren't the best overlap (not double counting TP, but not adding to FP)
        matchz = np.array([True] * results_per_im.shape[0])

        for idx, tru in truths_per_im.iterrows():
            # IoU of this truth against every detection, in detection order.
            iouz = []
            for res_idx, result in results_per_im.iterrows():
                iou = intersection_over_union(tru, result)
                iouz.append(iou)
            iou_ind = np.argmax(iouz)
            if iouz[iou_ind] > iou_threshold:
                # Each detection can be the TP of at most one truth.
                if not best_match[iou_ind]: 
                    best_iou_res = results_per_im.iloc[iou_ind:(iou_ind+1), :]
                    best_iou_res = best_iou_res.reset_index(drop=True)
                    best_iou_res.loc[:, 'confmat'] = 'TP'
                    true_box = f'xmin: {tru.xmn}; xmax:{tru.xmx}; ymin: {tru.ymn}; ymax: {tru.ymx}'
                    best_iou_res.loc[:, 'tru_box'] = true_box
                    best_iou_res.loc[:, 'tru_class'] = tru.tru_class
                    results_out = pd.concat((results_out, best_iou_res), axis=0, ignore_index=True, sort=False)
                    best_match[iou_ind] = True
                    true_match[idx] = True
            # matchz removes any matches that overlap but are not the most overlapping
            match_mask = np.array(iouz) > iou_threshold
            matchz[match_mask] = False

        # use matchz to filter results to keep only those that don't overlap with truths
        results_per_im = results_per_im[matchz]
        results_per_im = results_per_im.reset_index(drop=True)

        if results_per_im.shape[0] > 0:
            results_per_im['confmat'] = 'FP'
            results_per_im['tru_box'] = ''
            results_per_im['tru_class'] = 0
        results_out = pd.concat((results_out, results_per_im), axis=0, ignore_index=True, sort=False)  
        # Invert the flags: True now marks truths that were never matched.
        true_match = np.array(true_match)
        true_match = np.logical_not(true_match)
        if np.sum(true_match) > 0:
            false_negatives = truth_df[['xmn', 'xmx', 'ymn', 'ymx', 'filename', 'tru_class']]
            # Boolean numpy mask, applied positionally to the truth rows.
            false_negatives = false_negatives[true_match]
            false_negatives = false_negatives.reset_index(drop=True)
            false_negatives.loc[:, 'conf'] = 1.0
            false_negatives.loc[:, 'confmat'] = 'FN'
            false_negatives.loc[:, 'tru_box'] = ''
            false_negatives.loc[:, 'pred_class'] = 0
            results_out = pd.concat((results_out, false_negatives), axis=0, ignore_index=True, sort=False)
        results_out = results_out.reset_index(drop=True)
    else:
        # No detections at all: every truth is a false negative.
        results_out = truth_df.loc[:, ['xmn', 'xmx', 'ymn', 'ymx', 'filename', 'tru_class']]
        results_out.columns = ['xmn', 'xmx', 'ymn', 'ymx', 'filename', 'tru_class']
        results_out.loc[:, 'conf'] = 1
        results_out.loc[:, 'confmat'] = 'FN'
        results_out.loc[:, 'tru_box'] = ''
        results_out.loc[:, 'pred_class'] = 0

    return results_out

In [None]:
def match_to_truth(detections_df, truth_df, valid_whole_image_dir, iou_threshold):
    """Match detections to truths for every image found in a directory.

    The image list is every ``*.jpg`` then every ``*.png`` file in
    ``valid_whole_image_dir``. For each image:

    * detections and truths  -> delegated to ``match_to_truth_im``;
    * detections, no truths  -> every detection is an ``'FP'`` (``tru_class`` 0);
    * truths, no detections  -> every truth is an ``'FN'`` (``conf`` 1.0, ``pred_class`` 0);
    * neither                -> contributes nothing.

    Parameters
    ----------
    detections_df : pandas.DataFrame
        Columns ``xmn``, ``xmx``, ``ymn``, ``ymx``, ``conf``, ``filename``, ``pred_class``.
    truth_df : pandas.DataFrame
        Columns ``xmn``, ``xmx``, ``ymn``, ``ymx``, ``filename``, ``tru_class``.
    valid_whole_image_dir : str or Path
        Directory whose image file names define which images are evaluated.
    iou_threshold : float
        Forwarded to ``match_to_truth_im``.

    Returns
    -------
    pandas.DataFrame
        All per-image rows stacked with a fresh index; the standard columns
        come first, in a fixed order.
    """
    out_cols = ['xmn', 'xmx', 'ymn', 'ymx', 'conf', 'filename', 'confmat', 'tru_box', 'pred_class', 'tru_class']
    image_dir = Path(valid_whole_image_dir)
    image_files = [img.name for img in list(image_dir.glob("*.jpg")) + list(image_dir.glob("*.png"))]

    per_image = []
    for im in image_files:
        detections_im = detections_df[detections_df.filename == im]
        truth_im = truth_df[truth_df.filename == im]
        has_detections = detections_im.shape[0] > 0
        has_truths = truth_im.shape[0] > 0

        if has_detections and has_truths:
            # detections and truths need to match
            match_results_im = match_to_truth_im(detections_im, truth_im, iou_threshold)
        elif has_detections:
            # detections and no truths - all detections false positive.
            # .assign returns a new frame, so nothing is written onto a slice
            # of the caller's data (no SettingWithCopyWarning).
            match_results_im = detections_im[['xmn', 'xmx', 'ymn', 'ymx', 'conf', 'filename', 'pred_class']].assign(
                confmat='FP', tru_box='', tru_class=0)
        elif has_truths:
            # no detections and truths - all truths false negatives
            match_results_im = truth_im[['xmn', 'xmx', 'ymn', 'ymx', 'filename', 'tru_class']].assign(
                conf=1.0, confmat='FN', tru_box='', pred_class=0)
        else:
            continue
        per_image.append(match_results_im)

    if not per_image:
        return pd.DataFrame(columns=out_cols)

    # Single concatenation instead of growing the frame image by image.
    matched_results = pd.concat(per_image, axis=0, ignore_index=True, sort=False)
    # Standard columns first (created as NaN if absent), any extras after.
    extra_cols = [col for col in matched_results.columns if col not in out_cols]
    return matched_results.reindex(columns=out_cols + extra_cols)

In [None]:
def calculate_threshold_results(windows_df, truth_df, im_dir, nms_threshold=0.05, iou_threshold=0.25, method='first', start_thresh=0.01, nsteps=100):
    """Sweep the confidence threshold and tabulate detection metrics.

    For each of ``nsteps`` thresholds evenly spaced from ``start_thresh`` to 1,
    detections with ``conf`` above the threshold are run through
    ``nms_for_yolo`` and ``match_to_truth`` and the TP / FP / FN rows counted.

    Parameters
    ----------
    windows_df : pandas.DataFrame
        Raw detections (needs a ``conf`` column).
    truth_df : pandas.DataFrame
        Ground-truth boxes.
    im_dir : str or Path
        Directory of evaluated images; its ``*.jpg`` and ``*.png`` count is
        the denominator of FPPI.
    nms_threshold, iou_threshold, method
        Forwarded to ``nms_for_yolo`` / ``match_to_truth``.
    start_thresh : float
        First confidence threshold of the sweep.
    nsteps : int
        Number of thresholds.

    Returns
    -------
    pandas.DataFrame
        One row per threshold with float columns ``threshold``, ``TP``, ``FP``,
        ``FN``, ``RE`` (recall), ``PR`` (precision), ``MR`` (miss rate) and
        ``FPPI`` (false positives per image). A ratio whose denominator is
        zero is reported as NaN.
    """
    threshes = np.linspace(start_thresh, 1, nsteps)
    image_dir = Path(im_dir)
    n_images = len(list(image_dir.glob("*.jpg"))) + len(list(image_dir.glob("*.png")))

    def _ratio(numerator, denominator):
        # The sweep ends at threshold 1.0 where normally no detection survives,
        # so TP + FP == 0 is an expected case, not an error.
        return numerator / denominator if denominator > 0 else np.nan

    rows = []
    for th in threshes:
        print(th)
        detections_th = windows_df[windows_df.conf > th]
        detections_th = nms_for_yolo(detections_th, nms_threshold, method)
        result_th = match_to_truth(detections_th, truth_df, im_dir, iou_threshold)
        TP = int(np.sum(result_th.confmat == "TP"))
        FP = int(np.sum(result_th.confmat == "FP"))
        FN = int(np.sum(result_th.confmat == "FN"))
        RE = _ratio(TP, TP + FN)
        PR = _ratio(TP, TP + FP)
        MR = 1 - RE
        FPPI = _ratio(FP, n_images)

        rows.append([th, TP, FP, FN, RE, PR, MR, FPPI])

    threshold_array = np.array(rows, dtype=float).reshape(-1, 8)
    threshold_metrics = pd.DataFrame(threshold_array, columns=['threshold', 'TP', 'FP', 'FN', 'RE', 'PR', 'MR', 'FPPI'])
    return threshold_metrics

In [None]:
if predict_results:
    gfrc_metrics = calculate_threshold_results(gfrc_windows, gfrc_truth, valid_whole_image_dir, 0.25, 0.25, 'mean', 0.05, 96)

In [None]:
if predict_results:
    gfrc_metrics.to_csv(output_dir + 'gfrc_' + experiment + "_test_metrics.csv", index=False)
else:
    gfrc_metrics = pd.read_csv(output_dir + 'gfrc_' + experiment + "_test_metrics.csv")
gfrc_metrics[gfrc_metrics.RE > 0.4]

In [None]:

if experiment == 'rgb_baseline2':
    gfrc_conf_threshold = 0.2 #rgb baseline final
else:
    gfrc_conf_threshold = 0.25 #ci?
gfrc_metrics[gfrc_metrics.RE > 0.6]

In [None]:
gfrc_conf_threshold

In [None]:
def single_threshold_results(windows_df, truth_df, im_dir, thresh, nms_threshold=0.05, iou_threshold=0.25, method='first'):
    """Run NMS and truth matching at one confidence threshold.

    Detections with ``conf`` above ``thresh`` are kept, their ``class`` is
    shifted up by one (so that 0 can stand for "no object" in the matched
    results), and the result is passed through ``nms_for_yolo`` and
    ``match_to_truth``.

    Parameters
    ----------
    windows_df : pandas.DataFrame
        Raw detections with at least ``conf`` and ``class`` columns. It is not
        modified.
    truth_df : pandas.DataFrame
        Ground-truth boxes.
    im_dir : str or Path
        Directory of evaluated images, forwarded to ``match_to_truth``.
    thresh : float
        Confidence threshold.
    nms_threshold, iou_threshold, method
        Forwarded to ``nms_for_yolo`` / ``match_to_truth``.

    Returns
    -------
    pandas.DataFrame
        The TP / FP / FN table produced by ``match_to_truth``.
    """
    # Explicit copy: the class shift must not write through to a slice of the
    # caller's frame (previously a chained assignment / SettingWithCopyWarning).
    detections_th = windows_df[windows_df.conf > thresh].copy()
    detections_th['class'] = detections_th['class'].add(1)
    detections_th = nms_for_yolo(detections_th, nms_threshold, method)
    result_th = match_to_truth(detections_th, truth_df, im_dir, iou_threshold)

    return result_th

In [None]:
if predict_results:
    gfrc_results = single_threshold_results(gfrc_windows, gfrc_truth, valid_whole_image_dir, gfrc_conf_threshold, 0.25, 0.25, 'mean')
    gfrc_results.to_csv(output_dir + 'gfrc_' + experiment + "_test_results.csv", index=False)
else:
    gfrc_results = pd.read_csv(output_dir + 'gfrc_' + experiment + "_test_results.csv")

In [None]:
# Total true positives and false positives per image. The image count comes
# from the test-image listing built earlier (the same glob the FPPI column of
# the threshold sweep uses) rather than a hard-coded literal.
print(np.sum(gfrc_results.confmat == "TP"), np.sum(gfrc_results.confmat == "FP") / len(image_files))

In [None]:
#gfrc_results_rgb = pd.read_csv(output_dir + "gfrc_gfrc_rgb_bin_results.csv")
#print(np.sum(gfrc_results_rgb.confmat == "TP"), np.sum(gfrc_results_rgb.confmat == "FP")/316)

In [None]:
def get_image_stats(detections_df):
    """Count TP / FP / FN rows per image.

    Parameters
    ----------
    detections_df : pandas.DataFrame
        Matched results with ``filename`` and ``confmat`` columns, where
        ``confmat`` holds ``'TP'``, ``'FP'`` or ``'FN'``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct ``filename`` (sorted), with integer columns
        ``TP``, ``FP`` and ``FN``.
    """
    records = []
    for fl in np.unique(detections_df.filename):
        labels = detections_df.loc[detections_df['filename'] == fl, 'confmat']
        records.append({
            'filename': fl,
            'TP': int((labels == 'TP').sum()),
            'FP': int((labels == 'FP').sum()),
            'FN': int((labels == 'FN').sum()),
        })

    # Built from records so the counts stay integers; stacking them next to the
    # file names with np.hstack converted every count to a string.
    return pd.DataFrame(records, columns=['filename', 'TP', 'FP', 'FN'])

In [None]:
if predict_results:
    gfrc_image_results = get_image_stats(gfrc_results)
    gfrc_image_results.to_csv(output_dir + 'gfrc_' + experiment + "_test_image_results.csv", index=False)
else:
    gfrc_image_results = pd.read_csv(output_dir + 'gfrc_' + experiment + "_test_image_results.csv")

In [None]:
def draw_res(results_all_ims, valid_whole_image_dir, image_out_dir, dataset, experiment):
    """Draw the matched results onto every image of a directory and save them.

    Every ``*.jpg`` and ``*.png`` in ``valid_whole_image_dir`` is loaded, the
    rows of ``results_all_ims`` for that file are drawn with
    ``draw_results_on_image``, and the picture is written to ``image_out_dir``
    twice: as ``<stem>_<experiment><ext>`` and under its original name.
    Both directory arguments are joined by plain string concatenation, so they
    must end with a path separator. ``dataset`` is not used.
    """
    Path(image_out_dir).mkdir(parents=True, exist_ok=True)

    source_dir = Path(valid_whole_image_dir)
    image_names = [path.name for path in [*source_dir.glob("*.jpg"), *source_dir.glob("*.png")]]

    for name in image_names:
        # OpenCV loads BGR; the drawing helper is given RGB.
        rgb_im = cv2.cvtColor(cv2.imread(valid_whole_image_dir + name), cv2.COLOR_BGR2RGB)

        rows_for_image = results_all_ims[results_all_ims.filename == name]
        annotated = draw_results_on_image(rgb_im, rows_for_image)

        # Swap the channels back so cv2.imwrite stores the right colours.
        annotated = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)

        stem, ext = name[:-4], name[-4:]
        for out_name in (stem + '_' + experiment + ext, name):
            cv2.imwrite(str(image_out_dir + out_name), annotated)
        

In [None]:
def get_fpz(results_all_ims, valid_whole_image_dir, image_out_dir, dataset, experiment, pad=0):
    """Collect an image crop for every false-positive detection.

    Parameters
    ----------
    results_all_ims : pandas.DataFrame
        Matched results with ``filename``, ``confmat``, ``xmn``, ``xmx``,
        ``ymn`` and ``ymx`` columns.
    valid_whole_image_dir : str
        Directory holding the whole images; joined to the file name by string
        concatenation, so it must end with a path separator.
    image_out_dir : str or Path
        Created if missing. Nothing is written to it by this function.
    dataset, experiment
        Accepted for signature compatibility; not used.
    pad : int, optional
        Margin in pixels added around each box before cropping. The default of
        0 crops the box itself.

    Returns
    -------
    list of numpy.ndarray
        RGB crops, each enlarged to twice its size, in image then row order.

    Raises
    ------
    FileNotFoundError
        If an image listed in the results cannot be read.

    Notes
    -----
    Fixes relative to the earlier version: the enlarged crop is what gets
    returned (it used to be assigned to a misspelt variable and dropped, unlike
    ``get_fnz``); crop bounds are cast to int and clamped to the image, so
    float coordinates loaded from CSV and boxes touching the border no longer
    fail or wrap around; boxes that are empty after clamping are skipped.
    """
    fpz_out = []

    Path(image_out_dir).mkdir(parents=True, exist_ok=True)

    fp_rows = results_all_ims[results_all_ims.confmat == 'FP']
    image_files = [Path(fl).name for fl in np.unique(fp_rows.filename).tolist()]

    for fl in image_files:
        # Per image
        whole_im = cv2.imread(valid_whole_image_dir + fl)
        if whole_im is None:
            raise FileNotFoundError(f"could not read image: {valid_whole_image_dir + fl}")
        whole_im = cv2.cvtColor(whole_im, cv2.COLOR_BGR2RGB)
        im_hei, im_wid = whole_im.shape[:2]

        # crop every false positive of this image
        for row in fp_rows[fp_rows.filename == fl].itertuples(index=False):
            xmn = max(0, int(row.xmn) - pad)
            ymn = max(0, int(row.ymn) - pad)
            xmx = min(im_wid, int(row.xmx) + pad)
            ymx = min(im_hei, int(row.ymx) + pad)
            if xmx <= xmn or ymx <= ymn:
                # Nothing left inside the image; cv2.resize would fail on it.
                continue
            fp_window = whole_im[ymn:ymx, xmn:xmx]
            fp_window = cv2.resize(fp_window, (fp_window.shape[1]*2, fp_window.shape[0]*2))
            fpz_out.append(fp_window)

    return fpz_out

In [None]:
def get_fnz(results_all_ims, valid_whole_image_dir, image_out_dir, dataset, experiment, pad=0):
    """Collect an image crop for every false-negative (missed) truth box.

    Parameters
    ----------
    results_all_ims : pandas.DataFrame
        Matched results with ``filename``, ``confmat``, ``xmn``, ``xmx``,
        ``ymn`` and ``ymx`` columns.
    valid_whole_image_dir : str
        Directory holding the whole images; joined to the file name by string
        concatenation, so it must end with a path separator.
    image_out_dir : str or Path
        Created if missing. Nothing is written to it by this function.
    dataset, experiment
        Accepted for signature compatibility; not used.
    pad : int, optional
        Margin in pixels added around each box before cropping. The default of
        0 crops the box itself.

    Returns
    -------
    list of numpy.ndarray
        RGB crops, each enlarged to twice its size, in image then row order.

    Raises
    ------
    FileNotFoundError
        If an image listed in the results cannot be read.

    Notes
    -----
    Crop bounds are cast to int and clamped to the image, so float coordinates
    loaded from CSV and boxes touching the border no longer fail or wrap
    around; boxes that are empty after clamping are skipped.
    """
    fnz_out = []

    Path(image_out_dir).mkdir(parents=True, exist_ok=True)

    fn_rows = results_all_ims[results_all_ims.confmat == 'FN']
    image_files = [Path(fl).name for fl in np.unique(fn_rows.filename).tolist()]

    for fl in image_files:
        # Per image
        whole_im = cv2.imread(valid_whole_image_dir + fl)
        if whole_im is None:
            raise FileNotFoundError(f"could not read image: {valid_whole_image_dir + fl}")
        whole_im = cv2.cvtColor(whole_im, cv2.COLOR_BGR2RGB)
        im_hei, im_wid = whole_im.shape[:2]

        # crop every false negative of this image
        for row in fn_rows[fn_rows.filename == fl].itertuples(index=False):
            xmn = max(0, int(row.xmn) - pad)
            ymn = max(0, int(row.ymn) - pad)
            xmx = min(im_wid, int(row.xmx) + pad)
            ymx = min(im_hei, int(row.ymx) + pad)
            if xmx <= xmn or ymx <= ymn:
                # Nothing left inside the image; cv2.resize would fail on it.
                continue
            fn_window = whole_im[ymn:ymx, xmn:xmx]
            fn_window = cv2.resize(fn_window, (fn_window.shape[1]*2, fn_window.shape[0]*2))
            fnz_out.append(fn_window)

    return fnz_out

In [None]:
gfrc_image_outdir = "/home/cmf21/pytorch_save/output_for_draft/" + 'gfrc_' + experiment + '_25_25/'
draw_dir = "/home/cmf21/pytorch_save/output_for_draft/images_to_draw_test/"
Path(gfrc_image_outdir).mkdir(parents=True, exist_ok=True)

In [None]:
draw_res(gfrc_results, draw_dir + 'GFRC/', gfrc_image_outdir, 'gfrc', experiment)
gfrc_fnz = get_fnz(gfrc_results, valid_whole_image_dir, gfrc_image_outdir, 'gfrc', experiment)
gfrc_fpz = get_fpz(gfrc_results, valid_whole_image_dir, gfrc_image_outdir, 'gfrc', experiment)

In [None]:
gfrc_image_results[gfrc_image_results.TP > 5]

In [None]:
gfrc_image_results['recall'] = gfrc_image_results['TP'] / (gfrc_image_results['TP'] + gfrc_image_results['FN'] + 0.0001)

In [None]:
gfrc_image_results[gfrc_image_results.FP>20]

In [None]:
print(gfrc_image_results[gfrc_image_results.filename == 'Z81_Img06976.jpg'])
Image.open(gfrc_image_outdir + 'Z81_Img06976_' + experiment + '.jpg')

In [None]:
Image.open("/home/cmf21/pytorch_save/output_for_draft/" + 'gfrc_rgb_baseline2_25_25/' + 'Z81_Img06976_rgb_baseline2.jpg')

In [None]:
print(gfrc_image_results[gfrc_image_results.filename == 'Z108_Img00684.jpg'])
Image.open(gfrc_image_outdir + 'Z108_Img00684_' + experiment + '.jpg')

In [None]:
print(gfrc_image_results[gfrc_image_results.filename == 'Z110_Img02292.jpg'])
Image.open(gfrc_image_outdir + 'Z110_Img02292_' + experiment + '.jpg')

In [None]:
print(gfrc_image_results[gfrc_image_results.filename == 'Z123_Img12080.jpg'])
Image.open(gfrc_image_outdir + 'Z123_Img12080_' + experiment + '.jpg')

In [None]:
gfrc_image_results_rgb = pd.read_csv("/home/cmf21/pytorch_save/output_for_draft/" + 'gfrc_' + 'rgb_baseline2' + "_test_image_results.csv")

In [None]:
gfrc_image_results_rgb[gfrc_image_results_rgb.TP>5]

In [None]:
gfrc_image_results[gfrc_image_results.TP>5]

In [None]:
def count_funk(df):
    """Bin images by their number of false positives.

    ``df`` must expose an ``FP`` column. Returns a 6-tuple with the number of
    rows where ``FP == 0``, ``0 < FP < 6``, ``5 < FP < 11``, ``10 < FP < 21``,
    ``20 < FP < 51`` and ``FP > 50``.
    """
    # (exclusive lower bound, exclusive upper bound) of the four middle bins.
    open_bins = [(0, 6), (5, 11), (10, 21), (20, 51)]
    middle = tuple(np.sum(np.logical_and(df.FP > low, df.FP < high)) for low, high in open_bins)
    return (np.sum(df.FP == 0),) + middle + (np.sum(df.FP > 50),)

count_funk(gfrc_image_results)

In [None]:
count_funk(gfrc_image_results_rgb)

In [None]:
gfrc_image_results_val = pd.read_csv("/home/cmf21/pytorch_save/output_for_draft/" + 'gfrc_' + experiment + "_image_results.csv")
count_funk(gfrc_image_results_val)

In [None]:
from PIL import Image
# final
# FN Z138_Img02888.jpg
# FP Z120_Img11234.jpg
# TP Z124_Img13083.jpg
Image.open(gfrc_image_outdir + 'Z138_Img02888_' + experiment + '.jpg')

In [None]:

def create_mosaic(list_in, mosaic_tuple, image_size, multiplier=1, grey=False):
    """Tile a random sample of image crops into one mosaic image.

    Parameters
    ----------
    list_in : list of numpy.ndarray
        Candidate crops, ``(rows, cols, 3)`` or ``(rows, cols)``.
    mosaic_tuple : tuple of int
        ``(rows, columns)`` of the mosaic grid.
    image_size : tuple of int
        ``(rows, columns)`` of one grid cell before ``multiplier`` is applied.
    multiplier : int, optional
        Integer scale factor applied to every cell.
    grey : bool, optional
        Build a single-channel mosaic instead of a 3-channel one.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array of shape
        ``(cell_rows * rows, cell_cols * columns, channels)``. Each sampled
        crop is padded with white to the cell's aspect ratio, resized to the
        cell size and placed in row-major order. If fewer crops than grid
        cells are available, all crops are used (in random order) and the
        remaining cells stay black.
    """
    rowz, colz = mosaic_tuple[0], mosaic_tuple[1]
    img_rw, img_cl = image_size[0], image_size[1]
    img_asp = img_rw / img_cl
    channels = 1 if grey else 3
    cell_rw = img_rw * multiplier
    cell_cl = img_cl * multiplier

    combined_im = np.zeros((cell_rw * rowz, cell_cl * colz, channels), dtype=np.uint8)

    # random.sample raises ValueError if asked for more items than exist.
    sample_list = random.sample(list_in, min(rowz * colz, len(list_in)))

    for idx, im in enumerate(sample_list):
        # Bring every crop to (rows, cols, channels) with the mosaic's channel count.
        if im.ndim == 2:
            im = im[:, :, np.newaxis]
        if grey and im.shape[2] != 1:
            im = cv2.cvtColor(im, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
        elif not grey and im.shape[2] == 1:
            im = np.repeat(im, 3, axis=2)

        im_rw, im_cl, im_ch = im.shape
        if im_rw / im_cl > img_asp:
            # Crop is taller than the cell: pad left and right with white.
            ncls_to_add = int((im_rw / img_asp - im_cl) / 2)
            cls_to_add = np.ones((im_rw, ncls_to_add, im_ch)) * 255
            border_im = np.hstack((cls_to_add, im, cls_to_add))
        else:
            # Crop is wider than (or matches) the cell: pad top and bottom.
            nrws_to_add = int((im_cl * img_asp - im_rw) / 2)
            rws_to_add = np.ones((nrws_to_add, im_cl, im_ch)) * 255
            border_im = np.vstack((rws_to_add, im, rws_to_add))

        im_reshape = cv2.resize(border_im, (cell_cl, cell_rw))
        if im_reshape.ndim == 2:
            # cv2.resize drops the channel axis of single-channel input.
            im_reshape = im_reshape[:, :, np.newaxis]

        row, col = divmod(idx, colz)
        y1, y2 = row * cell_rw, (row + 1) * cell_rw
        x1, x2 = col * cell_cl, (col + 1) * cell_cl
        combined_im[y1:y2, x1:x2, :] = im_reshape

    return combined_im

In [None]:
len(gfrc_fnz)

In [None]:
fnz_combined = create_mosaic(gfrc_fnz, (4, 8), (60*2, 60*2))
Image.fromarray(fnz_combined)

In [None]:
fpz_combined = create_mosaic(gfrc_fpz, (4, 8), (60*2, 60*2))
Image.fromarray(fpz_combined)

In [None]:
outdir = "/home/cmf21/pytorch_save/output_for_draft/"
# NOTE(review): there is no underscore between the experiment name and
# 'test_mosaic', so the files are named 'gfrc_<experiment>test_mosaic_*.jpg' -
# confirm whether '_test_mosaic' was intended.
outnam_g = 'gfrc_' + experiment + 'test_mosaic'
fn_out_path = outdir + outnam_g + '_fn.jpg'
fp_out_path = outdir + outnam_g + '_fp.jpg'
# The channel order is swapped before writing with OpenCV.
# NOTE(review): the swapped arrays are bound back to the same names, so
# re-running this cell swaps the channels again and saves wrong colours.
fnz_combined = cv2.cvtColor(fnz_combined, cv2.COLOR_BGR2RGB)
cv2.imwrite(fn_out_path, fnz_combined)
fpz_combined = cv2.cvtColor(fpz_combined, cv2.COLOR_BGR2RGB)
cv2.imwrite(fp_out_path, fpz_combined)

In [None]:

def conf_mat_plot_heatmap_blankTN(cm, display_labels, title_in, heatmap_type='true'):
    """Plot a confusion matrix as a grey heatmap with the [0, 0] cell left blank.

    Parameters
    ----------
    cm : numpy.ndarray
        Square matrix of counts; rows are true labels, columns predicted labels.
    display_labels : sequence of str
        Tick labels, one per class. Two labels give a small figure, any other
        number a large one.
    title_in : str
        Figure title.
    heatmap_type : {'true', 'percent', 'pred'}, optional
        What each cell's shade is relative to: its row total (``'true'``), the
        grand total (``'percent'``) or its column total (``'pred'``).

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the plot.

    Raises
    ------
    ValueError
        If ``heatmap_type`` is not one of the supported values.
    """
    if len(display_labels) == 2:
        fig, ax = plt.subplots(figsize=(6,4.5))
    else:
        fig, ax = plt.subplots(figsize=(12,9))
    n_classes = cm.shape[0]
    cmap = 'Greys'

    if heatmap_type == 'percent':
        sum_vals = np.full((n_classes, n_classes), np.sum(cm))
    elif heatmap_type == 'true':
        sum_vals = np.reshape(np.repeat(np.sum(cm, axis=1), n_classes), (n_classes, n_classes))
    elif heatmap_type == 'pred':
        sum_vals = np.reshape(np.tile(np.sum(cm, axis=0), n_classes), (n_classes, n_classes))
    else:
        raise ValueError(f"heatmap_type must be 'percent', 'true' or 'pred', got {heatmap_type!r}")

    # Cells whose normaliser is zero (an empty row/column) are shaded as 0
    # instead of producing NaN and an undefined uint8 cast.
    fractions = np.divide(cm, sum_vals, out=np.zeros((n_classes, n_classes), dtype=float), where=sum_vals != 0)
    color_mapping = np.array(np.multiply(fractions, 255), np.uint8)

    for i in range(n_classes):
        # Grid lines between the cells.
        ax.axhline(i - .5, color='black', linewidth=1.0)
        ax.axvline(i - .5, color='black', linewidth=1.0)
        for j in range(n_classes):
            # The top-left cell is deliberately left blank.
            text_cm = "" if (i == 0 and j == 0) else format(cm[i, j], ',')
            # White text on dark cells, black text on light ones.
            txt_color = [1, 1, 1] if color_mapping[i, j] > 100 else [0, 0, 0]
            ax.text(j, i, text_cm, ha="center", va="center", color=txt_color, fontsize=18)

    ax.matshow(color_mapping, cmap=cmap)

    ax.set_xlabel("Predicted label", fontsize=16)
    ax.set_ylabel("True label", fontsize=16)
    ax.set_xticks(np.arange(n_classes))
    ax.set_yticks(np.arange(n_classes))
    ax.set_xticklabels(display_labels, fontsize=16)
    ax.set_yticklabels(display_labels, fontsize=16)
    ax.set_title(title_in, fontsize=16)
    ax.tick_params(bottom=True, labelbottom=True, top=False, labeltop=False)

    # Fix the y-limits so the first and last rows are not cut in half.
    ax.set_ylim((n_classes - 0.5, -0.5))

    return ax

In [None]:
def conf_mat_raw(true, predicted, labels):
    """Build a confusion matrix of raw counts.

    Parameters
    ----------
    true, predicted : array-like (numpy array or pandas Series)
        True and predicted labels, aligned element by element. ``predicted``
        must support boolean-mask indexing.
    labels : sequence
        Label values, in the order they should appear on both axes.

    Returns
    -------
    numpy.ndarray
        Integer matrix where entry ``[i, j]`` is the number of items with true
        label ``labels[i]`` and predicted label ``labels[j]``.
    """
    # Built-in int as dtype: the np.int alias was removed in NumPy 1.24.
    mat_out = np.zeros((len(labels), len(labels)), dtype=int)
    for i, row in enumerate(labels):
        preds_row = predicted[true == row]
        for j, col in enumerate(labels):
            mat_out[i, j] = np.sum(preds_row == col)
    return mat_out

In [None]:
def save_conf_mat_plot(cm, labels, title, results_dir, prefix):
    """Plot a confusion matrix with a title and save it as a PNG.

    Parameters
    ----------
    cm : array-like
        Counts with ``len(labels) ** 2`` entries; reshaped to a square matrix.
    labels : sequence of str
        Tick labels passed to the heatmap.
    title : str
        Title passed to the heatmap.
    results_dir : str or pathlib.Path
        Directory the image is written to.
    prefix : str
        File-name prefix; the file is ``<prefix>_confidence_matrix.png``.
    """
    n_class = len(labels)
    # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
    cm_all = np.reshape(np.array(cm, dtype=int), (n_class, n_class))
    cm_out = conf_mat_plot_heatmap_blankTN(cm_all, labels, title)
    out_path = prefix + '_confidence_matrix.png'
    results_dir = Path(results_dir)
    cm_out.get_figure().savefig(results_dir / out_path)

In [None]:
# Binary truth column: 1 where tru_class > 0, otherwise 0.
# Builtin int replaces the np.int alias, which NumPy 1.24 removed.
gfrc_results['tru_class_bin'] = np.array(gfrc_results.tru_class > 0, dtype=int)
# 2x2 counts of binary truth against pred_class.
cm_gfrc = conf_mat_raw(gfrc_results.tru_class_bin, gfrc_results.pred_class, [0,1])

In [None]:
# Destination folder and file-name prefix for this experiment's test-set plot.
outdir = "/home/cmf21/pytorch_save/output_for_draft/"
affix = "".join(('gfrc_', experiment, '_test'))
# The figure is saved without a title; the two classes are labelled on the axes.
title = ""
labels = ["none", "animal"]
# Species legend (as noted by the author):
# blank
# springbok 1 (0+1) code=bok
# oryx 2 (1+1) code=oryx
# kudu 3 (2+1) code=kudu
# zebra 4 (3+1) code=zeb
# ostrich 5 (4+1) code=ost
# unidentified 6 (5+1) code=unid
save_conf_mat_plot(cm=cm_gfrc, labels=labels, title=title, results_dir=outdir, prefix=affix)

In [None]:
# Display the saved plot for the rgb_baseline2 test run.
conf_mat_png = outdir + 'gfrc_rgb_baseline2_test_confidence_matrix.png'
Image.open(conf_mat_png)

In [None]:
# Display the saved plot for the meta_ci_end test run.
conf_mat_png = outdir + 'gfrc_meta_ci_end_test_confidence_matrix.png'
Image.open(conf_mat_png)

In [None]:
# Display the saved plot for gfrc_rgb_bin.
conf_mat_png = outdir + 'gfrc_gfrc_rgb_bin_confidence_matrix.png'
Image.open(conf_mat_png)

In [None]:
# Display the saved plot for gfrc_meta_prediction.
conf_mat_png = outdir + 'gfrc_gfrc_meta_prediction_confidence_matrix.png'
Image.open(conf_mat_png)

In [None]:
# Display the saved plot for meta_prediction_end.
conf_mat_png = outdir + 'gfrc_meta_prediction_end_confidence_matrix.png'
Image.open(conf_mat_png)

In [None]:
# Display the saved plot for meta_prediction_norm.
conf_mat_png = outdir + 'gfrc_meta_prediction_norm_confidence_matrix.png'
Image.open(conf_mat_png)

In [None]:
# Display the saved plot for meta_prediction_ci.
conf_mat_png = outdir + 'gfrc_meta_prediction_ci_confidence_matrix.png'
Image.open(conf_mat_png)

In [None]:
# Display the saved plot for meta_pred_ci_spread.
conf_mat_png = outdir + 'gfrc_meta_pred_ci_spread_confidence_matrix.png'
Image.open(conf_mat_png)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Try the plotting helper on a hand-entered 2x2 count matrix, with an empty title.
example_cm = np.array([[0, 3391], [143, 343]])
conf_mat_plot_heatmap_blankTN(example_cm, ["none", "animal"], "")

In [None]:
import matplotlib.pyplot as plt

def conf_mat_plot_heatmap_blankTN2(cm, display_labels, heatmap_type='true'):
    """Draw an untitled greyscale confusion-matrix heatmap with a blank top-left cell.

    Every cell is shaded by its count divided by a normaliser selected with
    ``heatmap_type`` and is annotated with the raw count (thousands-separated).
    The ``[0, 0]`` cell is shaded like the others but its text is left blank.
    No title is set on the axes.

    Parameters
    ----------
    cm : array-like, shape (n_classes, n_classes)
        Matrix of counts. Rows are plotted on the "True label" axis and
        columns on the "Predicted label" axis.
    display_labels : sequence of str
        Tick labels for both axes. A length of 2 selects the smaller figure size.
    heatmap_type : {'true', 'percent', 'pred'}, optional
        Normaliser for the shading: ``'true'`` divides by the row total,
        ``'pred'`` by the column total, ``'percent'`` by the grand total.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the heatmap (use ``.get_figure()`` to save it).

    Raises
    ------
    ValueError
        If ``heatmap_type`` is not one of the supported values.
    """
    cm = np.asarray(cm)
    if len(display_labels) == 2:
        _, ax = plt.subplots(figsize=(6,4.5))
    else:
        _, ax = plt.subplots(figsize=(12,9))
    n_classes = cm.shape[0]
    cmap = 'Greys'

    # keepdims lets the totals broadcast against cm instead of building an (n, n) copy by hand.
    if heatmap_type == 'percent':
        sum_vals = np.sum(cm)
    elif heatmap_type == 'true':
        sum_vals = np.sum(cm, axis=1, keepdims=True)
    elif heatmap_type == 'pred':
        sum_vals = np.sum(cm, axis=0, keepdims=True)
    else:
        raise ValueError(
            "heatmap_type must be 'percent', 'true' or 'pred', got {!r}".format(heatmap_type)
        )

    # A zero total (e.g. a class with no samples) would give NaN; shade those cells as 0 instead.
    denom = np.broadcast_to(sum_vals, cm.shape)
    fractions = np.divide(cm, denom, out=np.zeros(cm.shape, dtype=float), where=denom != 0)
    color_mapping = np.array(np.multiply(fractions, 255), np.uint8)

    for i in range(n_classes):
        for j in range(n_classes):
            if i == 0 and j == 0:
                # Top-left cell is deliberately left without a number.
                text_cm = ""
            else:
                text_cm = format(cm[i, j], ',')
            # White text on dark cells, black text on light cells.
            txt_color = [1, 1, 1] if color_mapping[i, j] > 100 else [0, 0, 0]
            ax.text(j, i, text_cm, ha="center", va="center", color=txt_color, fontsize=18)

    # One horizontal and one vertical separator per class index.
    for k in range(n_classes):
        ax.axhline(k - .5, color='black', linewidth=1.0)
        ax.axvline(k - .5, color='black', linewidth=1.0)

    ax.matshow(color_mapping, cmap=cmap)

    ax.set_xlabel("Predicted label", fontsize=16)
    ax.set_ylabel("True label", fontsize=16)
    ax.set_xticks(np.arange(n_classes))
    ax.set_yticks(np.arange(n_classes))
    ax.set_xticklabels(display_labels, fontsize=16)
    ax.set_yticklabels(display_labels, fontsize=16)
    # matshow puts ticks on top by default; move them to the bottom.
    ax.tick_params(bottom=True, labelbottom=True, top=False, labeltop=False)

    # Pin the y-limits so the first and last rows are shown at full height.
    ax.set_ylim((n_classes - 0.5, -0.5))

    return ax

In [None]:
from pathlib import Path

def save_conf_mat_plot_ed(cm, labels, results_dir, prefix):
    """Plot an untitled confusion matrix and save it as a PNG.

    Parameters
    ----------
    cm : array-like
        Counts with ``len(labels) ** 2`` entries; reshaped to a square matrix.
    labels : sequence of str
        Tick labels passed to the heatmap.
    results_dir : str or pathlib.Path
        Directory the image is written to.
    prefix : str
        File-name prefix; the file is ``<prefix>_confidence_matrix.png``.
    """
    n_class = len(labels)
    # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
    cm_all = np.reshape(np.array(cm, dtype=int), (n_class, n_class))
    cm_out = conf_mat_plot_heatmap_blankTN2(cm_all, labels)
    out_path = prefix + '_confidence_matrix.png'
    results_dir = Path(results_dir)
    cm_out.get_figure().savefig(results_dir / out_path)

In [None]:
import pandas as pd

# NOTE: this rebinds output_dir, which earlier in the notebook pointed at the
# "test_images_out/" folder under output_base_dir.
output_dir = "/home/cmf21/pytorch_save/output_for_draft/"
exp1 = "gfrc_meta_prediction"
# Per-experiment results file: <output_dir>gfrc_<experiment>_results.csv
results1_csv = output_dir + 'gfrc_' + exp1 + "_results.csv"
gfrc_results1 = pd.read_csv(results1_csv)

In [None]:
import numpy as np

# Binary truth column: 1 where tru_class > 0, otherwise 0.
# Builtin int replaces the np.int alias, which NumPy 1.24 removed.
gfrc_results1['tru_class_bin'] = np.array(gfrc_results1.tru_class > 0, dtype=int)
cm_gfrc1 = conf_mat_raw(gfrc_results1.tru_class_bin, gfrc_results1.pred_class, [0,1])
cm_gfrc1

In [None]:
# Axis labels for binary classes 0 and 1.
labels = ["none", "animal"]

# Writes <exp1>_confidence_matrix.png into output_dir.
save_conf_mat_plot_ed(cm=cm_gfrc1, labels=labels, results_dir=output_dir, prefix=exp1)

In [None]:
# Load this experiment's results, binarise the truth, count and save the plot.
exp2 = "meta_prediction_end"
gfrc_results2 = pd.read_csv(output_dir + 'gfrc_' + exp2 + "_results.csv")
# Builtin int replaces the np.int alias, which NumPy 1.24 removed.
gfrc_results2['tru_class_bin'] = np.array(gfrc_results2.tru_class > 0, dtype=int)
cm_gfrc2 = conf_mat_raw(gfrc_results2.tru_class_bin, gfrc_results2.pred_class, [0,1])
save_conf_mat_plot_ed(cm_gfrc2, labels, output_dir, exp2)

In [None]:
# Load this experiment's results, binarise the truth, count and save the plot.
exp3 = "meta_prediction_ci"
gfrc_results3 = pd.read_csv(output_dir + 'gfrc_' + exp3 + "_results.csv")
# Builtin int replaces the np.int alias, which NumPy 1.24 removed.
gfrc_results3['tru_class_bin'] = np.array(gfrc_results3.tru_class > 0, dtype=int)
cm_gfrc3 = conf_mat_raw(gfrc_results3.tru_class_bin, gfrc_results3.pred_class, [0,1])
save_conf_mat_plot_ed(cm_gfrc3, labels, output_dir, exp3)

In [None]:
# Load this experiment's results, binarise the truth, count and save the plot.
exp4 = "meta_pred_ci_spread"
gfrc_results4 = pd.read_csv(output_dir + 'gfrc_' + exp4 + "_results.csv")
# Builtin int replaces the np.int alias, which NumPy 1.24 removed.
gfrc_results4['tru_class_bin'] = np.array(gfrc_results4.tru_class > 0, dtype=int)
cm_gfrc4 = conf_mat_raw(gfrc_results4.tru_class_bin, gfrc_results4.pred_class, [0,1])
save_conf_mat_plot_ed(cm_gfrc4, labels, output_dir, exp4)

In [None]:
# Load this experiment's results, binarise the truth, count and save the plot.
exp5 = "meta_ci_end"
gfrc_results5 = pd.read_csv(output_dir + 'gfrc_' + exp5 + "_results.csv")
# Builtin int replaces the np.int alias, which NumPy 1.24 removed.
gfrc_results5['tru_class_bin'] = np.array(gfrc_results5.tru_class > 0, dtype=int)
cm_gfrc5 = conf_mat_raw(gfrc_results5.tru_class_bin, gfrc_results5.pred_class, [0,1])
save_conf_mat_plot_ed(cm_gfrc5, labels, output_dir, exp5)

In [None]:
# Load this experiment's results, binarise the truth, count and save the plot.
exp6 = "gfrc_rgb_bin"
gfrc_results6 = pd.read_csv(output_dir + 'gfrc_' + exp6 + "_results.csv")
# Builtin int replaces the np.int alias, which NumPy 1.24 removed.
gfrc_results6['tru_class_bin'] = np.array(gfrc_results6.tru_class > 0, dtype=int)
cm_gfrc6 = conf_mat_raw(gfrc_results6.tru_class_bin, gfrc_results6.pred_class, [0,1])
save_conf_mat_plot_ed(cm_gfrc6, labels, output_dir, exp6)