In [1]:
import warnings
warnings.simplefilter("ignore")
from os import listdir
from xml.etree import ElementTree
from numpy import zeros
from numpy import asarray
from mrcnn.utils import Dataset
from mrcnn.config import Config
from mrcnn.model import MaskRCNN
from numpy import expand_dims
from numpy import mean
from mrcnn.utils import compute_ap
from mrcnn.model import load_image_gt
from mrcnn.model import mold_image
from matplotlib import pyplot
from matplotlib.patches import Rectangle
import os
import sys
import random
import math
import numpy as np
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
import pandas as pd
sys.path.append('/home/ubuntu/Mask_RCNN/samples/coco') 
import coco
%matplotlib inline
import re
import itertools 

Using TensorFlow backend.


In [None]:
# If there is no previous run, start here.
# Get the total number of images in each dataset (training vs. validation: n, p).

class ClownDataset(Dataset):
    """Mask R-CNN dataset whose masks are filled rectangles read from XML box annotations.

    Two classes are registered under the source name ``"dataset"``:
    ``clown`` (id 1) and ``others`` (id 2).
    """

    def load_dataset(self, dataset_dir, is_train=True):
        """Register the classes and every file found in ``<dataset_dir>/images/``.

        Args:
            dataset_dir: Directory that contains ``images/`` and ``annots/``.
            is_train: Accepted for call compatibility; it is not used here.
        """
        self.add_class("dataset", 1, "clown")
        self.add_class("dataset", 2, "others")
        images_dir = dataset_dir + '/images/'
        annotations_dir = dataset_dir + '/annots/'
        for filename in listdir(images_dir):
            # splitext handles any extension length ('.jpg', '.jpeg', ...),
            # whereas slicing off four characters only works for three-letter ones.
            image_id = os.path.splitext(filename)[0]
            img_path = images_dir + filename
            ann_path = annotations_dir + image_id + '.xml'
            self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path, class_ids=[0,1,2])

    def extract_boxes(self, filename):
        """Parse one annotation file.

        Returns:
            ``(boxes, width, height)`` where ``boxes`` is a list of
            ``[xmin, ymin, xmax, ymax]`` taken from every ``bndbox`` element and
            ``width``/``height`` come from the ``size`` element.
        """
        root = ElementTree.parse(filename).getroot()
        boxes = list()
        for box in root.findall('.//bndbox'):
            boxes.append([int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
        width = int(root.find('.//size/width').text)
        height = int(root.find('.//size/height').text)
        return boxes, width, height

    def load_mask(self, image_id):
        """Build one binary rectangle mask per annotated box.

        Returns:
            ``(masks, class_ids)``: a ``uint8`` array of shape
            ``[height, width, n_boxes]`` and an ``int32`` array of class ids.
        """
        info = self.image_info[image_id]
        boxes, w, h = self.extract_boxes(info['annotation'])
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        class_ids = list()
        for i, (xmin, ymin, xmax, ymax) in enumerate(boxes):
            # Masks are binary: every instance is marked with 1 and the class is
            # carried by class_ids only (previously 'others' pixels were set to 2).
            masks[ymin:ymax, xmin:xmax, i] = 1
            # NOTE(review): the first box is always labelled 'clown' and all later
            # ones 'others'; the XML <name> tag is never consulted - confirm the
            # annotation files guarantee that ordering.
            label = 'clown' if i == 0 else 'others'
            class_ids.append(self.class_names.index(label))
        return masks, asarray(class_ids, dtype='int32')

    def image_reference(self, image_id):
        """Return the image path stored for ``image_id``."""
        return self.image_info[image_id]['path']

class PredictionConfig(Config):
    """Configuration used when the model is built in inference mode."""
    # Name of this configuration.
    NAME = "Clown_cfg"
    # One GPU handling one image at a time, matching the single-image
    # detect() calls made during evaluation.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1
    # Background plus the two classes registered by ClownDataset.
    NUM_CLASSES = 1 + 2


def evaluate_model(dataset, model, cfg):
    """Run detection on every image of ``dataset`` and collect per-image metrics.

    Args:
        dataset: Prepared dataset exposing ``image_ids``.
        model: Model in inference mode exposing ``detect``.
        cfg: Configuration passed to ``load_image_gt``.

    Returns:
        Tuple ``(mAP, APs, F1_scores, PRECISION, RECALL, OVERLAPS, class_id)``.
        ``mAP`` is the mean of ``APs``; every other element is a list with one
        entry per image, holding the values returned by ``compute_ap`` at an IoU
        threshold of 0.5 (and the ground-truth class ids for ``class_id``).
        ``F1_scores`` is the harmonic mean of the *mean* precision and the
        *mean* recall of each image.
    """
    APs = []
    F1_scores = []
    PRECISION = []
    RECALL = []
    OVERLAPS = []
    class_id = []

    for image_id in dataset.image_ids:
        image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(dataset, cfg, image_id, use_mini_mask=False)
        # MaskRCNN.detect() resizes and mean-centres its inputs itself, so it must
        # be given the unmolded image. Feeding it the output of mold_image()
        # would subtract the mean pixel a second time.
        r = model.detect([image], verbose=0)[0]
        AP, precision, recalls, overlaps = compute_ap(gt_bbox, gt_class_id, gt_mask, r["rois"], r["class_ids"], r["scores"], r['masks'],
                                iou_threshold=0.5)
        mean_precision = mean(precision)
        mean_recall = mean(recalls)
        F1_scores.append((2 * (mean_precision * mean_recall)) / (mean_precision + mean_recall))
        APs.append(AP)
        PRECISION.append(precision)
        RECALL.append(recalls)
        OVERLAPS.append(overlaps)
        class_id.append(gt_class_id)

    mAP = mean(APs)
    return mAP, APs, F1_scores, PRECISION, RECALL, OVERLAPS, class_id


# Module-level dataset instance. Nothing is loaded into it here;
# generateMetrics(path) below creates and fills its own local ClownDataset.
test_set = ClownDataset()


# Get mAP, precision, recall and F1 (per bounding box in an image).

def generateMetrics(path, weights_path='/home/ubuntu/Mask_RCNN/models/clown_human_ballin_100epoch/mask_rcnn_clown_cfg_0100.h5'):
    """Evaluate a trained model on the dataset at ``path`` and tabulate the results.

    Note: a later cell defines another ``generateMetrics`` taking no arguments;
    once that cell has run, it replaces this function in the kernel.

    Args:
        path: Dataset directory handed to ``ClownDataset.load_dataset``.
        weights_path: Weights file to load. Defaults to the 100-epoch
            checkpoint that used to be hard-coded here.

    Returns:
        DataFrame with one row per image and the columns ``files``, ``AP``,
        ``F1``, ``class_id``, ``PRECISION``, ``RECALL`` and ``OVERLAPS``.
        ``files`` holds the keys of ``test_set.image_from_source_map``.
    """
    test_set = ClownDataset()
    test_set.load_dataset(path, is_train=False)
    test_set.prepare()
    cfg = PredictionConfig()
    model = MaskRCNN(mode='inference', model_dir='./', config=cfg)
    model.load_weights(weights_path, by_name=True)

    # Unpack by name instead of indexing the result tuple; the overall mAP is
    # not part of the per-image table.
    _mAP, aps, f1s, precisions, recalls, overlaps, class_ids = evaluate_model(test_set, model, cfg)

    df = pd.DataFrame({'files': list(test_set.image_from_source_map),
                       'AP': list(aps),
                       'F1': list(f1s),
                       'class_id': list(class_ids),
                       'PRECISION': list(precisions),
                       'RECALL': list(recalls),
                       'OVERLAPS': list(overlaps)})

    return df

In [14]:
# If a previous run already produced the filename/imageID index file, start here.
# Get mAP, precision, recall and F1 (per bounding box in an image).

def generateMetrics():
    """Load the saved filename index instead of re-running the evaluation.

    This definition replaces the earlier ``generateMetrics(path)`` once the
    cell has run.

    Returns:
        The CSV contents as a DataFrame, with the column ``'0'`` renamed to
        ``'files'``.
    """
    file_index = pd.read_csv('/home/ubuntu/Mask_RCNN/data/video1FramesDataBatchTest/video1filenames.txt')
    return file_index.rename(columns={'0': 'files'})

In [15]:
# Ready to analyze - Approach A: keep true and detected frames in separate folders and count the files (1/0 per frame)

def getNumberofInstance(path, modelUse, dataUse, epochs=(1, 25, 50, 75, 100)):
    """Count the files in ``path`` produced by each checkpoint epoch.

    A file belongs to epoch ``n`` when its name ends with ``_<n:04d>.h5.jpg``
    (for example ``_0025.h5.jpg``). Files matching no epoch are ignored.

    Args:
        path: Directory to scan.
        modelUse: Name of the label column in the result.
        dataUse: Value stored in that label column.
        epochs: Epoch numbers to count; defaults to the original five.

    Returns:
        Single-row DataFrame with the column ``modelUse`` followed by one
        ``epoch<n>`` column per requested epoch.
    """
    counts = {epoch: 0 for epoch in epochs}
    for file in os.listdir(path):
        for epoch in counts:
            if file.endswith('_%04d.h5.jpg' % epoch):
                counts[epoch] += 1
                break

    row = {modelUse: [dataUse]}
    for epoch, n_files in counts.items():
        row["epoch%d" % epoch] = [n_files]
    return pd.DataFrame(row, columns=list(row))

def TrueCount(path, modelUse, dataUse):
    """Return a one-row DataFrame with the number of directory entries in ``path``.

    The row has the column ``modelUse`` (holding ``dataUse``) and the column
    ``TrueCount`` (holding the entry count).
    """
    entries = os.listdir(path)
    summary = {modelUse: [dataUse], "TrueCount": len(entries)}
    return pd.DataFrame(summary)

def main():
    """Count per-epoch output files for the mask-frame and all-frame folders.

    Returns:
        Two-row DataFrame: the ``pred_pos_validation`` row (outputMaskFrame)
        stacked on the ``pred_tol_validation`` row (outputAllFrame).
    """
    per_folder = [
        getNumberofInstance('/home/ubuntu/Mask_RCNN/data/video1FramesDataBatchTest/outputMaskFrame',
                            "set", "pred_pos_validation"),
        getNumberofInstance('/home/ubuntu/Mask_RCNN/data/video1FramesDataBatchTest/outputAllFrame',
                            "set", "pred_tol_validation"),
    ]
    return pd.concat(per_folder)
    
def mainTrue():
    """Count the entries of the 'video1Frames - True' folder.

    Returns:
        The one-row DataFrame built by ``TrueCount``, labelled
        ``true_pos_validation`` in the ``set`` column.
    """
    return TrueCount('/home/ubuntu/Mask_RCNN/data/video1Frames - True',
                     "set", "true_pos_validation")
    
if __name__ == "__main__":

    res = main()
    resTrue = mainTrue()
    # DataFrame.append no longer exists in pandas 2.0; pd.concat stacks the
    # two frames the same way.
    Output = pd.concat([res, resTrue]).T
    # After transposing, the first row holds the 'set' labels: use them as
    # column headers, then drop that row.
    Output.columns = Output.iloc[0]
    Output = Output.drop(["set"])
    

In [102]:
# Ready to analyze - Approach 2: compare the XML annotation files with the output object counts (number of objects per frame)

def getGroundTruthObjectCount(path):
    """Count annotated objects per image and object type.

    Reads the CSV at ``path``, discards its ``'Unnamed: 0'`` column and pivots
    the rest so that rows are ``imageID`` values and columns are split by
    ``annot_objs``; each cell is the number of matching rows, with missing
    combinations filled with 0.
    """
    annotations = pd.read_csv(path).drop(columns='Unnamed: 0')
    counts = annotations.pivot_table(
        index='imageID',
        columns='annot_objs',
        aggfunc=lambda x: len(x),
    )
    return counts.fillna(0)


def getPredObjectCount(path, epochs=('0005', '0015', '0025', '0038')):
    """Collect the per-image object-count files written for each checkpoint epoch.

    A file belongs to epoch ``e`` when its name ends with ``_<e>.h5.txt``.
    Each file is read as CSV and tagged with an ``ImageID`` column (the file
    name with ``_mask_rcnn_clown_cfg_<e>.h5.txt`` removed) and an ``epoch``
    column. Rows are returned grouped by epoch, in the order given by
    ``epochs``.

    Args:
        path: Directory holding the count files (a trailing slash is optional).
        epochs: Zero-padded epoch tags to collect; defaults to the original four.

    Returns:
        Concatenated DataFrame without the ``'Unnamed: 0'`` and ``'image'``
        columns. Epochs with no files are skipped. If no file matches at all,
        an empty frame with the ``ImageID`` and ``epoch`` columns is returned.
    """
    frames_by_epoch = {epoch: [] for epoch in epochs}

    for file in os.listdir(path):
        for epoch in frames_by_epoch:
            if not file.endswith('_%s.h5.txt' % epoch):
                continue
            each_file = pd.read_csv(os.path.join(path, file))
            each_file['ImageID'] = file.replace('_mask_rcnn_clown_cfg_%s.h5.txt' % epoch, '')
            each_file['epoch'] = epoch
            frames_by_epoch[epoch].append(each_file)
            break

    # Concatenate once, after the scan, instead of rebuilding the frame for
    # every file.
    frames = [frame for epoch in frames_by_epoch for frame in frames_by_epoch[epoch]]
    if not frames:
        return pd.DataFrame(columns=['ImageID', 'epoch'])

    df_OutObj = pd.concat(frames).drop(columns=['Unnamed: 0', 'image'])
    return df_OutObj



if __name__ == "__main__":

    # Ground truth, training set: move imageID out of the index, expose it as
    # 'files' and strip the '.xml' suffix.
    df_groundTruthTraining = getGroundTruthObjectCount('/home/ubuntu/Mask_RCNN/data/trainingAnnotation.txt')
    df_groundTruthTraining = df_groundTruthTraining.reset_index()
    df_groundTruthTraining['files'] = df_groundTruthTraining['imageID']
    df_groundTruthTraining = df_groundTruthTraining.drop(columns={'w', "imageID"})
    # regex=False: '.xml' is a literal suffix. Left to the pandas default it is
    # a regular expression in some versions, where '.' matches any character.
    df_groundTruthTraining['files'] = df_groundTruthTraining['files'].str.replace('.xml', '', regex=False)
    df_groundTruthTraining.to_csv('/home/ubuntu/Mask_RCNN/data/reform_trainingAnnotation.txt')

    # Ground truth, validation set.
    # NOTE(review): unlike the training frame, no 'w' column is dropped here -
    # confirm that difference is intended.
    df_groundTruthVal = getGroundTruthObjectCount('/home/ubuntu/Mask_RCNN/data/testingAnnotation.txt')
    df_groundTruthVal = df_groundTruthVal.reset_index()
    df_groundTruthVal['files'] = df_groundTruthVal['imageID']
    df_groundTruthVal = df_groundTruthVal.drop(columns={"imageID"})
    df_groundTruthVal['files'] = df_groundTruthVal['files'].str.replace('.xml', '', regex=False)
    df_groundTruthVal.to_csv('/home/ubuntu/Mask_RCNN/data/reform_testingAnnotation.txt')

    # Predicted object counts for both sets.
    df_PredTraining = getPredObjectCount('/home/ubuntu/Mask_RCNN/data/TrainingResults/outputObjCount/')
    df_PredTraining.to_csv('/home/ubuntu/Mask_RCNN/data/reform_TrainingoutputObjCount.txt')

    df_PredVal = getPredObjectCount('/home/ubuntu/Mask_RCNN/data/TestingResults/outputObjCount/')
    df_PredVal.to_csv('/home/ubuntu/Mask_RCNN/data/reform_TestingoutputObjCount.txt')

In [196]:
# Join ground truth with predictions (training set)
df_index_training = pd.read_csv('/home/ubuntu/Mask_RCNN/data/TrainfilevsImageID.txt')
# regex=False: strip the literal "dataset." prefix rather than a pattern in
# which '.' matches any character.
df_index_training['files'] = df_index_training['files'].str.replace("dataset.", '', regex=False)
# Rename on df_index_training itself (there is no `df_index` in this notebook),
# which also keeps the cleaned 'files' column from the line above.
df_index_training = df_index_training.rename(columns={'Unnamed: 0': 'imageID'})
# NOTE(review): Series.replace swaps whole values equal to '.xml', not
# substrings, so this looks like a no-op for numeric ids - confirm intent.
df_index_training['imageID'] = df_index_training['imageID'].replace('.xml', "")

truth = pd.merge(df_groundTruthTraining, df_index_training, on=['files'], how='outer')
truth = truth.rename(columns={'clown': 'true_clown', 'color': 'true_color', 'nface': 'true_nface'})

df_PredTraining = df_PredTraining.rename(columns={'ImageID': 'imageID'})

all_file = []
for e in df_PredTraining.epoch.unique():
    # .copy() so the dtype change is applied to an independent frame instead of
    # a slice of df_PredTraining.
    dfsub = df_PredTraining[df_PredTraining.epoch == e].copy()
    dfsub['imageID'] = dfsub['imageID'].astype(int)
    submerge = pd.merge(truth, dfsub, on=['imageID'], how='outer')
    all_file.append(submerge)

# Stack every epoch that was found, rather than exactly the first four.
training_truth_pred_objCount = pd.concat(all_file)
training_truth_pred_objCount.to_csv('/home/ubuntu/Mask_RCNN/data/Training_truth_pred_objCount.txt')

In [219]:
# Join ground truth with predictions (validation set)
df_index_testing = pd.read_csv('/home/ubuntu/Mask_RCNN/data/TestfilevsImageID.txt')
# regex=False: strip the literal "dataset." prefix rather than a pattern in
# which '.' matches any character.
df_index_testing['files'] = df_index_testing['files'].str.replace("dataset.", '', regex=False)
df_index_testing = df_index_testing.rename(columns={'Unnamed: 0': 'imageID'})
# NOTE(review): Series.replace swaps whole values equal to '.xml', not
# substrings, so this looks like a no-op for numeric ids - confirm intent.
df_index_testing['imageID'] = df_index_testing['imageID'].replace('.xml', "")


truth_val = pd.merge(df_groundTruthVal, df_index_testing, on=['files'], how='outer')
truth_val = truth_val.rename(columns={'clown': 'val_clown', 'color': 'val_color', 'nface': 'val_nface'})

df_PredVal = df_PredVal.rename(columns={'ImageID': 'imageID'})

all_file_val = []
for e in df_PredVal.epoch.unique():
    # .copy() so the dtype change is applied to an independent frame instead of
    # a slice of df_PredVal.
    dfsub_val = df_PredVal[df_PredVal.epoch == e].copy()
    dfsub_val['imageID'] = dfsub_val['imageID'].astype(int)
    submerge_val = pd.merge(truth_val, dfsub_val, on=['imageID'], how='outer')
    all_file_val.append(submerge_val)

# Stack every epoch that was found, rather than exactly the first four.
testing_truth_pred_objCount = pd.concat(all_file_val)
testing_truth_pred_objCount.to_csv('/home/ubuntu/Mask_RCNN/data/Testing_truth_pred_objCount.txt')

# Analysis Starts - Approach 1

In [38]:
# Metrics based on the number of images (each image is evaluated individually)

def TruefilesNumber(pathTrueImage, DataUse):
    """Match the files in ``pathTrueImage`` to their image ids.

    The index returned by ``generateMetrics()`` supplies the ``files`` names;
    the row position in that index is used as ``imageID``. Directory entries
    are compared by the part of their name before the first ``'.'``.

    Args:
        pathTrueImage: Directory whose entries are treated as true positives.
        DataUse: Name of the label column added to the result.

    Returns:
        DataFrame with the columns ``imageID``, ``files`` and ``DataUse``
        (always ``'TruePositive'``), holding only files present in both the
        index and the directory.
    """
    fileassign = generateMetrics()
    fileassign['imageID'] = fileassign.index
    # .copy() so the column rewrite below does not act on a slice of fileassign.
    IndexFiles = fileassign[['imageID', 'files']].copy()
    # regex=False: strip the literal 'dataset.' prefix rather than a pattern in
    # which '.' matches any character.
    IndexFiles['files'] = IndexFiles['files'].str.replace('dataset.', "", regex=False)

    TrueFileConvert = [name.split(".")[0] for name in os.listdir(pathTrueImage)]
    Truefile = pd.DataFrame({'files': TrueFileConvert})

    matched = pd.merge(IndexFiles, Truefile, on="files", how='inner')
    matched[DataUse] = 'TruePositive'

    return matched

def mainVal():
    """Return the true-positive file table for the 'video1Frames - True' folder.

    The label column of the result is named ``'validation'``.
    """
    return TruefilesNumber('/home/ubuntu/Mask_RCNN/data/video1Frames - True', 'validation')
    


if __name__ == "__main__":
    # Table of true-positive frames with their imageID; a later cell reads
    # TrueVal['imageID'] to split predictions into true/false ground truth.
    TrueVal = mainVal()

    

In [39]:
def runningFiles(pathtooutput):
    """List the image id and epoch encoded in every file name of ``pathtooutput``.

    File names are split on ``'_'``: the first piece is the image id and the
    sixth piece, up to its first ``'.'``, is the epoch. A name with fewer than
    six pieces raises ``IndexError``.

    Returns:
        DataFrame with the string columns ``imageID`` and ``Epoch``, one row
        per directory entry.
    """
    parsed = []
    for name in os.listdir(pathtooutput):
        pieces = name.split('_')
        parsed.append((pieces[0], pieces[5].split('.')[0]))

    image_ids = [image_id for image_id, _ in parsed]
    epochs = [epoch for _, epoch in parsed]
    return pd.DataFrame({"imageID": image_ids, "Epoch": epochs})
    
    
if __name__ == "__main__":
    
    
    # (imageID, Epoch) pairs for every output frame ...
    PredValAllfiles = runningFiles('/home/ubuntu/Mask_RCNN/data/video1FramesDataBatchTest/outputAllFrame')
    # ... and for the frames written to the mask-output folder, which the next
    # cell labels 'PredPositive'.
    PredValPosfiles = runningFiles('/home/ubuntu/Mask_RCNN/data/video1FramesDataBatchTest/outputMaskFrame')

In [None]:
# Validation
# Frames found in the mask-output folder are predicted positives; frames that
# only appear in the all-frames folder get 'PredNegative' through the fillna.
PredValPosfiles['validation'] = 'PredPositive'
vali = pd.merge(PredValAllfiles, PredValPosfiles, on=['imageID', 'Epoch'], how='outer')
vali = vali.fillna('PredNegative')

# imageID values in vali are strings parsed from file names, so compare as strings.
listTrueVal = [str(i) for i in list(TrueVal['imageID'])]
in_truth = vali['imageID'].isin(listTrueVal)
# .copy() so the label columns are added to independent frames, not to slices of vali.
PreVal = vali[in_truth].copy()
PreVal['validation_Truth'] = 'TruePostive'
PreValf = vali[~in_truth].copy()
PreValf['validation_Truth'] = 'TrueNegative'
ValidationData = pd.concat([PreVal, PreValf])

# One confusion-matrix row per epoch. The rows are gathered in a plain list so
# that the numpy alias `np` is not rebound, and the frame is built once after
# the loop.
matrix_rows = []
for e in ValidationData['Epoch'].unique():
    epoch_rows = ValidationData[ValidationData['Epoch'] == e]
    pred_pos = epoch_rows['validation'] == 'PredPositive'
    pred_neg = epoch_rows['validation'] == 'PredNegative'
    truth_pos = epoch_rows['validation_Truth'] == 'TruePostive'
    truth_neg = epoch_rows['validation_Truth'] == 'TrueNegative'
    matrix_rows.append({'Epoch': e,
                        'PrePos_TruePos': int((pred_pos & truth_pos).sum()),
                        'PrePos_TrueNeg': int((pred_pos & truth_neg).sum()),
                        'PreNeg_TruePos': int((pred_neg & truth_pos).sum()),
                        'PreNeg_TrueNeg': int((pred_neg & truth_neg).sum())})

validationMatrix = pd.DataFrame(matrix_rows,
                                columns=['Epoch', 'PrePos_TruePos', 'PrePos_TrueNeg',
                                         'PreNeg_TruePos', 'PreNeg_TrueNeg'])

# Displayed sorted; validationMatrix itself keeps the order in which epochs were found.
validationMatrix.sort_values(by=['Epoch'])

In [43]:
#save the results
# Output: per-epoch file counts built in the Approach A cell.
Output.to_csv('/home/ubuntu/Mask_RCNN/data/outputMetrics/totalNumberImages.txt')
# validationMatrix: per-epoch confusion counts built in the validation cell.
validationMatrix.to_csv('/home/ubuntu/Mask_RCNN/data/outputMetrics/validationMatrix.txt')

# Analysis Starts - Approach 2

In [344]:
def generateNNPPTNNPforEpoch(data_file):
    """Build a per-epoch confusion table for clown presence.

    A row of ``data_file`` counts as truth-positive when ``true_clown != 0``
    and as prediction-positive when ``'#clown' != 0``. In the two-letter column
    names the first letter is the truth and the second the prediction:

    * ``NN`` - no clown annotated, none predicted
    * ``PP`` - clown annotated and predicted
    * ``NP`` - no clown annotated, but one predicted
    * ``PN`` - clown annotated, but none predicted

    Counts are aligned on the epoch value, so an epoch missing from one
    category gets 0 there instead of shifting the other epochs' counts.

    Args:
        data_file: DataFrame with the columns ``epoch``, ``true_clown`` and
            ``'#clown'``.

    Returns:
        DataFrame with the columns ``epoch``, ``NN``, ``PP``, ``NP``, ``PN``
        (integer counts), sorted by epoch.
    """
    truth_pos = data_file['true_clown'] != 0.0
    pred_pos = data_file['#clown'] != 0.0

    categories = [
        ('NN', ~truth_pos & ~pred_pos),
        ('PP', truth_pos & pred_pos),
        ('NP', ~truth_pos & pred_pos),
        ('PN', truth_pos & ~pred_pos),
    ]

    # value_counts gives "rows per epoch" for each category; building the frame
    # from those Series aligns them on the epoch label.
    counts = {label: data_file.loc[mask, 'epoch'].value_counts() for label, mask in categories}
    table = pd.DataFrame(counts, columns=[label for label, _ in categories])
    table = table.fillna(0).astype(int).sort_index()

    df_all = table.rename_axis('epoch').reset_index()
    return df_all


if __name__ == "__main__":
    
# Keep only the first 2464 rows after sorting by 'files' (original note: "cut out google images").
# NOTE(review): 2464 is a hard-coded row count - confirm it still matches the data.
    training_truth_pred_objCount = training_truth_pred_objCount.sort_values(by=['files']).iloc[0:2464]
    df_training_PPNNPNNP_tabel = generateNNPPTNNPforEpoch(training_truth_pred_objCount)
    df_training_PPNNPNNP_tabel.to_csv('/home/ubuntu/Mask_RCNN/data/training_forConfusionTablePlot.txt')
    
# Rename the validation columns to the 'true_*' names that generateNNPPTNNPforEpoch reads.
    testing_truth_pred_objCount = testing_truth_pred_objCount.rename(columns={'val_clown':'true_clown',
                                           'val_color':'true_color',
                                           'val_nface':'true_nface'})
    df_testing_PPNNPNNP_tabel = generateNNPPTNNPforEpoch(testing_truth_pred_objCount)
    df_testing_PPNNPNNP_tabel.to_csv('/home/ubuntu/Mask_RCNN/data/testing_forConfusionTablePlot.txt')

In [349]:
df_training_PPNNPNNP_tabel

Unnamed: 0,epoch,NN,PP,NP,PN
0,5,51,343,161,61
1,15,47,362,165,42
2,25,55,339,157,65
3,38,102,324,110,80


In [351]:
df_testing_PPNNPNNP_tabel

Unnamed: 0,epoch,NN,PP,NP,PN
0,5,17,105,59,26
1,15,10,108,66,23
2,25,14,115,62,16
3,38,39,102,37,29
