# Inference Notebook for Object Detection with YOLOv4

## Import Libraries

In [None]:
import tensorflow as tf
from tensorflow import constant,shape,reshape
from tensorflow.image import combined_non_max_suppression
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os

## Mount Google Drive

In [None]:
# Mount Google Drive inside the Colab runtime; the project path used below
# lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

## Location of files

In [None]:
# Root folder of the project on the mounted shared drive. The shell cells
# below expand it as $path and the Python cells join sub-paths onto it.
path = "/content/drive/Shareddrives/FISCOM/fiscom_final"

In [None]:
# List the contents of the project folder.
!ls $path

In [None]:
# Make the project folder the working directory.
%cd $path

## Download Git functions

In [None]:
# Clone the yolov4-custom-functions repository into the working directory.
!git clone https://github.com/theAIGuysCode/yolov4-custom-functions.git

In [None]:
# Enter the cloned repository; save_model.py and requirements.txt are
# referenced relative to it in the following cells.
%cd yolov4-custom-functions/

In [None]:
# Install the packages listed in the repository's requirements.txt.
!pip install -r requirements.txt

## Configuration Variables

In [None]:
# Class names indexed by class id: the detection and label-parsing code looks
# names up as tags[class_id].
# NOTE(review): presumably this order must match obj.names used for training —
# confirm.
tags = [
    "banana-bag",
    "banana",
    "blackberries",
    "raspberry",
    "lemon-bag",
    "lemon",
    "grapes-bag",
    "grapes",
    "tomato-bag",
    "tomato",
    "apple-bag",
    "apple",
    "chili-bag",
    "chili",
]

## Loading Model

In [None]:
# Create the data/backup folder inside the cloned repository; the trained
# weights are copied into it by the next cell.
!mkdir $path'/yolov4-custom-functions/data/backup'

In [None]:
# Copy the yolov4-obj_last.weights file from the training folder into the
# repository's data/backup folder.
!sudo cp -rf $path"/yolov4_training/yolov4/backup/yolov4-obj_last.weights" $path"/yolov4-custom-functions/data/backup" 

In [None]:
# Copy the obj.names class list from the training folder into the
# repository's data/classes folder.
!sudo cp -rf $path"/yolov4_training/yolov4/obj.names" $path"/yolov4-custom-functions/data/classes" 

In [None]:
# Run the repository's save_model.py on the copied weights (input size 416,
# model yolov4). Its output directory, <path>/yolov4/yolov4-416, is what the
# next cell loads with tf.saved_model.load.
!python save_model.py --weights $path'/yolov4-custom-functions/data/backup/yolov4-obj_last.weights' --output $path'/yolov4/yolov4-416' --input_size 416 --model yolov4 

In [None]:
#Load Model
# Load the SavedModel directory written by save_model.py.
loaded = tf.saved_model.load(os.path.join(path,'yolov4/yolov4-416'))
# Show which serving signatures the loaded model exposes.
print(list(loaded.signatures.keys()))  # ["serving_default"]

In [None]:
#Load signatures
# `infer` is the global callable that inference() uses to run the network.
infer = loaded.signatures["serving_default"]
# Print the input specification the signature expects.
print(infer.structured_input_signature)

# Final Approach w/ YOLO Labels

## Relevant Functions

In [None]:
# Drawing constants shared by draw_boxed_text().
# Blend weight of the text patch; the underlying image gets 1 - ALPHA.
ALPHA = 0.5
# Font, scale and stroke thickness used both to measure and to draw the text.
FONT = cv2.FONT_HERSHEY_PLAIN
TEXT_SCALE = 3.0
TEXT_THICKNESS = 5
# Colour of the patch border.
BLACK = (0, 0, 0)
# Colour of the text itself.
WHITE = (255, 255, 255)

def draw_boxed_text(img, text, topleft, color):
    """Overlay a translucent text label on an image.

    The label is rendered on a separate patch filled with ``color``: the
    text is drawn in white and the patch is outlined with a one-pixel black
    border. The patch is then blended onto ``img`` with weight ``ALPHA``,
    its top-left corner placed at ``topleft``; whatever would extend past
    the right or bottom edge of the image is cut off. ``FONT``,
    ``TEXT_SCALE``, ``TEXT_THICKNESS`` and ``ALPHA`` are the constants
    defined at the top of the cell.

    # Arguments
      img: the input image, a 3-dimensional uint8 numpy array.
      text: the text to be drawn.
      topleft: XY coordinate of the top-left corner of the label.
      color: fill colour of the patch, i.e. the background of the text.
    # Output
      img: the same array that was passed in; it is modified in place.
    """
    assert img.dtype == np.uint8
    img_h, img_w, _ = img.shape
    left, top = topleft[0], topleft[1]
    # Nothing to draw when the anchor lies beyond the right or bottom edge.
    if left >= img_w or top >= img_h:
        return img

    margin = 3
    (text_w, text_h), _baseline = cv2.getTextSize(
        text, FONT, TEXT_SCALE, TEXT_THICKNESS)
    box_w = text_w + 2 * margin
    box_h = text_h + 2 * margin

    # Render the label off-image first so it can be blended in one step.
    patch = np.zeros((box_h, box_w, 3), dtype=np.uint8)
    patch[...] = color
    cv2.putText(patch, text, (margin + 1, box_h - margin - 2), FONT,
                TEXT_SCALE, WHITE, thickness=TEXT_THICKNESS,
                lineType=cv2.LINE_8)
    cv2.rectangle(patch, (0, 0), (box_w - 1, box_h - 1), BLACK, thickness=1)

    # Keep only the part of the patch that fits inside the image.
    vis_w = min(box_w, img_w - left)
    vis_h = min(box_h, img_h - top)

    # roi is a view into img, so writing the blend result into it updates img.
    roi = img[top:top + vis_h, left:left + vis_w, :]
    cv2.addWeighted(patch[0:vis_h, 0:vis_w, :], ALPHA, roi, 1 - ALPHA, 0, roi)
    return img

def rectangle_box(tags,img,cls,c1,c2,coor,prob,color):
    """Draw one detection on ``img``: a bounding box plus a "<class> <score>" label.

    # Arguments
      tags: list of class names, indexed by class id.
      img: image to draw on (uint8 numpy array, as required by
        draw_boxed_text).
      cls: class id of the detection; converted with int().
      c1, c2: (x, y) pixel coordinates of two opposite box corners.
      coor: box in pixels; index 0 is read as the top y and index 1 as the
        left x when placing the label.
      prob: detection score, printed with two decimals.
      color: colour used for both the box outline and the label background.
    # Output
      img: the image returned by the drawing calls.
    """
    # OpenCV drawing functions only accept integer pixel coordinates, but the
    # corners may arrive as numpy floats (inference() stores them in a float
    # array), so cast them explicitly.
    pt1 = (int(c1[0]), int(c1[1]))
    pt2 = (int(c2[0]), int(c2[1]))
    img = cv2.rectangle(img, pt1, pt2, color, 2)

    # Place the label 15 px above the box's top-left corner, clamped so it
    # never starts outside the image.
    txt_loc = (max(int(coor[1]), 0), max(int(coor[0]) - 15, 0))
    txt = '{} {:.2f}'.format(tags[int(cls)], float(prob))
    return draw_boxed_text(img, txt, txt_loc, color)

def inference(img,tags,video = False):
    """Run the loaded YOLOv4 signature on one image and draw its detections.

    Relies on the global ``infer`` (the model's serving signature) being
    defined before the call.

    # Arguments
      img: 3-channel uint8 image as a numpy array; boxes and labels are
        drawn onto it in place.
      tags: list of class names, indexed by class id.
      video: when False (default) each detected class id is printed and the
        annotated image is shown with matplotlib; when True both are skipped.
    # Output
      (image, class_ids): the annotated image after a final
      cv2.COLOR_RGB2BGR channel swap, and the list of detected class ids
      (ints) in the order returned by non-max suppression.

    NOTE(review): the model input is channel-swapped on entry
    (COLOR_BGR2RGB), while ``img`` itself is displayed as-is and swapped
    (COLOR_RGB2BGR) on return. Confirm which channel order callers are
    expected to pass.
    """
    # Network input: channel-swapped copy, resized to the 416x416 input size,
    # scaled to [0, 1] and wrapped in a batch of one.
    model_input = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    model_input = cv2.resize(model_input, (416, 416)) / 255.
    batch_data = tf.cast(tf.constant([model_input]), tf.float32)

    pred_bbox = infer(batch_data)
    # Every output tensor carries 4 box coordinates followed by the per-class
    # scores; if the signature returns several outputs, the last one is used.
    for value in pred_bbox.values():
        boxes = value[:, :, 0:4]
        pred_conf = value[:, :, 4:]

    boxes, scores, classes, _ = tf.image.combined_non_max_suppression(
        boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
        scores=tf.reshape(
            pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
        max_output_size_per_class=50,
        max_total_size=50,
        iou_threshold=0.3,
        score_threshold=0.5
    )

    image_h, image_w = img.shape[:2]
    class_img = []
    for box, prob, cls in zip(boxes.numpy()[0], scores.numpy()[0], classes.numpy()[0]):
        # Skip low-score entries (presumably the zero-padded slots of the
        # fixed-size NMS output — see the TF documentation).
        if float(prob) <= 0.05:
            continue
        if not video:
            print(cls)
        class_img.append(int(cls))

        # Boxes are normalised (y_min, x_min, y_max, x_max); convert them to
        # integer pixels, because OpenCV rejects float coordinates.
        y_min = int(box[0] * image_h)
        x_min = int(box[1] * image_w)
        y_max = int(box[2] * image_h)
        x_max = int(box[3] * image_w)
        coor = [y_min, x_min, y_max, x_max]

        img = rectangle_box(tags, img, cls, (x_min, y_min), (x_max, y_max),
                            coor, prob, (255, 0, 0))

    if not video:
        plt.figure(figsize=(10, 10))
        plt.imshow(img)
        plt.show()
    return cv2.cvtColor(img, cv2.COLOR_RGB2BGR), class_img

def get_labels_yolo(base_path,txt_path,tags_list):
    """Read a YOLO label file and return the class name of every annotation.

    Each non-empty line is expected to start with an integer class id
    followed by whitespace-separated box values; only the class id is used.

    # Arguments
      base_path: folder containing the label file.
      txt_path: file name of the label file, relative to base_path.
      tags_list: list of class names, indexed by class id.
    # Output
      labs: list of class names, one per annotation line, in file order.
    """
    labs = []
    with open(os.path.join(base_path, txt_path)) as f:
        for line in f:
            fields = line.split()
            # Blank lines (e.g. a trailing newline) carry no annotation.
            if not fields:
                continue
            # Parse the whole first field: reading only the first character
            # would turn class ids >= 10 into 1.
            labs.append(tags_list[int(fields[0])])
    return labs

def inf_test_yolo(base_path,tags):
    """Evaluate the detector on every .jpg in a folder against its YOLO labels.

    For each image ``name.jpg`` the ground truth is read from ``name.txt`` in
    the same folder, the image is run through inference(), and the per-class
    2x2 confusion matrices from test_labels() are added to a running total.
    File names, predictions, ground truth and matrices are printed as it goes.

    # Arguments
      base_path: folder holding the images and their label files.
      tags: list of class names, indexed by class id.
    # Output
      cmatrix: dict mapping each class name to its accumulated 2x2 matrix.

    NOTE(review): the image is converted BGR->RGB here and inference()
    applies the same conversion again to the model input. Confirm that this
    is intended.
    """
    cmatrix = {lb: np.zeros((2, 2)) for lb in tags}

    for pth in os.listdir(base_path):
        # Only files whose last dot-separated component is exactly "jpg".
        if pth.rsplit(".", 1)[-1] != "jpg":
            continue
        print(pth)

        # The label file shares the image's name, with "txt" in place of "jpg".
        txt_path = pth[:-3] + 'txt'
        image = cv2.imread(os.path.join(base_path, pth))
        _, pred_list = inference(cv2.cvtColor(image, cv2.COLOR_BGR2RGB), tags)
        gnd_list = get_labels_yolo(base_path, txt_path, tags)
        print(pred_list)
        print(gnd_list)

        cmatdic = test_labels(gnd_list, pred_list, tags)
        print(cmatdic)
        for lab, mat in cmatdic.items():
            cmatrix[lab] = cmatrix[lab] + mat
        print(cmatrix)
    return cmatrix

def inf_test(path):
    """Run inference_1() on every .jpg file found in ``path``.

    Each image is read with OpenCV, converted BGR->RGB and handed to
    inference_1(); the file name is printed first. Nothing is returned.

    NOTE(review): inference_1 is not defined in the visible part of this
    notebook. Confirm that it exists, or whether inference() was meant.

    # Arguments
      path: folder to scan for images.
    """
    for pth in os.listdir(path):
        # Only files whose last dot-separated component is exactly "jpg".
        if pth.rsplit(".", 1)[-1] != "jpg":
            continue
        print(pth)
        rgb_image = cv2.cvtColor(cv2.imread(path + "/" + pth), cv2.COLOR_BGR2RGB)
        inference_1(rgb_image)

## Definition of Confusion Matrix per class

In [None]:
"""
Define confusion matrix for each class as:
 --------------------Ground Truth-----------------
        Positive (Defect)    Negative (No Defect)  

|    Pos        TP                    FP
Pred   
|    Neg        FN                    TN
--------------------------------------------------

TP=[0,0]
FN=[1,0]
FP=[0,1]
TN=[1,1]

"""


def test_labels(gnd,preds,tags):
    preds_txt=[tags[pred] for pred in preds]
    #Initialize confusion matrix for each label
    cmatdic={}
    for lb in tags:
        cmatdic[lb]=np.zeros((2,2))
    
    gv,gc=np.unique(gnd, return_counts=True)
    pv,pc=np.unique(preds_txt, return_counts=True)
    
    gd=dict(zip(gv,gc))
    pd=dict(zip(pv,pc)) 
    

    #Populate label counts with missing tags with count=0
    for label in tags:
        if label not in gd:
            gd[label]=0
        if label not in pd:
            pd[label]=0

    for lab,cnt in gd.items():
        #True positives
        if lab in pd.keys():
            comp=cnt-pd[lab]
            #Case: same number of defects than predicted (True positives and True Negatives)
            if comp==0:
                #TN
                if cnt==0 and pd[lab]==0:
                    cmatdic[lab][1,1]=+1
                #TP
                else:
                    cmatdic[lab][0,0]=+cnt
            #Case: less defects than predicted (True Positives and False negatives)
            elif comp>0:
                #Add defects not detected (difference between original label count and predicted)
                cmatdic[lab][1,0]=+comp
                #TP
                cmatdic[lab][0,0]=+pd[lab]

            #Case: more defects than predicted (True Positives and False positives)    
            elif comp<0:
                #Add extra defects detected (difference between predicted and original label count)
                cmatdic[lab][0,1]=-comp
                #TP
                cmatdic[lab][0,0]=+cnt
    return cmatdic

def pprint_cmat(cmat):
    """Print each label followed by its matrix and a blank separator line.

    # Arguments
      cmat: dict mapping a label to the object to print for it.
    """
    for label in cmat:
        # One call emits: label, matrix, then an empty line.
        print(label, cmat[label], "", sep="\n")

def _safe_ratio(num, den):
    """Return num / den as a float, or NaN when den is zero."""
    return float(num) / float(den) if den else float("nan")


def metrics(cmat_dic,print_rep=True):
    """Compute per-class classification metrics from 2x2 confusion matrices.

    Expected matrix layout: [0,0]=TP, [0,1]=FP, [1,0]=FN, [1,1]=TN.

    A metric whose denominator is zero (e.g. precision for a class that was
    never predicted) is reported as NaN instead of raising or emitting a
    division warning.

    # Arguments
      cmat_dic: dict mapping a label to its 2x2 matrix (indexable as m[i, j]).
      print_rep: when True (default) print a short report for every label.
    # Output
      out_dic: dict mapping each label to a dict with the keys 'accuracy',
      'precision', 'recall', 'f1_score' and 'specificity'.
    """
    out_dic = {}
    for label, cmat in cmat_dic.items():
        TP = cmat[0, 0]
        FN = cmat[1, 0]
        FP = cmat[0, 1]
        TN = cmat[1, 1]

        prec = _safe_ratio(TP, TP + FP)
        rec = _safe_ratio(TP, TP + FN)
        metrics_dic = {
            'accuracy': _safe_ratio(TP + TN, TP + TN + FP + FN),
            'precision': prec,
            'recall': rec,
            # NaN in either input propagates, so F1 is NaN whenever precision
            # or recall is undefined.
            'f1_score': _safe_ratio(2 * prec * rec, prec + rec),
            'specificity': _safe_ratio(TN, TN + FP),
        }

        out_dic[label] = metrics_dic
        if print_rep:
            print(label)
            for metric, result in metrics_dic.items():
                print(metric+': {0:.3f}'.format(result))
            print('-'*5)
    return out_dic


In [None]:
# List the contents of the dataset/new_split folder.
%ls $path"/dataset/new_split/"

## Copy test dataset for inference and performance analysis

In [None]:
# Copy the test split to yolov4/check under the project root; the next cell
# points `path` at that location.
!cp -r $path'/dataset/new_split/test' $path'/yolov4/check'

In [None]:
# Re-point `path` at the copied test set. This rebinds the global that the
# earlier cells used as the project root, and re-running this cell appends
# the suffix a second time.
path=path + '/yolov4/check'

In [None]:
# Display the class list used for evaluation.
tags

In [None]:
# Run the detector on every .jpg under `path` and accumulate one 2x2
# confusion matrix per class.
cmat=inf_test_yolo(path,tags)

In [None]:
# Print the accumulated confusion matrix of each class.
pprint_cmat(cmat)


```
Define confusion matrix for each class as:
 --------------------Ground Truth-----------------
        Positive (Defect)    Negative (No Defect)  

|    Pos        TP                    FP
Pred   
|    Neg        FN                    TN
--------------------------------------------------

TP=[0,0]
FN=[1,0]
FP=[0,1]
TN=[1,1]
```



In [None]:
# Compute (and print) accuracy, precision, recall, F1 and specificity for
# each class from the accumulated matrices.
m=metrics(cmat)

# Video Inference

In [None]:
# Switch to the demo folder (presumably where Viakable_clip.mp4 and the
# VideoWriter module used by the next cell live — confirm).
%cd /mydrive/Demo

In [None]:
import cv2
from VideoWriter import VideoWriter

# Annotate a video clip frame by frame: run inference() on each frame, draw a
# summary of the detected class counts, and hand the frame to the project's
# VideoWriter helper.
video_path = "Viakable_clip.mp4"
cap = cv2.VideoCapture(video_path)

fps = cap.get(cv2.CAP_PROP_FPS) # Gets the frames per second
# Property ids 3 and 4 are the frame width and height in OpenCV's VideoCapture.
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))
frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)

# NOTE(review): VideoWriter is a project-local helper; `out` is not used again
# in this cell and nothing here visibly finalises the writer — confirm that
# SaveFrame/CreateWriter take care of closing the output file.
vw = VideoWriter(video_path,frame_width,frame_height,fps,frame_count)
out = vw.CreateWriter()

# Processing window in milliseconds of video position. Uncomment start_time to
# skip the beginning of the clip; with it left undefined only the
# elapsed_time limit below applies.
# start_time = 0
elapsed_time = 120*1000
while(True):
    # Capture frame-by-frame
    success, img = cap.read()
    
    # Stop at the end of the stream (or on a read failure).
    if not success:
        break
    # Position of the current frame in milliseconds.
    current_time = cap.get(cv2.CAP_PROP_POS_MSEC)
    # The window is configured by defining (or not) the variables above, so
    # their presence is tested by name.
    if "start_time" in locals() and "elapsed_time" in locals():
      if current_time < start_time:
          continue
      elif current_time > start_time + elapsed_time:
          break
    elif "elapsed_time" in locals():
      if current_time > elapsed_time:
        break
    # video=True suppresses the per-frame printing and plotting.
    img,pred_list=inference(img,tags,True)
    # Total detections, and detections of class id 0 / class id 1, which are
    # reported as "Full" / "Empty" coils.
    counter = len(pred_list)
    count_full = len([value for value in pred_list if value == 0])
    count_empty = len([value for value in pred_list if value == 1])
    txt = 'Total coils:  {} \n Full coils:  {} \n Empty coils:  {} \n'.format(
        str(counter),str(count_full),str(count_empty))
    y0, dy = 130, 50
    # White filled panel behind the summary text.
    img = cv2.rectangle(img, (40,120), (500,280), (255,255,255), -1)
    # One boxed line per entry; the trailing "\n" in txt yields a final empty
    # line as well.
    for i, line in enumerate(txt.split('\n')):
      y = y0 + i*dy
      img = draw_boxed_text(img, line, (50,y), (0,0,255))
    # cv2_imshow(img)
    vw.SaveFrame(img,current_time)
    # Progress print, only when the position is an exact multiple of 1000 ms.
    if (current_time % 1000) == 0:
      print(current_time)
    # Allow aborting with the 'q' key when a GUI window is available.
    if cv2.waitKey(1) & 0xFF == ord('q'):
            break

cap.release()
cv2.destroyAllWindows()