In [1]:
import tensorflow as tf

In [2]:
from absl import flags
import sys
# Keep only the first argv entry so that the flag parser below is not handed
# any extra command-line arguments (presumably the ones Jupyter passes to the
# kernel -- TODO confirm).
sys.argv = sys.argv[:1]
FLAGS = flags.FLAGS
# Parse the trimmed argv so the absl FLAGS object is initialised before the
# yolov3_tf2 / deep_sort imports that follow in this cell.
FLAGS(sys.argv)

import time
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

from yolov3_tf2.models import YoloV3
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import convert_boxes

from deep_sort import preprocessing
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from tools import generate_detections as gdet

In [3]:
# Tracker configuration. These module-level names are read by later cells.
# Passed as the 2nd argument of NearestNeighborDistanceMetric below
# (presumably the matching threshold for the cosine metric -- TODO confirm).
max_cosine_distance = 0.5
# Passed as the 3rd argument of NearestNeighborDistanceMetric below
# (presumably None means "no limit on stored samples" -- TODO confirm).
nn_budget = None
# Overlap threshold handed to preprocessing.non_max_suppression in the tracking loop.
nms_max_overlap = 0.8

# Appearance-feature encoder built from the frozen model file; it is called as
# encoder(img, boxes) in the tracking loop.
model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
# Single tracker instance shared by every frame of the tracking loop.
tracker = Tracker(metric)

In [4]:
import os
import tensorflow as tf
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
from object_detection.utils import config_util

In [5]:
# Location of the exported detector (pipeline.config + checkpoint directory).
# NOTE(review): absolute, machine-specific path -- consider a configurable base directory.
model_path = '/Users/fadijemmali/Desktop/Tracker/faster_rcnn_inception_resnet_v2_1024x1024/export'
checkpoint_dir = os.path.join(model_path, 'checkpoint')

# Build the detection model in inference mode from the pipeline configuration.
configs = config_util.get_configs_from_pipeline_file(os.path.join(model_path, 'pipeline.config'))
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore the latest checkpoint into the freshly built model.
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
if not latest_ckpt:
    # Build a single message string; passing the path as a second positional
    # argument made the exception render as a tuple instead of a sentence.
    raise FileNotFoundError(f"No checkpoint found in {checkpoint_dir}")

ckpt.restore(latest_ckpt).expect_partial()
print(f"Checkpoint loaded: {latest_ckpt}")

@tf.function
def rcnn(image):
    """Run the detection model end to end on a batched image tensor.

    Chains the model's ``preprocess``, ``predict`` and ``postprocess`` steps
    and returns whatever ``postprocess`` produces (indexed like a dict by
    ``detect_fn2``).
    """
    preprocessed, true_shapes = detection_model.preprocess(image)
    raw_predictions = detection_model.predict(preprocessed, true_shapes)
    return detection_model.postprocess(raw_predictions, true_shapes)

Checkpoint loaded: /Users/fadijemmali/Desktop/Tracker/faster_rcnn_inception_resnet_v2_1024x1024/export/checkpoint/ckpt-0


In [6]:
# Classes known to the detector, with the ids written into the label map.
labels = [
    {'name': 'ball', 'id': 1},
    {'name': 'goalkeeper', 'id': 2},
    {'name': 'player', 'id': 3},
    {'name': 'referee', 'id': 4},
]

# Write one `item { ... }` stanza per class, then load the file back as the
# category index used to turn class ids into names.
with open('label_map.pbtxt', 'w') as f:
    for entry in labels:
        stanza = [
            'item { \n',
            '\tname:\'{}\'\n'.format(entry['name']),
            '\tid:{}\n'.format(entry['id']),
            '}\n',
        ]
        f.write(''.join(stanza))
category_index = label_map_util.create_category_index_from_labelmap('label_map.pbtxt')

In [7]:
def detect_fn2(img, threshold=0.25):
    """Detect objects in one frame and return the confident detections.

    Args:
        img: Frame to analyse; converted with ``np.array`` and given a leading
            batch axis before being passed to ``rcnn``.
        threshold: Detections whose score is not strictly greater than this
            value are dropped.

    Returns:
        ``(boxes, scores, class_names)`` where ``boxes`` has its columns
        reordered as ``[1, 0, 3, 2]`` relative to the model output
        (presumably (ymin, xmin, ymax, xmax) -> (xmin, ymin, xmax, ymax) --
        TODO confirm), ``scores`` are the matching scores and ``class_names``
        is a list of names in which 'player', 'goalkeeper' and 'referee' are
        all reported as 'person'.

    NOTE(review): frames read by OpenCV are BGR; confirm the detector does not
    expect RGB input.
    """
    batched_frame = np.expand_dims(np.array(img), 0)
    input_tensor = tf.convert_to_tensor(batched_frame, dtype=tf.float32)
    raw = rcnn(input_tensor)

    # Strip the batch axis and keep only the first `count` entries of every output.
    count = int(raw.pop('num_detections'))
    trimmed = {}
    for key, value in raw.items():
        trimmed[key] = value[0, :count].numpy()
    trimmed['num_detections'] = count
    trimmed['detection_classes'] = trimmed['detection_classes'].astype(np.int64)

    # Keep only the detections that score above the threshold.
    confident = trimmed['detection_scores'] > threshold
    boxes = trimmed['detection_boxes'][:, [1, 0, 3, 2]][confident]
    scores = trimmed['detection_scores'][confident]
    classes = trimmed['detection_classes'][confident]

    # Class ids are shifted by one before the category-index lookup; every
    # human class is collapsed into the single label 'person'.
    human_classes = ['player', 'goalkeeper', 'referee']
    class_names = []
    for class_id in classes:
        category_name = category_index[class_id + 1]['name']
        class_names.append('person' if category_name in human_classes else category_name)

    return boxes, scores, class_names

In [8]:
# Open the source clip and a writer that mirrors its frame size and frame rate.
vid = cv2.VideoCapture('./data/video/red_white.mp4')

codec = cv2.VideoWriter_fourcc(*'mp4v')
# Keep the frame rate exactly as reported: truncating it to an int turns e.g.
# 29.97 fps into 29 fps and makes the written video play at the wrong speed.
vid_fps = vid.get(cv2.CAP_PROP_FPS)
vid_width, vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('./data/video/results-red_white.mp4', codec, vid_fps, (vid_width, vid_height))

In [None]:
# Detect + track every frame of `vid`, draw the confirmed tracks, write the
# annotated frame to `out`, and record the tracks of each frame in
# `all_frames_data` (one list of {"box", "id", "class"} dicts per frame).
all_frames_data = []
current = 1

# The palette does not depend on the frame, so it is built once up front.
cmap = plt.get_cmap('tab20b')
colors = [cmap(i)[:3] for i in np.linspace(0, 1, 20)]

try:
    while True:
        _, img = vid.read()
        if img is None:
            print('Completed')
            break
        frame_objects = []

        t1 = time.time()

        boxes, scores, names = detect_fn2(img)

        converted_boxes = convert_boxes(img, boxes)
        features = encoder(img, converted_boxes)

        detections = [Detection(bbox, score, class_name, feature)
                      for bbox, score, class_name, feature
                      in zip(converted_boxes, scores, names, features)]

        # Suppress overlapping detections before handing them to the tracker.
        boxs = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        classes = np.array([d.class_name for d in detections])
        indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        tracker.predict()
        tracker.update(detections)

        for track in tracker.tracks:
            # Only draw/record tracks that are confirmed and were updated recently.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            bbox = track.to_tlbr()
            class_name = track.get_class()
            color = colors[int(track.track_id) % len(colors)]
            color = [i * 255 for i in color]

            # Bounding box, filled label background, then the "<class>-<id>" label.
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), color, 2)
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)),
                          (int(bbox[0])+(len(class_name)+len(str(track.track_id)))*17, int(bbox[1])),
                          color, -1)
            cv2.putText(img, class_name+"-"+str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75,
                        (255, 255, 255), 2)

            frame_objects.append({
                "box": bbox,
                "id": track.track_id,
                "class": class_name,
            })

        all_frames_data.append(frame_objects)

        # Guard against a zero elapsed time on coarse clocks.
        elapsed = time.time() - t1
        fps = 1. / elapsed if elapsed > 0 else 0.0
        cv2.putText(img, "FPS: {:.2f}".format(fps), (0, 30), 0, 1, (0, 0, 255), 2)

        out.write(img)

        print(f"frame{current} done")
        current += 1
finally:
    # Always release the capture and the writer, even if a frame fails, so the
    # output file is finalised and the input handle is not leaked.
    vid.release()
    out.release()

In [10]:
def count_nonblack_np(img):
    """Count the pixels of ``img`` that have at least one non-zero channel.

    ``img`` must be a NumPy array whose last axis holds the colour channels.
    """
    nonblack_mask = img.any(axis=-1)
    return nonblack_mask.sum()
def red(image):
    """Return the share of non-black pixels of a BGR image that look red.

    The image is converted to HSV and masked with two hue bands, one at each
    end of the hue axis (0-10 and 170-180), both requiring saturation >= 120
    and value >= 70.

    Returns:
        ``red_pixels / non_black_pixels``, or ``0.0`` when the image contains
        no non-black pixel (which would otherwise be a 0/0 division).
    """
    # Convert the image from BGR to HSV color space
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Lower and upper bounds of the two red bands in HSV
    lower_red1 = np.array([0, 120, 70])
    upper_red1 = np.array([10, 255, 255])
    lower_red2 = np.array([170, 120, 70])
    upper_red2 = np.array([180, 255, 255])

    mask_red1 = cv2.inRange(hsv_image, lower_red1, upper_red1)
    mask_red2 = cv2.inRange(hsv_image, lower_red2, upper_red2)

    # Combine both bands and keep only the matching pixels of the image
    full_mask_red = cv2.bitwise_or(mask_red1, mask_red2)
    red_regions = cv2.bitwise_and(image, image, mask=full_mask_red)

    tot_pix = count_nonblack_np(image)
    if tot_pix == 0:
        return 0.0
    color_pix = count_nonblack_np(red_regions)
    return color_pix / tot_pix
    
def green(image):
    """Return the share of non-black pixels of a BGR image that look green.

    The image is converted to HSV and masked with the range
    H 40-70, S 40-255, V 40-255.

    Returns:
        ``green_pixels / non_black_pixels``, or ``0.0`` when the image
        contains no non-black pixel (which would otherwise be a 0/0 division).
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lower_green = np.array([40, 40, 40])  # Lower bound of light green
    upper_green = np.array([70, 255, 255])  # Upper bound of light green

    # Create the mask for the green range and keep only the matching pixels
    mask_green = cv2.inRange(hsv_image, lower_green, upper_green)
    green_regions = cv2.bitwise_and(image, image, mask=mask_green)

    tot_pix = count_nonblack_np(image)
    if tot_pix == 0:
        return 0.0
    color_pix = count_nonblack_np(green_regions)
    return color_pix / tot_pix
def yellow(image):
    """Return the share of non-black pixels of a BGR image that look yellow.

    The image is converted to HSV and masked with the range
    H 20-30, S 100-255, V 100-255.

    Returns:
        ``yellow_pixels / non_black_pixels``, or ``0.0`` when the image
        contains no non-black pixel (which would otherwise be a 0/0 division).
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lower_yellow = np.array([20, 100, 100])
    upper_yellow = np.array([30, 255, 255])

    # Create the mask for the yellow range and keep only the matching pixels
    mask_yellow = cv2.inRange(hsv_image, lower_yellow, upper_yellow)
    yellow_regions = cv2.bitwise_and(image, image, mask=mask_yellow)

    tot_pix = count_nonblack_np(image)
    if tot_pix == 0:
        return 0.0
    color_pix = count_nonblack_np(yellow_regions)
    return color_pix / tot_pix

def white(image):
    """Return the share of non-black pixels of a BGR image that look white.

    The image is converted to HSV and masked with the range
    H 0-180, S 0-55, V 200-255 (any hue, low saturation, high value).

    Returns:
        ``white_pixels / non_black_pixels``, or ``0.0`` when the image
        contains no non-black pixel (which would otherwise be a 0/0 division).
    """
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Define the range for white color
    lower_white = np.array([0, 0, 200])
    upper_white = np.array([180, 55, 255])

    # Create the mask for the white range and keep only the matching pixels
    mask_white = cv2.inRange(hsv_image, lower_white, upper_white)
    white_regions = cv2.bitwise_and(image, image, mask=mask_white)

    tot_pix = count_nonblack_np(image)
    if tot_pix == 0:
        return 0.0
    color_pix = count_nonblack_np(white_regions)
    return color_pix / tot_pix

In [11]:
import json
import numpy as np

# Destination JSON file for the per-frame tracking records (written below
# with json.dump).
# NOTE(review): absolute, machine-specific path -- consider making it
# relative to a configurable data directory.
file_name = "/Users/fadijemmali/Desktop/Tracker/data/all_frames_data3.json"
def convert_to_serializable(data):
    """Recursively replace NumPy values with plain Python ones for ``json.dump``.

    Args:
        data: Arbitrarily nested combination of dicts, lists, tuples,
            NumPy arrays, NumPy scalars and plain Python values.

    Returns:
        The same structure where every ``np.ndarray`` became a (nested) list
        and every NumPy scalar (e.g. ``np.int64``, ``np.float32``) became the
        matching Python scalar. Lists stay lists, tuples stay tuples, dict keys
        are left untouched, and any other value is returned unchanged.
    """
    if isinstance(data, np.ndarray):
        return data.tolist()
    if isinstance(data, np.generic):
        # NumPy scalars are not JSON serializable; .item() yields the Python scalar.
        return data.item()
    if isinstance(data, list):
        return [convert_to_serializable(item) for item in data]
    if isinstance(data, tuple):
        # Arrays nested inside tuples were previously left unconverted.
        return tuple(convert_to_serializable(item) for item in data)
    if isinstance(data, dict):
        return {key: convert_to_serializable(value) for key, value in data.items()}
    return data

# Convert all ndarrays to lists so the per-frame records produced by the
# tracking loop (whose "box" values are NumPy arrays) can be written as JSON.
serializable_data = convert_to_serializable(all_frames_data)

# Overwrites file_name if it already exists ('w' mode).
with open(file_name, 'w') as file:
    json.dump(serializable_data, file, indent=4)

In [12]:
import copy
import json

# Reload the per-frame tracking records saved by the previous cell.
file_name = "/Users/fadijemmali/Desktop/Tracker/data/all_frames_data3.json"
with open(file_name, 'r') as file:
    all_frames_data = json.load(file)

# `all_frames_data` now holds the content of the JSON file. Later cells rewrite
# the 'class' and 'id' of every record in `data`, so take a deep copy: a
# shallow list.copy() would share the inner lists/dicts and silently modify
# `all_frames_data` as well.
data = copy.deepcopy(all_frames_data)

In [13]:
import cv2
import numpy as np
from collections import defaultdict, Counter
%matplotlib inline
# First pass over the clip: relabel every 'person' record as 'team1' (mostly
# red) or 'team2' (mostly white) from the colours inside its bounding box.
# Records in `data` are modified in place.

# Not used by the visible cells; kept in case a later cell reads it.
last_id_per_class = {}
# Drawing colour (BGR) for each class; defined once instead of once per frame
# so it also exists when `data` is empty.
color_dict = {
    "team1": (0, 0, 255),     # Red color for team1 (BGR format)
    "team2": (0, 255, 255),   # yellow color for team2
    "person": (255, 0, 255),  # Purple color for person (mix of red and blue)
    "ball": (255, 255, 255),  # White color for ball
}

vid = cv2.VideoCapture('./data/video/red_white.mp4')
try:
    for frame in data:
        _, img = vid.read()
        if img is None:
            print('Completed')
            break
        frame_height, frame_width = img.shape[:2]
        for obj in frame:
            if obj['class'] != 'person':
                continue
            xmin, ymin, xmax, ymax = (int(value) for value in obj['box'])
            # Clamp the box to the frame: a negative coordinate would be
            # interpreted by NumPy as an offset from the opposite edge and
            # yield an empty or unrelated crop.
            xmin, xmax = (min(max(value, 0), frame_width) for value in (xmin, xmax))
            ymin, ymax = (min(max(value, 0), frame_height) for value in (ymin, ymax))
            crop_img = img[ymin:ymax, xmin:xmax]
            if crop_img.size == 0:
                continue
            # Each ratio needs an HSV conversion of the crop, so compute it once.
            red_ratio = red(crop_img)
            white_ratio = white(crop_img)
            if red_ratio > 0.05 or white_ratio > 0.05:
                obj['class'] = 'team1' if red_ratio > white_ratio else 'team2'
finally:
    # Release the capture even if a frame fails.
    vid.release()

# Re-open the clip from its first frame for the rendering pass and create the
# writer for the annotated result.
# NOTE(review): this is the same output path as the tracking cell's writer, so
# that earlier result file is overwritten.
vid = cv2.VideoCapture('./data/video/red_white.mp4')

codec = cv2.VideoWriter_fourcc(*'mp4v')
# Keep the frame rate exactly as reported: truncating it to an int turns e.g.
# 29.97 fps into 29 fps and makes the written video play at the wrong speed.
vid_fps = vid.get(cv2.CAP_PROP_FPS)
vid_width, vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('./data/video/results-red_white.mp4', codec, vid_fps, (vid_width, vid_height))


# Collect, for every track id, the class it was given in each frame.
labels_per_id = defaultdict(list)

for frame in data:  # Iterate over each frame
    for obj in frame:  # Iterate over each detection in the frame
        # NOTE(review): track 20 is forced to 'person'; this id is hard-coded
        # and presumably specific to this clip -- confirm before reusing.
        if obj['id'] == 20:
            obj['class'] = 'person'
        labels_per_id[obj['id']].append(obj['class'])

# Most frequent class of each track id. Comprehension variables are used so the
# notebook-level `labels` list (the label map definition) is not overwritten.
most_common_label_per_id = {
    track_id: Counter(track_labels).most_common(1)[0][0]
    for track_id, track_labels in labels_per_id.items()
}

# Per class: a running 'counter' plus a mapping old track id -> new id, where
# new ids are numbered 1, 2, 3, ... in the iteration order of the dict above.
new_id_mapping = defaultdict(lambda: defaultdict(int))

for old_id, voted_label in most_common_label_per_id.items():
    # Increment the counter for the label and assign the new ID
    new_id_mapping[voted_label]['counter'] += 1
    new_id_mapping[voted_label][old_id] = new_id_mapping[voted_label]['counter']

def get_new_id(old_id, label):
    """Look up the per-class id that replaces ``old_id`` for class ``label``.

    The value is read from ``new_id_mapping``, where ids are numbered
    separately for each label.
    """
    ids_for_label = new_id_mapping[label]
    return ids_for_label[old_id]

# Rendering pass: give every record the majority class of its track and its
# per-class id, draw it on the matching frame and write the frame to `out`.
# NOTE(review): records in `data` are rewritten in place ('class' and 'id'), so
# this cell is not idempotent -- reload `data` before running it again.
try:
    for frame in data:
        _, img = vid.read()
        if img is None:
            print('Completed')
            break
        for obj in frame:
            # `track_id` rather than `id`: at notebook level the latter would
            # shadow the builtin for every later cell.
            track_id = obj['id']
            # Assign the most common label to this detection
            voted_class = most_common_label_per_id[track_id]
            obj['class'] = voted_class
            obj['id'] = get_new_id(track_id, voted_class)

            bbox = obj['box']
            box_color = color_dict[voted_class]
            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), box_color, 2)

            # 'person' and 'ball' are captioned with the class alone; team
            # members are captioned 'player<new id>'.
            if voted_class == 'person' or voted_class == 'ball':
                caption = voted_class
            else:
                caption = 'player' + str(obj['id'])
            cv2.putText(img, caption, (int(bbox[0]), int(bbox[1]-10)), 0, 0.4, box_color, 2)

        out.write(img)
finally:
    # Always release both handles so the output file is finalised even if a
    # frame fails.
    vid.release()
    out.release()