In [1]:
import numpy as np
import pandas as pd
import itertools
import glob
import os
import cv2
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm
from multiprocessing import Pool
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
import random
import warnings

from cuml.cluster import KMeans as KMeans
warnings.simplefilter('ignore')

In [2]:
from external_lib.deep_sort_pytorch.utils.parser import get_config
from external_lib.deep_sort_pytorch.deep_sort import DeepSort

from external_lib.NFLlib.score import NFLAssignmentScorer, check_submission
from external_lib.NFLlib.features import add_track_features


In [3]:
# Root folder of the competition data; later cells build the train/test video
# folders and the label CSV path from it.
base_dir = "/work/data/input/nfl-health-and-safety-helmet-assignment"
# When True, the driver loop reads videos from the train folder and prints the
# score comparison for every video.
debug = True

## function

In [4]:
def compute_color_for_id(label):
    """
    Derive a deterministic colour triple from a numeric id.

    The same ``label`` always yields the same 3-tuple of ints, so a track keeps
    its colour from frame to frame.
    """
    palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

    # Quadratic spread of the id, shared by the three channels.
    spread = label ** 2 - label + 1
    channels = []
    for base in palette:
        channels.append(int((base * spread) % 255))
    return tuple(channels)

def plot_one_box(x, im, color=None, label=None, line_thickness=3):
    """
    Draw a single bounding box, and optionally a caption, on ``im`` with OpenCV.

    ``x`` holds the box corners as (x1, y1, x2, y2). When ``color`` is falsy a
    random colour is picked. The drawing calls receive ``im`` itself, and the
    same object is returned.
    """
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'

    # Line thickness: the explicit value, otherwise derived from the image size.
    thickness = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]

    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(im, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)

    if not label:
        return im

    font_thickness = max(thickness - 1, 1)
    font_scale = thickness / 3
    text_size = cv2.getTextSize(label, 0, fontScale=font_scale, thickness=font_thickness)[0]
    # Filled rectangle just above the box corner, sized to the caption text.
    caption_corner = top_left[0] + text_size[0], top_left[1] - text_size[1] - 3
    cv2.rectangle(im, top_left, caption_corner, color, -1, cv2.LINE_AA)
    cv2.putText(im, label, (top_left[0], top_left[1] - 2), 0, font_scale, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)
    return im



In [5]:



def deepsort_helmets(video_data,
                     video_dir,
                     deepsort_config='deepsort.yaml',
                     plot=False,
                     plot_frames=[],
                     video_name=None):
    """
    Track the helmet boxes of one video with DeepSort and attach a track id to each box.

    Args:
        video_data: DataFrame with the helmet boxes of a single video. The columns
            ``frame``, ``video_frame``, ``left``, ``top``, ``width`` and ``height`` are read.
        video_dir: Folder that contains ``<video_name>.mp4``.
        deepsort_config: Path of the YAML file merged into the DeepSort config.
        plot: When True, show every frame after ``cfg.DEEPSORT.N_INIT`` with its track boxes.
        plot_frames: Extra frame numbers to show regardless of ``plot``
            (only read here, never mutated).
        video_name: File stem of the video. When omitted it is built from the first
            three underscore-separated parts of the first ``video_frame`` value, the
            same rule the notebook uses to build its ``video`` column.

    Returns:
        The per-frame rows concatenated into one DataFrame, with the box centres
        ``x``/``y`` added and, for frames where DeepSort returned tracks,
        ``top_deepsort`` and ``deepsort_cluster`` taken from the track whose
        ``left`` is nearest. An empty ``video_data`` is returned as an empty copy.

    Raises:
        RuntimeError: if the video cannot be opened or a frame cannot be decoded.
    """
    if len(video_data) == 0:
        return video_data.copy()

    if video_name is None:
        # Do not depend on a notebook-level variable to locate the video file.
        first_video_frame = str(video_data['video_frame'].iloc[0])
        video_name = '_'.join(first_video_frame.split('_')[:3])

    # Setup Deepsort
    cfg = get_config()
    cfg.merge_from_file(deepsort_config)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Open the video once for the whole run; the handle is released in `finally`.
    video_path = f'{video_dir}/{video_name}.mp4'
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f'Could not open video: {video_path}')

    # Run through frames.
    video_data = video_data.sort_values('frame').reset_index(drop=True)
    ds = []
    try:
        # Group by the column name (not a one-element list) so that `frame` is a
        # scalar on every pandas version.
        for frame, d in tqdm(video_data.groupby('frame'), total=video_data['frame'].nunique()):
            d = d.copy()  # never write new columns onto a groupby slice
            d['x'] = (d['left'] + round(d['width'] / 2))
            d['y'] = (d['top'] + round(d['height'] / 2))

            xywhs = d[['x', 'y', 'width', 'height']].values

            # Seek to the requested frame and decode it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame - 1)
            success, image = cap.read()
            if not success:
                raise RuntimeError(f'Could not read frame {frame} of {video_path}')

            # Convert the channel order from BGR to RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Every box is passed with confidence 1 and class 0.
            confs = np.ones([len(d), ])
            clss = np.zeros([len(d), ])

            outputs = deepsort.update(xywhs, confs, clss, image)

            if (plot and frame > cfg.DEEPSORT.N_INIT) or (frame in plot_frames):
                # Boxes are drawn directly on `image`, so the frame is still shown
                # when the tracker returned nothing.
                for output in outputs:
                    track_id = output[4]
                    plot_one_box(output[0:4], image, label=f'{track_id}',
                                 color=compute_color_for_id(track_id), line_thickness=2)
                fig, ax = plt.subplots(figsize=(15, 10))
                video_frame = d['video_frame'].values[0]
                ax.set_title(f'Deepsort labels: {video_frame}')
                ax.imshow(image)
                plt.show()
                plt.close(fig)

            preds_df = pd.DataFrame(outputs, columns=['left', 'top', 'right', 'bottom', 'deepsort_cluster', 'class'])

            if len(preds_df) > 0:
                # TODO Fix this messy merge
                # Each input box receives the track whose `left` coordinate is nearest.
                d = pd.merge_asof(d.sort_values(['left', 'top']),
                                  preds_df[['left', 'top', 'deepsort_cluster']]
                                  .sort_values(['left', 'top']), on='left', suffixes=('', '_deepsort'),
                                  direction='nearest')
            ds.append(d)
    finally:
        cap.release()

    dout = pd.concat(ds)
    return dout


def add_deepsort_label_col(out):
    """
    Give every row the most frequent ``label`` of its DeepSort track.

    For each ``deepsort_cluster`` the label that occurs most often, and the number
    of rows carrying it, are looked up once and written to two new columns.
    Ties are resolved deterministically in favour of the label that sorts first.

    Args:
        out: DataFrame with a ``label`` column and (normally) a ``deepsort_cluster``
            column. It is modified in place.

    Returns:
        The same DataFrame with ``label_deepsort`` and ``label_count_deepsort``
        added. Rows without a ``deepsort_cluster`` value get NaN in both columns.
    """
    if 'deepsort_cluster' not in out.columns:
        # No frame produced a track, so there is nothing to vote on.
        out['label_deepsort'] = float('nan')
        out['label_count_deepsort'] = float('nan')
        return out

    # One row per (cluster, label) pair with its frequency. Naming the count column
    # explicitly avoids relying on how pandas names the result of value_counts(),
    # which differs between pandas versions.
    pair_counts = (
        out.groupby(['deepsort_cluster', 'label'])
        .size()
        .rename('label_count')
        .reset_index()
        .sort_values('label_count', ascending=False, kind='mergesort')  # stable sort
    )
    # After the descending sort the first row of each cluster is its top label.
    top_per_cluster = (
        pair_counts.drop_duplicates('deepsort_cluster', keep='first')
        .set_index('deepsort_cluster')
    )

    out['label_deepsort'] = out['deepsort_cluster'].map(top_per_cluster['label'])
    out['label_count_deepsort'] = out['deepsort_cluster'].map(top_per_cluster['label_count'])

    return out


def score_vs_deepsort(myvideo, out, labels):
    """
    Print the score of the raw labels next to the score of the DeepSort labels.

    ``labels`` is narrowed to the rows of ``<myvideo>.mp4`` and handed to
    ``NFLAssignmentScorer``. ``out`` is scored twice: once with its own ``label``
    column and once with ``label_deepsort`` renamed to ``label``. In both cases
    only the first row per (video_frame, label) pair is kept. Nothing is returned.
    """
    myvideo_mp4 = myvideo + '.mp4'
    video_scorer = NFLAssignmentScorer(labels.query('video == @myvideo_mp4'))

    # Baseline: original labels, one row per (video_frame, label).
    baseline_rows = out.groupby(['video_frame', 'label']).first().reset_index()
    base_video_score = video_scorer.score(baseline_rows)

    # DeepSort variant: swap the label column, then de-duplicate the same way.
    relabelled = out.drop(columns='label').rename(columns={'label_deepsort': 'label'})
    print(relabelled.shape)
    relabelled = relabelled.groupby(['video_frame', 'label']).first().reset_index()
    print(relabelled.shape)
    deepsort_video_score = video_scorer.score(relabelled)

    print(f'{base_video_score:0.5f} before --> {deepsort_video_score:0.5f} deepsort')

# Load the baseline helmet boxes in this cell so that it runs on a fresh kernel.
# It used to alias `submission_df_v2`, a name that is only assigned in a later cell.
# NOTE(review): the train baseline file is read in both debug and non-debug mode,
# matching the file the later cell loads - confirm which file the test run needs.
submission_df = pd.read_csv(f"{base_dir}/train_baseline_helmets.csv")

# Add video and frame columns to submission.
submission_df['video'] = submission_df['video_frame'].str.split('_').str[:3].str.join('_')
submission_df['frame'] = submission_df['video_frame'].str.split('_').str[-1].astype('int')

labels = pd.read_csv(f"{base_dir}/train_labels.csv")

if debug:
    video_dir = f"{base_dir}/train/"
else:
    video_dir = f"{base_dir}/test/"

# Loop through test videos and apply. If in debug mode show the score change.
outs = []
# The loop variable keeps the name `myvideo` because other code in this notebook
# refers to a global of that name.
for myvideo, video_data in tqdm(submission_df.groupby('video'), total=submission_df['video'].nunique()):
    # To inspect individual frames pass e.g. plot_frames=[5, 10, 43].
    out = deepsort_helmets(video_data, video_dir)
    out = add_deepsort_label_col(out)
    outs.append(out)
    if debug:
        score_vs_deepsort(myvideo, out, labels)

submission_deepsort = pd.concat(outs).copy()

## 画素値集計

def deepsort_helmets(video_data,
                     video_dir,
                     deepsort_config='deepsort.yaml',
                     plot=False,
                     plot_frames=[],
                     video_name=None):
    """
    Track the helmet boxes of one video with DeepSort and attach a track id to each box.

    NOTE: a function with this name is already defined earlier in the notebook;
    once this cell has run, this definition is the one in effect.

    Args:
        video_data: DataFrame with the helmet boxes of a single video. The columns
            ``frame``, ``video_frame``, ``left``, ``top``, ``width`` and ``height`` are read.
        video_dir: Folder that contains ``<video_name>.mp4``.
        deepsort_config: Path of the YAML file merged into the DeepSort config.
        plot: When True, show every frame after ``cfg.DEEPSORT.N_INIT`` with its track boxes.
        plot_frames: Extra frame numbers to show regardless of ``plot``
            (only read here, never mutated).
        video_name: File stem of the video. When omitted it is built from the first
            three underscore-separated parts of the first ``video_frame`` value, the
            same rule the notebook uses to build its ``video`` column.

    Returns:
        The per-frame rows concatenated into one DataFrame, with the box centres
        ``x``/``y`` added and, for frames where DeepSort returned tracks,
        ``top_deepsort`` and ``deepsort_cluster`` taken from the track whose
        ``left`` is nearest. An empty ``video_data`` is returned as an empty copy.

    Raises:
        RuntimeError: if the video cannot be opened or a frame cannot be decoded.
    """
    if len(video_data) == 0:
        return video_data.copy()

    if video_name is None:
        # Do not depend on a notebook-level variable to locate the video file.
        first_video_frame = str(video_data['video_frame'].iloc[0])
        video_name = '_'.join(first_video_frame.split('_')[:3])

    # Setup Deepsort
    cfg = get_config()
    cfg.merge_from_file(deepsort_config)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Open the video once for the whole run; the handle is released in `finally`.
    video_path = f'{video_dir}/{video_name}.mp4'
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f'Could not open video: {video_path}')

    # Run through frames.
    video_data = video_data.sort_values('frame').reset_index(drop=True)
    ds = []
    try:
        # Group by the column name (not a one-element list) so that `frame` is a
        # scalar on every pandas version.
        for frame, d in tqdm(video_data.groupby('frame'), total=video_data['frame'].nunique()):
            d = d.copy()  # never write new columns onto a groupby slice
            d['x'] = (d['left'] + round(d['width'] / 2))
            d['y'] = (d['top'] + round(d['height'] / 2))

            xywhs = d[['x', 'y', 'width', 'height']].values

            # Seek to the requested frame and decode it.
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame - 1)
            success, image = cap.read()
            if not success:
                raise RuntimeError(f'Could not read frame {frame} of {video_path}')

            # Convert the channel order from BGR to RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Every box is passed with confidence 1 and class 0.
            confs = np.ones([len(d), ])
            clss = np.zeros([len(d), ])

            outputs = deepsort.update(xywhs, confs, clss, image)

            if (plot and frame > cfg.DEEPSORT.N_INIT) or (frame in plot_frames):
                # Boxes are drawn directly on `image`, so the frame is still shown
                # when the tracker returned nothing.
                for output in outputs:
                    track_id = output[4]
                    plot_one_box(output[0:4], image, label=f'{track_id}',
                                 color=compute_color_for_id(track_id), line_thickness=2)
                fig, ax = plt.subplots(figsize=(15, 10))
                video_frame = d['video_frame'].values[0]
                ax.set_title(f'Deepsort labels: {video_frame}')
                ax.imshow(image)
                plt.show()
                plt.close(fig)

            preds_df = pd.DataFrame(outputs, columns=['left', 'top', 'right', 'bottom', 'deepsort_cluster', 'class'])

            if len(preds_df) > 0:
                # TODO Fix this messy merge
                # Each input box receives the track whose `left` coordinate is nearest.
                d = pd.merge_asof(d.sort_values(['left', 'top']),
                                  preds_df[['left', 'top', 'deepsort_cluster']]
                                  .sort_values(['left', 'top']), on='left', suffixes=('', '_deepsort'),
                                  direction='nearest')
            ds.append(d)
    finally:
        cap.release()

    dout = pd.concat(ds)
    return dout




In [6]:
from lib.noglobal import noglobal

# Read the train_baseline_helmets.csv table.
submission_df_v2 = pd.read_csv("/work/data/input/nfl-health-and-safety-helmet-assignment/train_baseline_helmets.csv")
# Name of the single video examined in the following cells.
myvideo = "57594_000923_Sideline"
# Keep the rows whose video_frame contains that name. reset_index() without
# drop=True keeps the previous index as an extra "index" column.
sub  = submission_df_v2[submission_df_v2["video_frame"].str.contains(myvideo)].reset_index()
# The frame number is the part after the last underscore of video_frame.
sub['frame'] = sub['video_frame'].str.split('_').str[-1].astype('int')
#myvideo = "57596_002686"

In [7]:
@noglobal()
def extract_pixel(img,box):
    """
    Average the pixel values of ``img`` inside a bounding box.

    Args:
        img: Image array indexed as ``img[y, x]``, optionally with trailing
            channel axes.
        box: Sequence ``(left, top, right, bottom)``; ``right`` and ``bottom`` are
            exclusive. Coordinates are truncated to int and clipped to the image
            bounds, so a box that sticks out of the image neither wraps around
            nor raises.

    Returns:
        The per-channel mean as a list of floats (a single float for a 2-D image).
        When the clipped box contains no pixel, NaN is returned in that same shape.
    """
    rows, cols = img.shape[:2]
    left = max(int(box[0]), 0)
    upper = max(int(box[1]), 0)
    right = min(int(box[2]), cols)
    lower = min(int(box[3]), rows)

    if right <= left or lower <= upper:
        # Keep the result shape stable so callers can still index per channel.
        return np.full(img.shape[2:], np.nan).tolist()

    # One vectorised mean over the crop instead of a Python loop over pixels.
    region = img[upper:lower, left:right]
    return region.reshape((-1,) + img.shape[2:]).mean(axis=0).tolist()
    

## Arguments for the inline (non-function) version of the tracking loop below.
# `myvideo` and `sub` must already exist from an earlier cell; the
# self-assignment raises NameError otherwise.
myvideo = myvideo
video_data = sub
deepsort_config='deepsort.yaml'
plot = True
video_dir = "/work/data/input/nfl-health-and-safety-helmet-assignment/train"
plot_frames=[4]

## End of arguments.

# Setup Deepsort: start from the default config, merge the YAML file on top of it
# and pass the DEEPSORT section values to the tracker.
cfg = get_config()
cfg.merge_from_file(deepsort_config)    
deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                    max_dist=cfg.DEEPSORT.MAX_DIST,
                    min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                    nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                    max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                    max_age=cfg.DEEPSORT.MAX_AGE,
                    n_init=cfg.DEEPSORT.N_INIT,
                    nn_budget=cfg.DEEPSORT.NN_BUDGET,
                    use_cuda=True)

# Run through frames.
# NOTE(review): frames below FIRST_FRAME are skipped before the tracker sees them.
# The value is carried over unchanged from the original cell - confirm it is intended.
FIRST_FRAME = 279

video_data = video_data.sort_values('frame').reset_index(drop=True)
ds = []

# Open the video once for the whole loop and always release the handle.
cap = cv2.VideoCapture(f'{video_dir}/{myvideo}.mp4')
try:
    # Group by the column name (not a one-element list) so that `frame` is a
    # scalar on every pandas version.
    for frame, d in tqdm(video_data.groupby('frame'), total=video_data['frame'].nunique()):

        if frame < FIRST_FRAME:
            continue

        d = d.copy()  # never write new columns onto a groupby slice
        d['x'] = (d['left'] + round(d['width'] / 2))
        d['y'] = (d['top'] + round(d['height'] / 2))

        xywhs = d[['x', 'y', 'width', 'height']].values

        # Seek to the requested frame and decode it.
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame - 1)
        success, image = cap.read()
        if not success:
            raise RuntimeError(f'Could not read frame {frame} of {myvideo}.mp4')

        # Convert the channel order from BGR to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Every box is passed with confidence 1 and class 0.
        confs = np.ones([len(d), ])
        clss = np.zeros([len(d), ])

        outputs = deepsort.update(xywhs, confs, clss, image)

        preds_df = pd.DataFrame(outputs, columns=['left', 'top', 'right', 'bottom', 'deepsort_cluster', 'class'])
        preds_df["frame"] = frame

        if len(preds_df) > 0:
            # Mean colour inside every tracked box, one column per RGB channel.
            boxes = preds_df[["left", "top", "right", "bottom"]].values
            pixel_value_list = [extract_pixel(image, box) for box in boxes]
            pixel_value_list_numpy = np.array(pixel_value_list)

            preds_df["average_pixel_r"] = pixel_value_list_numpy[:, 0]
            preds_df["average_pixel_g"] = pixel_value_list_numpy[:, 1]
            preds_df["average_pixel_b"] = pixel_value_list_numpy[:, 2]

        ds.append(preds_df)
finally:
    cap.release()


dout = pd.concat(ds).reset_index()

# Split the boxes into two clusters by their mean colour.
kmeans = KMeans(n_clusters=2, max_iter=1000, init='scalable-k-means++')
# The mean colour is stored in three separate columns; there is no single
# "average_pixel" column to read.
pixel_value_list_numpy = dout[["average_pixel_r", "average_pixel_g", "average_pixel_b"]].values
b = kmeans.fit(pixel_value_list_numpy)
# Store each row's cluster index in the `team` column.
dout["team"] = kmeans.labels_
display(dout)

# Box colour for each k-means cluster index.
TEAM_COLORS = {0: [0, 255, 0], 1: [255, 0, 0]}

# Open the video once for the whole loop and always release the handle.
cap = cv2.VideoCapture(f'{video_dir}/{myvideo}.mp4')
try:
    # Group by the column name (not a one-element list) so that `frame` is a
    # scalar on every pandas version.
    for frame, d in tqdm(dout.groupby('frame'), total=dout['frame'].nunique()):

        # Only decode the frames that are actually going to be shown.
        if not (plot and frame > cfg.DEEPSORT.N_INIT):
            continue

        cap.set(cv2.CAP_PROP_POS_FRAMES, frame - 1)
        success, image = cap.read()
        if not success:
            raise RuntimeError(f'Could not read frame {frame} of {myvideo}.mp4')

        # Convert the channel order from BGR to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        boxes = d[['left', 'top', 'right', 'bottom']].values
        # A local name is used on purpose so the notebook-level `labels` is left alone.
        team_ids = d["team"].values

        for box, team_id in zip(boxes, team_ids):
            team_id = int(team_id)
            if team_id not in TEAM_COLORS:
                raise ValueError(f'Unexpected team id {team_id!r}; expected 0 or 1')
            # The box is drawn directly on `image`.
            plot_one_box(box, image, label=str(team_id), color=TEAM_COLORS[team_id], line_thickness=2)

        fig, ax = plt.subplots(figsize=(15, 10))
        video_frame = d['frame'].values[0]
        ax.set_title(f'Deepsort labels: {video_frame}')
        ax.imshow(image)
        plt.show()
        plt.close(fig)
finally:
    cap.release()
            
            
        
    
    

## Deepsortを使わない場合

In [8]:
## arg
# `myvideo` and `sub` come from an earlier cell.
video_data = sub
plot = True
video_dir = "/work/data/input/nfl-health-and-safety-helmet-assignment/train"
plot_frames=[4]


# Run through frames.
video_data = video_data.sort_values('frame').reset_index(drop=True)
ds = []

# Open the video once for the whole loop and always release the handle.
cap = cv2.VideoCapture(f'{video_dir}/{myvideo}.mp4')
try:
    # Group by the column name (not a one-element list) so that `frame` is a
    # scalar on every pandas version.
    for frame, d in tqdm(video_data.groupby('frame'), total=video_data['frame'].nunique()):

        d = d.copy()  # never write new columns onto a groupby slice
        d['x'] = (d['left'] + round(d['width'] / 2))
        d['y'] = (d['top'] + round(d['height'] / 2))
        d["right"] = (d['left'] + round(d['width']))
        d["bottom"] = (d['top'] + round(d['height']))

        # Seek to the requested frame and decode it.
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame - 1)
        success, image = cap.read()
        if not success:
            raise RuntimeError(f'Could not read frame {frame} of {myvideo}.mp4')

        # Convert the channel order from BGR to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Mean colour inside every detected box, one column per RGB channel.
        boxes = d[["left", "top", "right", "bottom"]].values
        pixel_value_list = [extract_pixel(image, box) for box in boxes]
        pixel_value_list_numpy = np.array(pixel_value_list)

        d["average_pixel_r"] = pixel_value_list_numpy[:, 0]
        d["average_pixel_g"] = pixel_value_list_numpy[:, 1]
        d["average_pixel_b"] = pixel_value_list_numpy[:, 2]
        d["frame"] = frame
        ds.append(d)
finally:
    cap.release()

  0%|          | 0/509 [00:00<?, ?it/s]

In [9]:
# Detections at or below this confidence are left out of the clustering.
MIN_CONF = 0.2

dout = pd.concat(ds).reset_index()
# .copy() makes the filtered result an independent frame, so the later
# `dout["team"] = ...` assignment does not write onto a slice.
dout = dout[dout["conf"] > MIN_CONF].copy()

In [34]:
dout

Unnamed: 0,index,left,top,right,bottom,deepsort_cluster,class,frame,average_pixel_r,average_pixel_g,average_pixel_b
0,0,396,111,413,129,2,0,1,81.027778,107.611111,62.083333
1,1,700,256,715,272,4,0,1,155.040000,172.680000,146.000000
2,2,748,219,767,237,7,0,1,90.722222,115.722222,68.722222
3,3,51,264,69,281,8,0,1,96.694444,110.750000,69.444444
4,4,510,333,530,351,9,0,1,88.238095,115.023810,67.238095
...,...,...,...,...,...,...,...,...,...,...,...
11702,18,686,222,704,242,139,0,1,82.023810,107.309524,63.642857
11703,19,383,134,400,150,150,0,1,79.866667,108.200000,62.866667
11704,20,728,220,746,237,152,0,1,89.500000,108.277778,71.777778
11705,21,760,262,775,279,187,0,1,158.733333,121.700000,103.000000


CPU times: user 4.65 s, sys: 1.21 s, total: 5.86 s
Wall time: 753 ms


In [10]:
%%time
# Fit a 2-cluster k-means on the per-box mean R, G and B values.
# `KMeans` is the cuml class here: the cuml import comes after the sklearn one in
# the first cell and therefore replaces it.
kmeans = KMeans(n_clusters=2, max_iter=100000, init='scalable-k-means++')
pixel_value_list_numpy = dout[["average_pixel_r","average_pixel_g","average_pixel_b"]].values
b = kmeans.fit(pixel_value_list_numpy)        
# Store each row's cluster index in the `team` column.
dout["team"] = kmeans.labels_

CPU times: user 797 ms, sys: 375 ms, total: 1.17 s
Wall time: 1.17 s


In [11]:
# Set to True to render the frames with team-coloured boxes. With False nothing is
# drawn and, unlike before, no video frame is decoded just to be thrown away.
show_team_plots = False

# Box colour for each k-means cluster index.
TEAM_COLORS = {0: [0, 255, 0], 1: [255, 0, 0]}

if show_team_plots:
    # Open the video once for the whole loop and always release the handle.
    cap = cv2.VideoCapture(f'{video_dir}/{myvideo}.mp4')
    try:
        # Group by the column name (not a one-element list) so that `frame` is a
        # scalar on every pandas version.
        for frame, d in tqdm(dout.groupby('frame'), total=dout['frame'].nunique()):

            # `cfg` is the DeepSort config object created in an earlier cell.
            if not frame > cfg.DEEPSORT.N_INIT:
                continue

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame - 1)
            success, image = cap.read()
            if not success:
                raise RuntimeError(f'Could not read frame {frame} of {myvideo}.mp4')

            # Convert the channel order from BGR to RGB.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            boxes = d[['left', 'top', 'right', 'bottom']].values
            # A local name is used on purpose so the notebook-level `labels` is left alone.
            team_ids = d["team"].values

            for box, team_id in zip(boxes, team_ids):
                team_id = int(team_id)
                if team_id not in TEAM_COLORS:
                    raise ValueError(f'Unexpected team id {team_id!r}; expected 0 or 1')
                # The box is drawn directly on `image`.
                plot_one_box(box, image, label=str(team_id), color=TEAM_COLORS[team_id], line_thickness=2)

            fig, ax = plt.subplots(figsize=(15, 10))
            video_frame = d['frame'].values[0]
            ax.set_title(f'Deepsort labels: {video_frame}')
            ax.imshow(image)
            plt.show()
            plt.close(fig)
    finally:
        cap.release()
            

  0%|          | 0/509 [00:00<?, ?it/s]