In [1]:
import pandas as pd
import numpy as np

from external_lib.NFLlib.score import NFLAssignmentScorer, check_submission
from external_lib.NFLlib.features import add_track_features
from tqdm.notebook import tqdm

import warnings
warnings.simplefilter('ignore')

## ファイル読み込み

In [2]:
label_path = "/work/data/input/nfl-health-and-safety-helmet-assignment/train_labels.csv"
helmets_path = "/work/data/input/nfl-health-and-safety-helmet-assignment/train_baseline_helmets.csv"
tracking_path =  "/work/data/input/nfl-health-and-safety-helmet-assignment/train_player_tracking.csv"
sub_path =  "/work/analysis/baseline/baseline_rotate_xy/submission-baseline_upgrade.csv"


sub = pd.read_csv(sub_path)
labels = pd.read_csv(label_path)
helmets = pd.read_csv(helmets_path)
trackings = pd.read_csv(tracking_path)

### ファイル抽出

In [3]:
target_video =  "58095_004022_Sideline"
ext_sub = sub[sub["video_frame"].str.contains(target_video)]
ext_labels = labels[labels["video_frame"].str.contains(target_video)]

In [4]:
def evaluate(sub,labels):
    scorer = NFLAssignmentScorer(labels)
    baseline_score = scorer.score(sub)
    return baseline_score
    

In [5]:
evaluate(ext_sub,ext_labels)

0.05314715359828142

## Deepsort

### import及び関数定義

In [6]:
import numpy as np
import pandas as pd
import itertools
import glob
import os
import cv2
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm
from multiprocessing import Pool
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
import random

from external_lib.deep_sort_pytorch.utils.parser import get_config
from external_lib.deep_sort_pytorch.deep_sort import DeepSort

In [7]:
def compute_color_for_id(label):
    """
    Simple function that adds fixed color depending on the id
    """
    palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

    color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
    return tuple(color)

def plot_one_box(x, im, color=None, label=None, line_thickness=3):
    # Plots one bounding box on image 'im' using OpenCV
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label: 
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    return im

In [8]:

def deepsort_helmets(video_data,
                     video_dir,
                     deepsort_config='/work/config/deepsort.yaml',
                     plot=False,
                     plot_frames=[]):
    
    # Setup Deepsort
    cfg = get_config()
    cfg.merge_from_file(deepsort_config)    
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    
    # Run through frames.
    video_data = video_data.sort_values('frame').reset_index(drop=True)
    ds = []
    for frame, d in tqdm(video_data.groupby(['frame']), total=video_data['frame'].nunique()):
        d['x'] = (d['left'] + round(d['width'] / 2))
        d['y'] = (d['top'] + round(d['height'] / 2))

        xywhs = d[['x','y','width','height']].values

        cap = cv2.VideoCapture(f'{video_dir}/{myvideo}.mp4')
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame-1) # optional
        
        ##シーケンス
        success, image = cap.read()
        
        # 画像の色の順番を変更（BGR to RGB）
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        confs = np.ones([len(d),])
        clss =  np.zeros([len(d),])
        
        outputs = deepsort.update(xywhs, confs, clss, image)

 
        preds_df = pd.DataFrame(outputs, columns=['left','top','right','bottom','deepsort_cluster','class'])
        if len(preds_df) > 0:
            # TODO Fix this messy merge
            d = pd.merge_asof(d.sort_values(['left','top']),
                              preds_df[['left','top','deepsort_cluster']] \
                              .sort_values(['left','top']), on='left', suffixes=('','_deepsort'),
                              direction='nearest')
        ds.append(d)
    dout = pd.concat(ds)
    return dout


def add_deepsort_label_col(out):
    # Find the top occuring label for each deepsort_cluster
    sortlabel_map = out.groupby('deepsort_cluster')['label'].value_counts() \
        .sort_values(ascending=False).to_frame() \
        .rename(columns={'label':'label_count'}) \
        .reset_index() \
        .groupby(['deepsort_cluster']) \
        .first()['label'].to_dict()
    # Find the # of times that label appears for the deepsort_cluster.
    sortlabelcount_map = out.groupby('deepsort_cluster')['label'].value_counts() \
        .sort_values(ascending=False).to_frame() \
        .rename(columns={'label':'label_count'}) \
        .reset_index() \
        .groupby(['deepsort_cluster']) \
        .first()['label_count'].to_dict()
    
    out['label_deepsort'] = out['deepsort_cluster'].map(sortlabel_map)
    out['label_count_deepsort'] = out['deepsort_cluster'].map(sortlabelcount_map)

    return out


def score_vs_deepsort(myvideo, out, labels):
    # Score the base predictions compared to the deepsort postprocessed predictions.
    myvideo_mp4 = myvideo + '.mp4'
    labels_video = labels.query('video == @myvideo_mp4')
    scorer = NFLAssignmentScorer(labels_video)
    out_deduped = out.groupby(['video_frame','label']).first().reset_index()
    base_video_score = scorer.score(out_deduped)
    
    out_preds = out.drop('label', axis=1).rename(columns={'label_deepsort':'label'})
    print(out_preds.shape)
    out_preds = out_preds.groupby(['video_frame','label']).first().reset_index()
    print(out_preds.shape)
    deepsort_video_score = scorer.score(out_preds)
    print(f'{base_video_score:0.5f} before --> {deepsort_video_score:0.5f} deepsort')

### main関数

In [9]:
# Add video and frame columns to submission.
submission_df = ext_sub;
debug = True

base_dir = "/work/data/input/nfl-health-and-safety-helmet-assignment"

submission_df['video'] = submission_df['video_frame'].str.split('_').str[:3].str.join('_')
submission_df['frame'] = submission_df['video_frame'].str.split('_').str[-1].astype('int')

labels = pd.read_csv(f"{base_dir}/train_labels.csv")

if debug:
    video_dir = f"{base_dir}/train/"
else:
    video_dir = f"{base_dir}/test/"

    
    

# Loop through test videos and apply. If in debug mode show the score change.
outs = []
for myvideo, video_data in tqdm(submission_df.groupby('video'), total=submission_df['video'].nunique()):
    #print(myvideo)
    #print(f'==== {myvideo} ====')
    
    
    if debug:
        # Plot deepsort labels when in debug mode.
        out = deepsort_helmets(video_data, video_dir, plot_frames=[10, 150, 250])
    else:
        out = deepsort_helmets(video_data, video_dir)        
    out = add_deepsort_label_col(out)
    
    outs.append(out)
    if debug:        
        score_vs_deepsort(myvideo, out, labels)
        
    break;
        
    
submission_deepsort = pd.concat(outs).copy()

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/482 [00:00<?, ?it/s]

(9201, 14)
(6709, 14)
0.05315 before --> 0.05942 deepsort


In [10]:
ext_deepsort = submission_deepsort.drop('label', axis=1).rename(columns={'label_deepsort':'label'}).groupby(['video_frame','label']).first().reset_index()

## deepsortとオリジナルの推定結果の結合
deepsortのmain関数は一部レコードを削除するため、削除したものはdeepsort実施前のものを追加する。<br>
しかしその場合、同一フレーム内の異なるヘルメットに同じプレイヤーが割り当てられる。<br>
それを避けるため、deepsortの方を信頼する。

### function difinication

In [11]:
from external_lib.postprocess.merge_deepsort_original import merge_base_and_deepsort
            

In [12]:
Union_sub = merge_base_and_deepsort(ext_sub,ext_deepsort,erase_duplicated=False,random_allocation=True);
#Union_sub = merge_base_and_deepsort(ext_sub,ext_deepsort,erase_duplicated=False,random_allocation=True);
Union_sub = Union_sub.reset_index(drop=True)

In [13]:
evaluate(Union_sub,ext_labels)

0.06083780880773362

## 時間的要素による後処理


瞬間移動をなくす

### 事前準備

columns = Union_sub.columns

Union_sub["x"] = Union_sub["left"]  + Union_sub["width"]/2
Union_sub["y"] = Union_sub["top"]  + Union_sub["height"]/2
Union_sub["xy"] = [ np.array([x,y]) for x,y in zip(Union_sub["x"],Union_sub["y"])] 

Union_sub_pivot_df =  Union_sub.pivot(index = "frame",columns="label",values = "xy")

### 前後関係の処理

prev_Union_sub_pivot_df = Union_sub_pivot_df.shift(1)
dif_pivot_df = Union_sub_pivot_df -  prev_Union_sub_pivot_df

player_cols = []
distance_cols = [];
exchangeflag_cols = [];

for col in dif_pivot_df.columns:                
    dif_pivot_df[col+"_distance_bet_prev"] = dif_pivot_df[col].apply(lambda x:np.linalg.norm(x) if type(x) == np.ndarray else -1).fillna(-1)    
    dif_pivot_df[col + "_exchangeflag"] = dif_pivot_df[col+"_distance_bet_prev"]>15
    
    exchangeflag_cols.append(col + "_exchangeflag")
    distance_cols.append(col+"_distance_bet_prev")
    
    
dif_pivot_df["isOK"] = dif_pivot_df[exchangeflag_cols].sum(axis=1)
    

added_union_sub_pivot_df = Union_sub_pivot_df.add_suffix("_current_pos")
added_prev_Union_sub_pivot_df = prev_Union_sub_pivot_df.add_suffix("_prev_pos")
dif_pivot_df = pd.concat([dif_pivot_df,added_union_sub_pivot_df,added_prev_Union_sub_pivot_df],axis=1)

%%time
def find_replaced_label():
    
    
    pd_list = []
    before_series = None;
    change_hashmap_list =[]
    for index,each_series in dif_pivot_df.iterrows():        

        Leap_counts = each_series["isOK"]

        if (Leap_counts < 2):
            pass;
        elif(Leap_counts >= 2):
            change_hashmap = exchange_player_allocation(each_series,before_series,False);
            change_hashmap_list.append([index,change_hashmap])
        else:
            raise Exception("unexpected error has occurred");

        before_series = each_series;

    
    






#### グラフ出力

change_target = change_hashmap_list.copy()
pd_list = [];

for key,each_df in Union_sub.copy().groupby("frame"):
    ind = key+1;
    
    if ( (len(change_target) > 0) and change_target[0][0] ==  ind ):
        s = change_target.pop(0)
        
        modified = each_df.replace({"label":s[1]})
        modified["label"] = modified["label"].str.replace("_______","")
        pd_list.append(modified)
    else:
        pd_list.append(each_df)
        
    
    
modifiued_df = pd.concat(pd_list)


evaluate(modifiued_df,ext_labels)

### 関数定義

In [14]:
from external_lib.postprocess.remove_leap import remove_leap
sub=Union_sub.copy();
#sub = ext_sub.copy();
for i in range(1):
    s = remove_leap(sub,parameter=20)
    s._prepare();    
    sub = s.execute()
    
    #print(evaluate(sub,ext_labels))


In [15]:
sub["video"]

0       58095_004022_Sideline
1       58095_004022_Sideline
2       58095_004022_Sideline
3       58095_004022_Sideline
4       58095_004022_Sideline
                ...          
7995    58095_004022_Sideline
7996    58095_004022_Sideline
7997    58095_004022_Sideline
7998    58095_004022_Sideline
7999    58095_004022_Sideline
Name: video, Length: 9201, dtype: object

### K-means

In [16]:
@noglobal()
def extract_pixel(img,box):
    #left = box[0]
    #right = box[2]
    
    #upper = box[1]
    #lower = box[3]
    
    left = int(box[0] + (box[2] - box[0])*2/6)
    right = int(box[0] + (box[2] - box[0])*4/6)
    
    upper = int(box[1] + (box[3] - box[1])*2/6)
    lower = int(box[1] + (box[3] - box[1])*4/6)
    
    ret_list = []
    for x in range(left,right):
        for y in range(upper,lower):
            ret_list.append(img[y,x])
            
    
    return np.mean(ret_list,axis=0).tolist()
    

NameError: name 'noglobal' is not defined

In [None]:
from lib.noglobal import noglobal

@noglobal()
def add_team_label(sub):
    video_dir = "/work/data/input/nfl-health-and-safety-helmet-assignment/train"
    myvideo = "57583_000082_Endzone"
    video_data = sub.sort_values('frame').reset_index(drop=True)


    ds = []
    cap = cv2.VideoCapture(f'{video_dir}/{myvideo}.mp4')
    for frame, d in tqdm(video_data.groupby(['frame']), total=video_data['frame'].nunique()):
            
        d['x'] = (d['left'] + round(d['width'] / 2))
        d['y'] = (d['top'] + round(d['height'] / 2))
        d["right"] = (d['left'] + round(d['width']))
        d["bottom"] = (d['top'] + round(d['height']))

        xywhs = d[['x','y','width','height']].values

        
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame-1) # optional

        ##シーケンス
        success, image = cap.read()

        # 画像の色の順番を変更（BGR to RGB）
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


        pixel_value_list=[]
        for index,row in d.iterrows():
            bboxes = row[["left","top","right","bottom"]].values                
            pixel_value_list.append(extract_pixel(image,bboxes))

        pixel_value_list_numpy = np.array(pixel_value_list)

        d["average_pixel_r"] = pixel_value_list_numpy[:,0]
        d["average_pixel_g"] = pixel_value_list_numpy[:,1]
        d["average_pixel_b"] = pixel_value_list_numpy[:,2]
        d["frame"] = frame
        ds.append(d)
        
    
    
    ret_df = pd.concat(ds);
    pixel_value_list_numpy = ret_df[["average_pixel_r","average_pixel_g","average_pixel_b"]].values
    
    kmeans = KMeans(n_clusters=2, max_iter=100000, init='scalable-k-means++')    
    b = kmeans.fit(pixel_value_list_numpy)
    ret_df["team"] = kmeans.labels_
    
    return ret_df
    

In [None]:
a = add_team_label(sub)