In [1]:
import random, os
import numpy as np
import torch
import glob

def seed_everything(seed: int) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and puts cuDNN
    into its deterministic configuration.

    Args:
        seed: Value used for all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker processes).
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Covers every visible GPU; silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels from run
    # to run, which defeats the deterministic flag above.
    torch.backends.cudnn.benchmark = False


seed_everything(42)

In [2]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [3]:
import pandas as pd

# Load the labelled events, then add the parsed timestamp (`date_orig`) and the
# same instant as whole seconds since the Unix epoch (`date_int`).
train_df = pd.read_csv('train/cleaned_train.csv').assign(
    date_orig=lambda events: pd.to_datetime(events['date'], format='%Y-%m-%d %H:%M:%S'),
    # Timestamp.value is in nanoseconds.
    date_int=lambda events: events['date_orig'].map(lambda ts: ts.value // 10 ** 9),
)
train_df.head()

Unnamed: 0,id,date,employee,action,date_orig,date_int
0,1256,2022-05-24 08:08:36,2,15.0,2022-05-24 08:08:36,1653379716
1,1257,2022-05-24 08:08:37,2,4.0,2022-05-24 08:08:37,1653379717
2,1258,2022-05-24 08:12:13,3,15.0,2022-05-24 08:12:13,1653379933
3,1259,2022-05-24 08:12:25,3,2.0,2022-05-24 08:12:25,1653379945
4,1260,2022-05-24 08:12:42,3,12.0,2022-05-24 08:12:42,1653379962


In [4]:
# Per-frame timestamps read from the train1 video: one tab-separated
# "<frame>\t<date>" row per frame. Rows whose date cannot be parsed are dropped,
# and the date text is rewritten in the same format as train_df['date'].
parsed_time_train1_df = (
    pd.read_csv('parsed_time_train1.txt', sep='\t', names=['frame', 'date'])
    .assign(
        date_orig=lambda raw: pd.to_datetime(
            raw['date'], format='%m-%d-%Y %a %H:%M:%S', errors='coerce'
        )
    )
    .dropna(subset=['date_orig'])
    .assign(
        date=lambda kept: kept['date_orig'].map(lambda ts: ts.strftime('%Y-%m-%d %H:%M:%S')),
        folder='train1',
    )
)

parsed_time_df = parsed_time_train1_df.copy()

In [5]:
# Quick look at the first parsed train1 frame timestamps.
parsed_time_df.head(n=5)

Unnamed: 0,frame,date,date_orig,folder
0,1,2022-05-24 08:08:33,2022-05-24 08:08:33,train1
1,2,2022-05-24 08:08:34,2022-05-24 08:08:34,train1
2,3,2022-05-24 08:08:34,2022-05-24 08:08:34,train1
3,4,2022-05-24 08:08:34,2022-05-24 08:08:34,train1
4,5,2022-05-24 08:08:34,2022-05-24 08:08:34,train1


In [6]:
# Same parsing as for train1, applied to the train2 timestamps.
# NOTE: the variable keeps the `train1` name because the following cell
# concatenates `parsed_time_train1_df`; from here on it holds the train2 rows.
# Frame numbers are negated so they cannot clash with train1 frame numbers.
parsed_time_train1_df = (
    pd.read_csv('parsed_time_train2.txt', sep='\t', names=['frame', 'date'])
    .assign(
        date_orig=lambda raw: pd.to_datetime(
            raw['date'], format='%m-%d-%Y %a %H:%M:%S', errors='coerce'
        )
    )
    .dropna(subset=['date_orig'])
    .assign(
        date=lambda kept: kept['date_orig'].map(lambda ts: ts.strftime('%Y-%m-%d %H:%M:%S')),
        folder='train2',
        frame=lambda kept: -kept['frame'],
    )
)
parsed_time_train1_df.head()

Unnamed: 0,frame,date,date_orig,folder
0,-1,2022-05-26 08:02:47,2022-05-26 08:02:47,train2
1,-2,2022-05-26 08:02:47,2022-05-26 08:02:47,train2
2,-3,2022-05-26 08:02:47,2022-05-26 08:02:47,train2
3,-4,2022-05-26 08:02:47,2022-05-26 08:02:47,train2
4,-5,2022-05-26 08:02:47,2022-05-26 08:02:47,train2


In [7]:
# Stack train1 (positive frame numbers) and train2 (negative frame numbers) and
# index the result by the signed frame number so a frame can be looked up with
# `parsed_time_df.loc[frame]`.
#
# The cell reassigns `parsed_time_df` from itself, so re-running it would append
# the train2 rows a second time and create duplicate index labels (a `.loc`
# lookup would then return several rows instead of one). Dropping repeated
# (folder, frame) pairs keeps the cell idempotent.
parsed_time_df = (
    pd.concat([parsed_time_df, parsed_time_train1_df])
    .drop_duplicates(subset=['folder', 'frame'])
)
parsed_time_df['frame_ind'] = parsed_time_df['frame']
parsed_time_df = parsed_time_df.set_index('frame_ind')
parsed_time_df.head()

Unnamed: 0_level_0,frame,date,date_orig,folder
frame_ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,2022-05-24 08:08:33,2022-05-24 08:08:33,train1
2,2,2022-05-24 08:08:34,2022-05-24 08:08:34,train1
3,3,2022-05-24 08:08:34,2022-05-24 08:08:34,train1
4,4,2022-05-24 08:08:34,2022-05-24 08:08:34,train1
5,5,2022-05-24 08:08:34,2022-05-24 08:08:34,train1


In [8]:
# The index holds signed frame numbers, so -1 is a label (first train2 frame),
# not "the last row".
parsed_time_df.loc[-1, 'frame']

-1

In [9]:
# Number of timestamped frames that came from train1.
parsed_time_df.loc[parsed_time_df['folder'].eq('train1')].shape

(65964, 4)

In [10]:
# Number of timestamped frames that came from train2.
parsed_time_df.loc[parsed_time_df['folder'].eq('train2')].shape

(99763, 4)

In [11]:
# Attach to every labelled event all frames that carry the same timestamp string.
# The outer join plus `_merge` indicator makes unmatched rows inspectable:
# filter on 'left_only' instead of 'both' to list events that have no frame.
df = (
    pd.merge(train_df, parsed_time_df, how="outer", on=['date'], indicator=True)
    .loc[lambda merged: merged['_merge'] == 'both']
    # The outer join introduced NaNs and therefore floats; restore integers.
    .astype({'employee': int, 'action': int, 'frame': int})
)
df.head()

Unnamed: 0,id,date,employee,action,date_orig_x,date_int,frame,date_orig_y,folder,_merge
0,1256.0,2022-05-24 08:08:36,2,15,2022-05-24 08:08:36,1653380000.0,12,2022-05-24 08:08:36,train1,both
1,1256.0,2022-05-24 08:08:36,2,15,2022-05-24 08:08:36,1653380000.0,13,2022-05-24 08:08:36,train1,both
2,1256.0,2022-05-24 08:08:36,2,15,2022-05-24 08:08:36,1653380000.0,14,2022-05-24 08:08:36,train1,both
3,1256.0,2022-05-24 08:08:36,2,15,2022-05-24 08:08:36,1653380000.0,15,2022-05-24 08:08:36,train1,both
4,1256.0,2022-05-24 08:08:36,2,15,2022-05-24 08:08:36,1653380000.0,16,2022-05-24 08:08:36,train1,both


In [12]:
# Search regions passed as `crop_box` to detect_employee_and_save, which slices
# the frame after it has been resized to 1920x1080:
# rows crop_box[0]:crop_box[2], columns crop_box[1]:crop_box[3].
front_door_borders=[300,100,1050,1350] # row_top,col_top,row_bottom,col_bottom
# Action codes routed to the front-door frame sets. The collection loop tests
# both `action` and `action-10` against this list.
front_door_ids=[1,2,7,8,9]

back_door_borders=[100,1000,400,1400] # row_top,col_top,row_bottom,col_bottom
# NOTE(review): not referenced by the visible cells. Any action that does not
# match front_door_ids is treated as a back-door event.
back_door_ids=[3,4,5,6]

In [13]:
# Load the YOLOv5x6 detector through torch.hub (the 'custom' entrypoint of the
# ultralytics/yolov5 repository). May download or read the hub cache on first use.
people_detect_model = torch.hub.load('ultralytics/yolov5','custom', 'yolov5x6') #'ultralytics/yolov5', 'custom', 'yolov5s-cls.pt'
# Report detections of class index 0 only.
# NOTE(review): presumably the COCO "person" class; confirm against the weights' class list.
people_detect_model.classes = [0]

Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-8-10 Python-3.8.10 torch-1.13.0.dev20220701+cu116 CUDA:0 (NVIDIA GeForce RTX 3090, 24268MiB)

Fusing layers... 
YOLOv5x6 summary: 574 layers, 140730220 parameters, 0 gradients
Adding AutoShape... 


In [14]:
from pathlib import Path
# One empty frame set per employee id (0..11) for each door, plus the output
# directory that will receive that employee's crops.
employee_front_frames = {i: set() for i in range(12)}
employee_back_frames = {i: set() for i in range(12)}
for i in employee_front_frames:
    Path(f'employee_dataset_new/{i}').mkdir(parents=True, exist_ok=True)


In [15]:
# Spot check: the labelled event(s) matched to train2 frame 6 (stored as -6).
df.loc[df['frame'].eq(-6)]

Unnamed: 0,id,date,employee,action,date_orig_x,date_int,frame,date_orig_y,folder,_merge
6227,2516.0,2022-05-26 08:02:48,6,15,2022-05-26 08:02:48,1653552000.0,-6,2022-05-26 08:02:48,train2,both


In [16]:
# Spot check: the timestamp row of train2 frame 6 (stored as -6).
parsed_time_df.loc[parsed_time_df['frame'].eq(-6)]

Unnamed: 0_level_0,frame,date,date_orig,folder
frame_ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-6,-6,2022-05-26 08:02:48,2022-05-26 08:02:48,train2


In [17]:
# For every (event, frame) match, collect neighbouring frames of the same video
# whose timestamp lies within 4 seconds of the event, and file them under the
# employee in the front-door or back-door set.
#   * actions 1..9   -> the 5 frames preceding the matched frame
#   * actions 11..19 -> the matched frame and the 4 frames following it
#   * any other action contributes nothing
for ind in df.index:
    action = int(df.at[ind, 'action'])
    employee = int(df.at[ind, 'employee'])
    frame = int(df.at[ind, 'frame'])
    date_orig = df.at[ind, 'date_orig_x']

    # The sign of the frame number encodes the video: train2 frames are negative.
    k = -1 if frame < 0 else 1
    frame_no = abs(frame)

    if 0 < action < 10:
        frame_range = range(frame_no - 5, frame_no)
    elif 10 < action < 20:
        frame_range = range(frame_no, frame_no + 5)
    else:
        frame_range = range(0)

    if action in front_door_ids or action - 10 in front_door_ids:
        door_frames = employee_front_frames
    else:
        door_frames = employee_back_frames

    for frame_candidate in frame_range:
        # Frame numbers start at 1. Near the start of a video the window can
        # reach 0 or below, which after applying the sign would address a
        # non-existent frame or a frame of the other video.
        if frame_candidate < 1:
            continue
        i = frame_candidate * k
        # Frames with an unparseable timestamp were dropped, and the window can
        # run past the last frame; skip those instead of raising KeyError.
        if i not in parsed_time_df.index:
            continue

        dt = parsed_time_df.at[i, 'date_orig']
        if abs(dt - date_orig).total_seconds() < 4:
            door_frames[employee].add(i)
                
        
        

In [18]:
# Report how many candidate frames were collected per employee for each door.
for door_label, frames_by_employee in (
    ('front', employee_front_frames),
    ('back', employee_back_frames),
):
    print(door_label)
    for k, collected in frames_by_employee.items():
        print(k, len(collected))

front
0 1823
1 36
2 496
3 1823
4 117
5 1530
6 144
7 90
8 153
9 0
10 976
11 3149
back
0 2937
1 457
2 1716
3 2929
4 784
5 2617
6 2464
7 206
8 180
9 0
10 2348
11 804


In [19]:
# Preview the timestamp rows of the back-door frames collected for employee 0.
parsed_time_df.filter(items=[*employee_back_frames[0]], axis=0).head()

Unnamed: 0_level_0,frame,date,date_orig,folder
frame_ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-8189,-8189,2022-05-26 10:26:15,2022-05-26 10:26:15,train2
-8188,-8188,2022-05-26 10:26:15,2022-05-26 10:26:15,train2
-8187,-8187,2022-05-26 10:26:15,2022-05-26 10:26:15,train2
-8186,-8186,2022-05-26 10:26:15,2022-05-26 10:26:15,train2
-8185,-8185,2022-05-26 10:26:14,2022-05-26 10:26:14,train2


In [20]:
# Preview the timestamp rows of the front-door frames collected for employee 8.
parsed_time_df.filter(items=[*employee_front_frames[8]], axis=0).head()

Unnamed: 0_level_0,frame,date,date_orig,folder
frame_ind,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
-34282,-34282,2022-05-26 20:25:13,2022-05-26 20:25:13,train2
-34281,-34281,2022-05-26 20:25:13,2022-05-26 20:25:13,train2
-34280,-34280,2022-05-26 20:25:13,2022-05-26 20:25:13,train2
-34279,-34279,2022-05-26 20:25:13,2022-05-26 20:25:13,train2
-34278,-34278,2022-05-26 20:25:12,2022-05-26 20:25:12,train2


In [21]:
import cv2
from PIL import Image


def detect_employee_and_save(frame,crop_box,result_path):
    """Detect a single person in a door region of one frame and save the crop.

    The frame is loaded, resized to 1920x1080 and restricted to ``crop_box``
    before it is passed to ``people_detect_model``. The crop is written only
    when the detector reports exactly one box for that region.

    Args:
        frame: Signed frame number. Negative values address
            ``train2/out{-frame}.png``, other values ``train1/out{frame}.png``.
        crop_box: ``[row_top, col_top, row_bottom, col_bottom]`` of the search
            region, in pixels of the 1920x1080 resized frame.
        result_path: Destination path of the saved crop.

    Returns:
        True when a crop was written to ``result_path``; False when the frame
        could not be read, the detector did not return exactly one box, or the
        box was empty after clipping to the frame.
    """
    frame_width, frame_height = 1920, 1080

    if frame<0:
        frame_path=f"/home/hacaton_irkutsk/train/train2/out{-frame}.png"
    else:
        frame_path=f"/home/hacaton_irkutsk/train/train1/out{frame}.png"

    img = cv2.imread(frame_path, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        print(f"skip: cannot read {frame_path}")
        return False
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(img, (frame_width, frame_height), interpolation = cv2.INTER_AREA)
    search_region = resized[crop_box[0]:crop_box[2],crop_box[1]:crop_box[3]]

    results = people_detect_model(search_region)
    detections = results.xyxy[0]
    # Each row is six values, of which the first four are x1, y1, x2, y2.
    # Only an unambiguous single detection is accepted.
    if detections.shape != torch.Size([1, 6]):
        return False

    bbox = detections[0].cpu().numpy()[:4].astype(int)
    # Shift from search-region coordinates to resized-frame coordinates and
    # keep the box inside the frame (a negative start would wrap around).
    x1, x2 = np.clip([bbox[0] + crop_box[1], bbox[2] + crop_box[1]], 0, frame_width)
    y1, y2 = np.clip([bbox[1] + crop_box[0], bbox[3] + crop_box[0]], 0, frame_height)

    # The box lives in the resized frame's coordinate system, so the crop must
    # come from `resized`; slicing the unresized image would be misaligned for
    # any source that is not already 1920x1080.
    person = resized[y1:y2, x1:x2]
    if person.size == 0:
        return False

    Image.fromarray(person).save(result_path)
    return True
    
# Build the per-employee image dataset: for each employee, run the detector on
# the collected back-door frames and then the front-door frames. Files are
# numbered consecutively, and the counter advances only when a crop was saved.
for employee_id in range(12):
    output_dir = f'employee_dataset_new/{employee_id}'
    saved_count = 0

    print(employee_id)
    for door_label, frames_by_employee, door_borders in (
        ('back', employee_back_frames, back_door_borders),
        ('front', employee_front_frames, front_door_borders),
    ):
        print(door_label)
        for frame in frames_by_employee[employee_id]:
            target_path = f'{output_dir}/{saved_count}.png'
            if detect_employee_and_save(frame, door_borders, target_path):
                saved_count += 1
        
#     break
            
        
            
        
            

0
back
front
1
back
front
2
back
front
3
back
front
4
back
front
5
back
front
6
back
front
7
back
front
8
back
front
9
back
front
10
back
front
11
back
front
