In [1]:
# !pip install segmentation-models-pytorch==0.2.1

In [2]:
# !pip install ultralytics

In [1]:
import torch, cv2, os, glob, time
import albumentations as albu
import matplotlib.pyplot as plt
from ultralytics import YOLO
import segmentation_models_pytorch as smp
import numpy as np
import pandas as pd

In [2]:
os.environ["CUDA_VISIBLE_DEVICES"]="1"

In [3]:
def get_test_augmentation():
    return albu.Compose([
        albu.Resize(height=IMG_HEIGHT, width=IMG_WIDTH),
    ])

def to_tensor(x, **kwargs):
    return x.transpose(2, 0, 1).astype('float32')
    
def get_preprocessing(preprocessing_fn=None):
    """Construct preprocessing transform    
    Args:
        preprocessing_fn (callable): data normalization function 
            (can be specific for each pretrained neural network)
    Return:
        transform: albumentations.Compose
    """   
    _transform = []
        
    if preprocessing_fn:
        _transform.append(albu.Lambda(image=preprocessing_fn))

    _transform.append(albu.Lambda(name='to_tenzor', image=to_tensor, mask=to_tensor))
        
    return albu.Compose(_transform)

In [4]:
def corner_contours(shape, coeff, type=None):    
   
    weight = int(shape[1]/2)
    height = int(shape[0]/1.8)
    
    if type == 'warning':
        
        left_sight = 0
        right_sight = int(shape[1])
    else:        
        left_sight = int(weight - int(0.65*weight))
        right_sight = int(weight + int(0.65*weight))
    
    corner = int(weight*coeff)
    left_corner = left_sight + corner
    right_corner = right_sight - corner

    point1 = [left_corner, height]
    point2 = [left_sight, int(shape[0])]
    point3 = [right_sight,int(shape[0])]
    point4 = [right_corner, height]
    
    contours = np.array([point1, point2, point3, point4], dtype=np.int32)

    return contours

def danger_area(predict_frame):
    
    shape_image = predict_frame.shape
    pts = corner_contours(shape_image, coeff=0.5)

    mask = np.zeros(shape_image, dtype=np.uint8)
    ignore_mask_color = (255)
    cv2.fillConvexPoly(mask, pts, ignore_mask_color)
    masked_image = cv2.bitwise_and(predict_frame, mask)
    
    return masked_image

def warning_area(image_frame):
    shape_image = image_frame.shape[:2]
    pts = corner_contours(shape_image, coeff=0.85, type='warning')

    mask = np.zeros(image_frame.shape, dtype=np.uint8)
    channel_count = image_frame.shape[2]  # i.e. 3 or 4 depending on your image
    ignore_mask_color = (255,)*channel_count
    cv2.fillConvexPoly(mask, pts, ignore_mask_color)
    masked_image = cv2.bitwise_and(image_frame, mask)
    binary_mask = (masked_image[:,:,0] > 0).astype(int)
    
    return binary_mask

In [5]:
def predict(image, shape):
    '''
        Выход модели, вовзращает тензоры маски рельс(1) или пути(0).
    '''
    
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    img = augmentor(image=image)['image']
    img = preprocessor(image=img)['image']
    
    x_tensor = torch.from_numpy(img).to(DEVICE).unsqueeze(0)
    pr_mask = model.predict(x_tensor)
    
    pr_mask = pr_mask.cpu().squeeze(0).numpy().round()
    answer = (np.sum(pr_mask, axis=0) > 0).astype('uint8')
    answer_res = cv2.resize(answer, shape)
    
    return answer_res

In [6]:
DEVICE = 'cuda:0'
IMG_WIDTH = 704
IMG_HEIGHT = 704

ENCODER = 'resnet18'
ENCODER_WEIGHTS = 'imagenet'

preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)    
augmentor = get_test_augmentation()
preprocessor = get_preprocessing(preprocessing_fn)

In [7]:
model_path = '/home/jovyan/work/SEGMENT/weights/DeepLabV3Plus_resnet18_e100_bestloss.pth'

model = torch.load(model_path, DEVICE)
_ = model.eval()

In [8]:
model_yolo = YOLO('yolov8m.pt')

class_list =[0, 1, 2, 3, 5, 7]

In [20]:
def create_df(image, name, df, count_status, count_alarm, count_frame, status, alarm_status):
    
    shape_image = image.shape[1], image.shape[0]
    
    if count_status == 50:
        status = False
        
    if (count_alarm == 50) & (alarm_status == True):
        
        alarm_status = False
        label = 3
        df.loc[len(df)] = name, label, count_frame
        
    predict_yolo = model_yolo.predict(source=image, classes = class_list, conf=0.6, verbose=False)
    bbox = predict_yolo[0].boxes.data.detach().cpu().numpy()
    bbox_data = bbox[bbox[:,-1] != 7][:,:-2].astype(int)
    
    if len(bbox_data) != 0:
        
        predict_segment = predict(image, (shape_image))
        binary_danger_area = danger_area(predict_segment)
        
        for i in bbox_data:
            
            test_area = binary_danger_area[i[1]:i[3], i[0]:i[2]]
            summary_check = test_area.sum()
            
            if (summary_check > 20) & (status == False):
                                
                label = 1            
                status = True
                count_status = -1
                count_alarm -= 1
                df.loc[len(df)] = name, label, count_frame                
                filename = 'label_sub/violation{}_{}'.format(name, count_frame) + '.jpg'
                image_result_ = predict_yolo[0].plot()
                cv2.imwrite(filename, image_result_)
                break
                
            elif summary_check > 10:
                count_status = -1
                count_alarm -= 1
                break
                
            else:
                                
                binary_test_area = warning_area(frame)
                test_area = binary_test_area[i[1]:i[3], i[0]:i[2]]
                summary_check = test_area.sum()
                
                thresholder = int(0.2 * test_area.shape[0] * test_area.shape[1])

                if (summary_check > thresholder) & (alarm_status == False):
                    label = 2
                    alarm_status = True
                    count_alarm = -1
                    count_status -= 1
                    df.loc[len(df)] = name, label, count_frame
                    break
                    
                elif summary_check > thresholder:
                    count_alarm = -1
                    count_status -= 1
                    break
                else:
                    continue
                    
    count_alarm += 1
    count_status += 1                    
                    
    return df, count_status, count_frame, count_alarm, status, alarm_status

In [62]:
df = pd.DataFrame(columns=['name', 'type_violation', 'frame'])
time_list = []
duration_list = []
for file_path in glob.glob('/home/jovyan/work/hakaton_train_dataset/test_video/*.mp4'):
    
    cap = cv2.VideoCapture(file_path)
    fps = cap.get(cv2.CAP_PROP_FPS)   
    
    count_status = 0
    count_frame = 0
    count_alarm = 0
    status = False
    alarm_status = False
    
    ret = True
    name = file_path.split(os.sep)[-1]
    start = time.time()
    while ret:
        ret, frame = cap.read()    
        if ret:        
            df, count_status, count_frame, count_alarm, status, alarm_status = create_df(frame, name, df, count_status, count_alarm, count_frame, status, alarm_status)
            count_frame += 1
            
    end = time.time()
    time_list.append(end-start)
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))    
    duration = frame_count/fps
    duration_list.append(duration)
    print('next')
    break

next


In [22]:
fps = 25

df['timestamps'] = df['frame'] / fps
df['timestamps'] = pd.to_datetime(df.timestamps, unit='s').dt.strftime('%M:%S')

df.head(15)

Unnamed: 0,name,type_violation,frame,timestamps
0,danger.mp4,2,0,00:00
1,danger.mp4,1,156,00:06
2,danger.mp4,3,203,00:08
3,danger.mp4,2,222,00:08


In [17]:
df_duration = pd.DataFrame(columns=['name', 'calculate_duration', 'real_duration'])

for file_path in glob.glob('/home/jovyan/work/hakaton_train_dataset/test_video/*.mp4'):
    name = file_path.split(os.sep)[-1]
    index_ = len(df_duration)
    df_duration.loc[index_] = name, time_list[index_], duration_list[index_]

df_duration['calculate_duration'] = pd.to_datetime(df_duration.calculate_duration, unit='s').dt.strftime('%M:%S')
df_duration['real_duration'] = pd.to_datetime(df_duration.real_duration, unit='s').dt.strftime('%M:%S')

In [20]:
df_duration.head()

Unnamed: 0,name,calculate_duration,real_duration
0,00_08_36.mp4,03:47,07:06
1,00_20_12.mp4,03:08,04:45
2,00_21_23.mp4,03:21,05:22
3,00_26_43.mp4,04:27,07:48
4,00_41_35.mp4,03:20,05:15


In [22]:
df.to_csv('submit_all_video_endV0.csv', index=False)
df_duration.to_csv('duration_all_video_endV0.csv', index=False)