In [1]:
import sys
sys.path.append('./faster_RCNN/')

from H_dataset_class import HDataset

import os
import torch
import pandas as pd

from PIL import Image
import numpy as np
import cv2
import pickle

In [2]:
# --- Configuration: which video to process and where files live ---------------
METADATA_DIR = '/home/ubuntu/workspace/clubbertv/metadata/'

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this metadata from a trusted location.
# The `with` block closes the file handle, which the bare open() call leaked.
with open(os.path.join(METADATA_DIR, 'videos-duration.pkl'), 'rb') as metadata_file:
    VIDEOS_DURATION = pickle.load(metadata_file)

VIDEO_NAME = 'clubber_video9.mp4'
# First and last entries of the per-video metadata record
# (presumably start/end offsets in seconds, going by the names -- TODO confirm).
START_SECOND = VIDEOS_DURATION[VIDEO_NAME][0]
END_SECOND = VIDEOS_DURATION[VIDEO_NAME][-1]

VIDEOS_DIR = '../raw_videos'
os.makedirs(VIDEOS_DIR, exist_ok=True)

VIDEO_PATH = os.path.join(VIDEOS_DIR, VIDEO_NAME)
S3_BUCKET = 's3://bf-editor-videos/clubbertv'

# Download the video only when it is not already present locally.
if not os.path.exists(VIDEO_PATH):
    # S3 URIs always use '/', so join explicitly instead of with os.path.join,
    # which would use the platform's filesystem separator.
    s3_uri = S3_BUCKET + '/' + VIDEO_NAME
    download_res = os.system('aws s3 cp ' + s3_uri + ' ' + VIDEO_PATH)
    if download_res != 0:
        # RuntimeError is still an Exception, so existing handlers keep working.
        raise RuntimeError('Error while syncing data!!!')
    else:
        print('Video correctly downloaded')

# Scratch directory that receives the extracted JPEG frames.
FRAMES_DIR = 'frames_tmp'
os.makedirs(FRAMES_DIR, exist_ok=True)

Video correctly downloaded


In [3]:
# Dump the frames of a fixed-length window of the video, starting at
# START_SECOND, into FRAMES_DIR as zero-padded JPEG files, then delete the
# source video.
CLIP_SECONDS = 30 * 60  # length of the processed window, in seconds

vs = cv2.VideoCapture(VIDEO_PATH)
if not vs.isOpened():
    # Fail before the cleanup at the bottom: otherwise an unreadable video
    # would produce zero frames and then be deleted without any error.
    raise IOError('Could not open video: ' + VIDEO_PATH)

try:
    height = int(vs.get(cv2.CAP_PROP_FRAME_HEIGHT))
    width = int(vs.get(cv2.CAP_PROP_FRAME_WIDTH))
    fps = round(vs.get(cv2.CAP_PROP_FPS))

    frame_id = 0
    start_frame = START_SECOND * fps
    end_frame = (START_SECOND + CLIP_SECONDS) * fps  # inclusive upper bound

    while frame_id <= end_frame:

        if frame_id < start_frame:
            # grab() advances the stream without decoding the image, which is
            # all that is needed for frames before the window.
            if not vs.grab():
                break
            frame_id += 1
            continue

        grabbed, frame = vs.read()
        if not grabbed:
            break

        # The file name carries the frame index; later cells parse it back.
        save_path = os.path.join(FRAMES_DIR, str(frame_id).zfill(8) + '.jpg')
        if not cv2.imwrite(save_path, frame):
            # imwrite reports failure through its return value; surface it
            # instead of deleting the video with frames missing.
            raise IOError('Could not write frame: ' + save_path)

        frame_id += 1
finally:
    # Release the capture even when extraction fails part-way.
    vs.release()

# Only reached on success: the frames on disk replace the raw video.
os.remove(VIDEO_PATH)

In [4]:
from torchvision import transforms

def my_collate(batch):
    """Collate a sequence of equally-shaped tensors into one batch tensor.

    Args:
        batch: sequence of tensors, one per sample.

    Returns:
        A single tensor with the samples stacked along a new leading dimension.
    """
    return torch.stack(batch, dim=0)

class MyDataset(torch.utils.data.Dataset):
    """Dataset that loads images from disk and returns them as tensors.

    Each item is the image at the corresponding path, opened with PIL,
    converted to RGB and passed through ``transforms.ToTensor()``.
    """

    def __init__(self, data_df):
        """Store the image paths and build the transform pipeline.

        Args:
            data_df: indexable, sized collection of image file paths
                (the notebook passes a sorted list of paths).
        """
        # Use the constructor argument. The previous code read the
        # module-level `img_paths` global and ignored `data_df`, so it only
        # worked when that global happened to exist and match.
        self.img_paths = data_df
        self.transforms = transforms.Compose([transforms.ToTensor()])

    def __getitem__(self, idx):
        """Return the image at position ``idx`` as a tensor."""
        img = Image.open(self.img_paths[idx]).convert("RGB")
        img = self.transforms(img)
        return img

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.img_paths)

In [5]:
# Gather every JPEG in FRAMES_DIR, ordered by path (the zero-padded file names
# make lexicographic order match frame order).
img_paths = [
    os.path.join(FRAMES_DIR, file_name)
    for file_name in os.listdir(FRAMES_DIR)
    if file_name.endswith('.jpg')
]
img_paths.sort()

dataset = MyDataset(img_paths)

# shuffle=False keeps batches in the same order as img_paths, which the later
# cells rely on when pairing predictions with frames.
loader_options = dict(
    batch_size=16,
    shuffle=False,
    num_workers=4,
    collate_fn=my_collate,
    pin_memory=True,
)
data_loader = torch.utils.data.DataLoader(dataset=dataset, **loader_options)

In [6]:
from torchvision import transforms
import numpy as np

@torch.no_grad()
def inference(model, data_loader, device):
    """Run ``model`` over every batch of ``data_loader`` with gradients disabled.

    Args:
        model: module to evaluate; it is switched to eval mode and moved to
            ``device``.
        data_loader: iterable yielding batch tensors.
        device: device on which the batches are evaluated.

    Returns:
        A flat list holding the elements of each batch output, in loader order.
    """
    model.eval()
    model.to(device)

    collected = []
    for batch in data_loader:
        batch_outputs = model(batch.to(device))
        # Each batch output is iterated, so the list holds one entry per
        # element of the output rather than one entry per batch.
        collected.extend(batch_outputs)

    return collected
        
# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
checkpoint_path = 'trained_models/checkpoint-10.pt'

# map_location remaps the stored tensors onto `device`; without it a
# checkpoint saved from CUDA cannot be loaded on a CPU-only machine, which
# would defeat the fallback above.
# NOTE(review): the checkpoint is used directly as a model object, so it is a
# full pickled module rather than a state_dict -- unpickling runs arbitrary
# code, so only load checkpoints from a trusted source.
model = torch.load(checkpoint_path, map_location=device)

# inference() switches the model to eval mode and moves it to `device` itself.
preds = inference(model, data_loader, device)

In [7]:
# Build a long-format table of detections: one row per predicted box.

# Recover the frame number from each file name (e.g. '00019371.jpg' -> 19371).
# os.path is used because the paths were built with os.path.join, so the
# separator is not guaranteed to be '/'.
frames = [int(os.path.splitext(os.path.basename(img_path))[0]) for img_path in img_paths]

# zip() silently truncates to the shorter input; a length mismatch would pair
# predictions with the wrong frames, so fail loudly instead.
assert len(frames) == len(preds), 'number of predictions does not match number of frames'

frames_list = []
points = []
scores_list = []

for frame, pred in zip(frames, preds):
    boxes = pred['boxes'].cpu().numpy()
    scores = pred['scores'].cpu().numpy()

    # Frames without detections contribute no rows (the zip is simply empty).
    for b, s in zip(boxes, scores):
        frames_list.append(frame)
        points.append(b)
        scores_list.append(s)

log_df = pd.DataFrame({'frame': frames_list, 'points': points, 'score': scores_list})
# Drop rows without a box and renumber the index from 0.
log_df = log_df[log_df['points'].notnull()].reset_index(drop=True)
log_df

Unnamed: 0,frame,points,score
0,19371,"[410.53696, 0.96802163, 467.43884, 39.34807]",0.097869
1,19405,"[411.137, 1.08263, 624.4006, 87.910484]",0.232400
2,19406,"[407.74384, 6.462352, 635.7931, 90.32554]",0.342061
3,19407,"[423.32516, 8.770724, 634.2957, 90.250336]",0.544703
4,19408,"[369.45715, 4.3966103, 630.5453, 94.00476]",0.210654
...,...,...,...
43703,64187,"[311.98077, 69.97229, 364.18713, 91.44499]",0.858484
43704,64188,"[330.27853, 70.132576, 381.55692, 91.05926]",0.850376
43705,64189,"[344.57468, 70.37331, 398.9219, 91.85774]",0.925933
43706,64190,"[367.70227, 70.188576, 418.01013, 91.3684]",0.071521


In [8]:
# Render the annotated video: draw every detection on its frame, append the
# frame to out.avi, and delete the frame image once it has been written.
writer = cv2.VideoWriter('out.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, (width, height), True)

# Group the detections by frame once. Filtering the whole table for every
# frame made the loop cost grow with (frames x detections).
detections_by_frame = {frame_no: rows for frame_no, rows in log_df.groupby('frame')}

try:
    for img_path in img_paths:
        img = cv2.imread(img_path)

        # The frame number is encoded in the file name.
        frame = int(os.path.splitext(os.path.basename(img_path))[0])
        df = detections_by_frame.get(frame)

        if df is not None:
            for box, score in zip(df['points'], df['score']):
                # Truncate to pixel coordinates and hand OpenCV plain Python ints.
                x1, y1, x2, y2 = box.astype(int).tolist()
                cv2.rectangle(img, (x1, y1), (x2, y2), (89, 54, 22), 2)
                # The score label is anchored at the centre of the box.
                cv2.putText(img, str(round(float(score), 2)), (x1+(x2-x1)//2, y1+(y2-y1)//2), cv2.FONT_HERSHEY_SIMPLEX, 2, (255,123,145), 2, cv2.LINE_AA)

        writer.write(img)
        # The frame is no longer needed once it is part of the output video.
        os.remove(img_path)
finally:
    # Always finalise the output file, even if rendering fails part-way.
    # (The capture `vs` was already released right after frame extraction.)
    writer.release()