In [None]:
!pip install decord

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting decord
  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)
[K     |████████████████████████████████| 13.6 MB 3.6 MB/s 
Installing collected packages: decord
Successfully installed decord-0.6.0


In [None]:
import os
import shutil
import subprocess
from datetime import datetime

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from decord import VideoReader
from decord import cpu, gpu
from keras.applications import vgg16, DenseNet201, densenet
from keras.preprocessing import image

In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive/ so files stored there can be read and
# written through the regular filesystem.
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
# ---------------------------------------------------------------------------
# Configuration constants
# ---------------------------------------------------------------------------
# Sampling stride: one frame out of every FRAMES_DELTA frames of an event is used.
FRAMES_DELTA = 5

# Output folder (relative to the working directory) for the descriptor CSV files.
VGG16_DESCRIPTORS_PATH = './VGG/'

# Google Drive folders holding the SoccerNet and SoccerDB video files.
SOCCERNET_PATH = '/content/drive/MyDrive/TCC/SoccerNet/'
SOCCERDB_PATH = '/content/drive/MyDrive/TCC/SoccerDB/'

In [None]:
# Functions
def to_seconds(time):
    """Convert an ``HH:MM:SS`` timestamp string into a whole number of seconds.

    Args:
        time: Timestamp text such as ``'01:47:36'``.

    Returns:
        int: Seconds elapsed since ``00:00:00``.

    Raises:
        ValueError: If ``time`` does not match the ``%H:%M:%S`` format.
    """
    parsed = datetime.strptime(time, '%H:%M:%S')
    return 3600 * parsed.hour + 60 * parsed.minute + parsed.second

def get_video_duration(video_path):
    """Return the duration of a video file in seconds, as reported by ffprobe.

    Args:
        video_path: Path of the video file to probe.

    Returns:
        float: The ``format=duration`` value printed by ffprobe.

    Raises:
        ValueError: If ffprobe exits with a non-zero status (the message then
            carries ffprobe's own diagnostics) or prints something that is
            not a number.
    """
    result = subprocess.run(
        ["ffprobe", "-v", "error", "-show_entries", "format=duration",
         "-of", "default=noprint_wrappers=1:nokey=1", video_path],
        stdout=subprocess.PIPE,
        # Captured separately: diagnostics mixed into stdout would corrupt the
        # number being parsed and hide the real cause of a failure.
        stderr=subprocess.PIPE)
    if result.returncode != 0:
        details = result.stderr.decode(errors='replace').strip()
        raise ValueError('ffprobe failed for %r: %s' % (video_path, details))
    return float(result.stdout)

def open_video(video_path):
    """Open a video for frame access and look up its duration.

    Args:
        video_path: Path of the video file.

    Returns:
        tuple: ``(reader, duration)`` where ``reader`` is a decord
        ``VideoReader`` created on the CPU context and ``duration`` is the
        value returned by ``get_video_duration`` for the same path.
    """
    # The reader is built from an open binary handle that is closed right
    # afterwards. NOTE(review): presumably decord consumes the whole stream
    # while constructing the reader — confirm before relying on it.
    with open(video_path, 'rb') as video_file:
      reader = VideoReader(video_file, ctx=cpu(0))
    return reader, get_video_duration(video_path)

def get_event_frames(frames_delta, video_cursor, video_duration, event_start_time, event_end_time):
    """Extract every ``frames_delta``-th frame of an event from an open video.

    Timestamps are mapped to frame indices proportionally: a time ``t`` maps
    to ``int(t / video_duration * total_frames)``.

    Args:
        frames_delta: Step between two consecutive sampled frame indices.
        video_cursor: Open video reader; must support ``len()`` and
            ``get_batch(indices)``.
        video_duration: Duration of the video in seconds.
        event_start_time: Event start as an ``HH:MM:SS`` string.
        event_end_time: Event end as an ``HH:MM:SS`` string (exclusive bound
            of the sampled range).

    Returns:
        The sampled frames converted with ``asnumpy()``.
    """
    # Map the event boundaries to frame indices
    total_frames = len(video_cursor)
    event_start = to_seconds(event_start_time)
    event_end = to_seconds(event_end_time)
    start_frame = int((event_start / video_duration) * total_frames)
    ending_frame = int((event_end / video_duration) * total_frames)

    # An event annotated past the probed duration would otherwise request
    # frame indices that do not exist in the video.
    ending_frame = min(ending_frame, total_frames)
    frames_index = np.arange(start_frame, ending_frame, frames_delta)

    # Fetch the selected frames in a single batch
    frames = video_cursor.get_batch(frames_index)
    return frames.asnumpy()

def get_video_path(SoccerDB2SoccerNet, video_name):
    """Resolve where the file of a SoccerDB video is stored.

    Args:
        SoccerDB2SoccerNet: DataFrame with the columns ``'SoccerDB Name'`` and
            ``'SoccerNet Name'``.
        video_name: SoccerDB file name of the video.

    Returns:
        str: ``SOCCERNET_PATH`` + the mapped SoccerNet name when the mapping
        has a non-empty entry for ``video_name``; otherwise
        ``SOCCERDB_PATH`` + ``video_name``.
    """
    is_video = SoccerDB2SoccerNet['SoccerDB Name'] == video_name
    candidates = SoccerDB2SoccerNet.loc[is_video, 'SoccerNet Name'].to_list()
    # A video missing from the mapping is treated like one without a SoccerNet
    # counterpart instead of raising IndexError on the empty list.
    soccernet_name = candidates[0] if candidates else ''
    if soccernet_name:
      return SOCCERNET_PATH + soccernet_name
    return SOCCERDB_PATH + video_name

In [None]:
# All annotated segments (events)
seg_info = pd.read_csv('https://raw.githubusercontent.com/newsdata/SoccerDB/master/dataset/video_dataset/seg_info.csv')

# Video name mapping between SoccerDB and SoccerNet.
# Missing SoccerNet names become empty strings so they test as falsy later on.
SoccerDB2SoccerNet = pd.read_csv('https://raw.githubusercontent.com/newsdata/SoccerDB/master/dataset/video_dataset/SoccerDB2SoccerNet.csv')
SoccerDB2SoccerNet['SoccerNet Name'] = SoccerDB2SoccerNet['SoccerNet Name'].fillna('')
# astype(str) replaces the deprecated DataFrame.applymap(str)
SoccerDB2SoccerNet = SoccerDB2SoccerNet.astype(str)

seg_info

Unnamed: 0,seg_id,video_name,start_time,end_time,event_start_time,event_end_time,cls_id,highlight_cls
0,73ed8378dc1811e897b86c96cfde8f_1_100_origin_00...,73ed8378dc1811e897b86c96cfde8f_1.mkv,00:00:00,00:00:09,00:00:00,00:00:09,0,0
1,73ed8378dc1811e897b86c96cfde8f_1_100_origin_00...,73ed8378dc1811e897b86c96cfde8f_1.mkv,00:00:10,00:00:14,00:00:10,00:00:14,0,0
2,73ed8378dc1811e897b86c96cfde8f_1_100_origin_00...,73ed8378dc1811e897b86c96cfde8f_1.mkv,00:00:15,00:00:33,00:00:15,00:00:33,0,0
3,73ed8378dc1811e897b86c96cfde8f_1_100_origin_00...,73ed8378dc1811e897b86c96cfde8f_1.mkv,00:00:34,00:00:38,00:00:34,00:00:38,0,0
4,73ed8378dc1811e897b86c96cfde8f_1_100_origin_00...,73ed8378dc1811e897b86c96cfde8f_1.mkv,00:00:39,00:00:43,00:00:39,00:00:43,0,0
...,...,...,...,...,...,...,...,...
142570,ddce0e38d5ac11e8b2536c96cfde8f_100_origin_0000...,ddce0e38d5ac11e8b2536c96cfde8f.mp4,01:47:36,01:47:46,01:47:36,01:47:46,0,0
142571,ddce0e38d5ac11e8b2536c96cfde8f_100_origin_0000...,ddce0e38d5ac11e8b2536c96cfde8f.mp4,01:47:47,01:48:02,01:47:47,01:48:02,0,0
142572,ddce0e38d5ac11e8b2536c96cfde8f_100_origin_0000...,ddce0e38d5ac11e8b2536c96cfde8f.mp4,01:48:03,01:48:04,01:48:03,01:48:04,0,0
142573,ddce0e38d5ac11e8b2536c96cfde8f_115-8a0b0941-dd...,ddce0e38d5ac11e8b2536c96cfde8f.mp4,01:48:02,01:48:11,01:48:05,01:48:08,9,0


In [None]:
# VGG16 feature extractor: ImageNet weights, no classification head, and
# global max pooling over the output of the convolutional base.
vgg16_features = vgg16.VGG16(include_top=False, weights='imagenet', pooling='max')

# Length of one frame descriptor.
# NOTE(review): presumably the channel count of VGG16's last convolutional
# block; this constant is not referenced by the other visible cells.
DESCRIPTOR_DIMENSION = 512

def describe_event_vgg16(vgg16_model, images):
  """Compute one VGG16 descriptor per frame.

  Args:
    vgg16_model: Model whose ``predict`` produces the descriptors.
    images: Iterable of frames (arrays accepted by ``cv2.resize``).

  Returns:
    The output of ``vgg16_model.predict`` for the batch of resized and
    preprocessed frames.
  """
  # Every frame is resized to 224x224 before being stacked into one batch
  resized_frames = np.array([cv2.resize(frame, (224, 224)) for frame in images])
  batch = vgg16.preprocess_input(resized_frames)
  # Use the model that was passed in rather than the module-level global
  return vgg16_model.predict(batch)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# For each segment, fetch the frames of its event and generate the descriptors
seg_idx = 0
current_video = ''
video_cursor = None
video_duration = 0
local_video_path = None  # local copy of the video currently being processed
for seg in seg_info.to_numpy():
  # Positional columns: 0 = seg_id, 1 = video_name,
  # 4 = event_start_time, 5 = event_end_time
  seg_id, seg_video = seg[0], seg[1]

  # Switch videos: drop the previous local copy and fetch the new one
  if seg_video != current_video:
    # Recorded before opening so that a video that fails to open is attempted
    # once, instead of being re-copied and re-opened for each of its segments.
    current_video = seg_video
    video_cursor = None  # release the previous reader before deleting its file
    if local_video_path is not None and os.path.exists(local_video_path):
      os.remove(local_video_path)

    video_path = get_video_path(SoccerDB2SoccerNet, seg_video)
    video_name = video_path.rsplit('/', 1)[-1]
    local_video_path = './' + video_name
    print(video_path)
    try:
      # shutil.copy copes with spaces in the path without shell escaping
      shutil.copy(video_path, local_video_path)
      video_cursor, video_duration = open_video(local_video_path)
    except Exception as err:
      print('Erro ao abrir:', video_name, '-', err)
      video_cursor = None

  # Segments of a video that could not be opened are skipped
  if video_cursor is None:
    continue

  # Fetch the frames and generate the descriptors
  try:
    frames = get_event_frames(FRAMES_DELTA, video_cursor, video_duration, seg[4], seg[5])
    event_descriptors = describe_event_vgg16(vgg16_features, frames)
  except Exception as err:
    print('Erro ao processar:', seg_id, '-', err)
    continue

  # One CSV per segment, grouped in one folder per video
  df = pd.DataFrame(data=event_descriptors)
  descriptor_path = VGG16_DESCRIPTORS_PATH + seg_video + '/'
  # makedirs also creates the parent descriptors folder on first use
  os.makedirs(descriptor_path, exist_ok=True)
  df.to_csv(descriptor_path + seg_id + '.csv')
  print(seg_idx)
  seg_idx += 1

In [None]:
# Bundle everything under /content/VGG into a single archive.
# NOTE(review): assumes the working directory is /content, so that this is the
# './VGG/' folder the descriptors are written to — confirm.
!zip -r ./VGG56.zip /content/VGG

In [None]:
# Move the archive into the project folder on Google Drive.
!mv './VGG56.zip' '/content/drive/MyDrive/TCC'

In [None]:
# Shape of the descriptor matrix left over from the last segment described by
# the processing loop.
# NOTE(review): the saved output shows 1920 columns, which does not match
# DESCRIPTOR_DIMENSION = 512 — it looks stale from a run with another model; confirm.
print(event_descriptors.shape)

(45, 1920)


In [None]:
def gen_y(label_str):
  """Build the multi-hot label vector of one segment.

  Args:
    label_str: Class id, or several class ids separated by single spaces.
      The value is converted with ``str`` before being split.

  Returns:
    pd.Series: Entries ``label_0`` .. ``label_10`` set to 0, with a 1 at
    ``label_<id>`` for every id found in ``label_str``.
  """
  encoded = pd.Series(0, index=['label_%d' % (idx) for idx in range(11)])
  for class_id in str(label_str).split(' '):
    encoded['label_%s' % (class_id)] = 1
  return encoded

# Label matrix: one row per segment, one 0/1 column per class id.
y = seg_info['cls_id'].apply(gen_y)

In [None]:
# Preview the label matrix.
y

Unnamed: 0,label_0,label_1,label_2,label_3,label_4,label_5,label_6,label_7,label_8,label_9,label_10
0,1,0,0,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,0
4,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
142570,1,0,0,0,0,0,0,0,0,0,0
142571,1,0,0,0,0,0,0,0,0,0,0
142572,1,0,0,0,0,0,0,0,0,0,0
142573,0,0,0,0,0,0,0,0,0,1,0


In [None]:
# Number and percentage of segments that carry the selected label
cls = 'label_10'
print(y[cls].sum())
# Divide by the actual number of rows: the previously hard-coded 142757 does
# not match the displayed frame, whose last index is 142573 (142574 rows).
print(100 * y[cls].sum() / len(y))

2066
1.4472144973626513
