# Generating new tensors

This notebook shows how to implement a new segmentation strategy using:
- video info data (as MatchesInfo.csv from Common module)
- target events DataFrame (as ProcessedEvents.csv from Common module)
- matches .mp4 videos (as Videos from Common module).

In [16]:
# === Paths ===
import os

video_info_path = "../Common/MatchesInfo.csv"
events_path = "../Common/ProcessedEvents.csv"
output_path = "../Common/Tensors/new_strategy.h5"

# Match videos default to the Common module location. Set the
# MATCH_VIDEOS_PATH environment variable to point at a local copy instead of
# hard-coding a machine-specific absolute path in the notebook.
videos_path = os.environ.get("MATCH_VIDEOS_PATH", "../Common/Videos/")


## Creating new Start and End timestamps for each event

In this example, an event's End is set to its Start timestamp (the 'eventSec' attribute plus the match period start) plus 1.5 seconds.

In [6]:
import pandas as pd
import numpy as np

def define_events_endtimes(df_events, duration=1.5):
    """Return a sorted copy of the events with an ``eventSecEnd`` column added.

    Rows are ordered by ``matchId``, ``matchPeriod`` and ``eventSec`` and the
    index is rebuilt from 0. The input DataFrame is not modified.

    Parameters
    ----------
    df_events : pandas.DataFrame
        Events table; must contain the ``matchId``, ``matchPeriod`` and
        ``eventSec`` columns.
    duration : float, default 1.5
        Offset added to ``eventSec`` to obtain ``eventSecEnd`` (same unit as
        ``eventSec``).

    Returns
    -------
    pandas.DataFrame
        New DataFrame with the extra ``eventSecEnd`` column appended.
    """
    df_sorted = (
        df_events
        .sort_values(by=['matchId', 'matchPeriod', 'eventSec'], ascending=True)
        .reset_index(drop=True)
    )
    # ``assign`` always returns a new frame, so the caller's data is never touched.
    return df_sorted.assign(eventSecEnd=df_sorted['eventSec'] + duration)

def collect_all_timestamps(df_events, video_info_path):
    """Collect absolute video timestamps for every event of every match.

    For each match found in ``df_events`` the start of each half is read from
    the video-info CSV and added to the event's ``eventSec`` / ``eventSecEnd``.

    Parameters
    ----------
    df_events : pandas.DataFrame
        Events table with the ``matchId``, ``id``, ``matchPeriod``,
        ``eventSec`` and ``eventSecEnd`` columns.
    video_info_path : str
        Path of a CSV file with the ``matchId``, ``1st_half_start`` and
        ``2nd_half_start`` columns.

    Returns
    -------
    list of tuple
        ``(match_id, event_id, period, start_timestamp, end_timestamp)`` for
        every event whose period is ``"1H"`` or ``"2H"``; events from any
        other period are skipped.

    Raises
    ------
    IndexError
        If a match present in ``df_events`` has no row in the video-info file.
    """
    df_video_info = pd.read_csv(video_info_path)
    timestamps = []
    for match_id in np.unique(df_events["matchId"].values):
        match_key = int(match_id)
        video_info = df_video_info[df_video_info["matchId"] == match_key]
        if video_info.empty:
            # Same exception type the bare ``.values[0]`` lookup would raise,
            # but with a message that names the offending match.
            raise IndexError(
                f"No video info found for match {match_id} in {video_info_path}"
            )

        # Offset of each supported period inside the match video.
        period_starts = {
            "1H": video_info["1st_half_start"].values[0],
            "2H": video_info["2nd_half_start"].values[0],
        }

        df_match_events = df_events[df_events["matchId"] == match_key]
        for _, row in df_match_events.iterrows():
            period = row["matchPeriod"]
            if period not in period_starts:
                # Only first- and second-half events are mapped to the video.
                continue
            period_start = period_starts[period]
            timestamps.append((
                match_id,
                row["id"],
                period,
                period_start + row["eventSec"],
                period_start + row["eventSecEnd"],
            ))
    return timestamps

# Load the processed events and attach an end time to each of them, then
# resolve every event to absolute timestamps inside its match video.
df_events = define_events_endtimes(pd.read_csv(events_path))
timestamps = collect_all_timestamps(
    df_events,
    video_info_path,
)



## Defining h5py file management functions

In [8]:
def create_h5_file(h5_path):
    """Open ``h5_path`` in write mode ('w') and add an empty ``data`` group.

    A confirmation message is printed once the file has been closed.
    """
    with h5py.File(h5_path, 'w') as h5_file:
        h5_file.create_group("data")

    print(f"File {h5_path} created successfully.")


def add_video_to_h5(h5_path, video_id, video_data):
    """Write ``video_data`` to the ``data`` group of an existing HDF5 file.

    The dataset is named ``str(video_id)``; a dataset that already exists
    under that name is deleted first. Data is cast to ``uint8`` and stored
    with gzip compression (level 4).
    """
    with h5py.File(h5_path, 'a') as h5_file:
        group = h5_file["data"]
        dataset_name = str(video_id)

        # Remove any previous dataset so the name can be reused below.
        if dataset_name in group:
            del group[dataset_name]

        group.create_dataset(
            dataset_name,
            data=video_data.astype('uint8'),
            compression="gzip",
            compression_opts=4,
        )


## Creating Tensors with new timestamps

In [None]:
from tqdm import tqdm
import cv2
import h5py
import os

def extract_segment_frames(video_path, start_time, end_time, num_frames=13, size=(252, 252)):
    """Sample evenly spaced RGB frames from a time window of a video.

    Parameters
    ----------
    video_path : str
        Path of the video file to open with OpenCV.
    start_time, end_time : float
        Window limits; they are multiplied by the video FPS to get frame
        indices, so they are expected in seconds from the start of the video.
    num_frames : int, default 13
        Number of frame positions sampled between the two limits (inclusive).
    size : tuple of int, default (252, 252)
        Target size passed to ``cv2.resize``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the frames that could be read, converted from
        BGR to RGB. Positions that fail to decode are skipped, so fewer than
        ``num_frames`` frames (possibly none) may be returned.
        Presumably shaped (n, height, width, 3) for colour video; TODO confirm.

    Raises
    ------
    IOError
        If the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Unable to open video at {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        start_frame = int(start_time * fps)
        end_frame = int(end_time * fps)
        # Only clamp when the container reports a usable frame count; a
        # reported count of 0 would otherwise turn the end index into -1.
        if total_frames > 0:
            end_frame = min(end_frame, total_frames - 1)

        frame_indices = np.linspace(start_frame, end_frame, num_frames, dtype=int)

        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if not ret:
                # Unreadable position: skip it rather than abort the segment.
                continue

            frame = cv2.resize(frame, size)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding or resizing fails.
        cap.release()

    return np.array(frames, dtype=np.uint8)

# Start from a fresh output file, then store one frame tensor per event,
# keyed by the event id.
create_h5_file(output_path)

progress = tqdm(timestamps, desc="Processing events")
for match_id, event_id, period, start_ts, end_ts in progress:
    # Each match video is expected to be named "<match_id>.mp4".
    video_file = os.path.join(videos_path, str(match_id) + ".mp4")
    segment_frames = extract_segment_frames(video_file, start_ts, end_ts)
    add_video_to_h5(output_path, event_id, segment_frames)
    

