In [1]:
# IMPORTS


import numpy as np
import pandas as pd
import av

### VIDEOFRAME INFO EXTRACTOR

In [2]:
def frametimes(video_path):
    """Collect the start time of every frame in a video.

    Opens ``video_path`` with PyAV, prints the metadata of the first video
    stream (frame count, duration, time base) and decodes every frame to
    record its presentation time in seconds. The first five frames are
    printed as a sanity check.

    Parameters
    ----------
    video_path : str or path-like
        Location of the video file; passed straight to ``av.open``.

    Returns
    -------
    total_frames : int
        Number of frames. The stream metadata value is used when it matches
        the number of frames actually decoded; otherwise the decoded count is
        returned so that it always agrees with ``frame_intervals``.
    frame_intervals : list of float
        Start time in seconds of every decoded frame, followed by the stream
        duration in seconds as the closing edge. Its length is therefore
        always ``total_frames + 1``.
    """

    # Open the video file
    container = av.open(video_path)

    try:
        # Get the video stream (usually index 0 for the first video stream)
        video_stream = container.streams.video[0]

        # Retrieve video info
        total_frames = video_stream.frames  # Number of frames according to the stream metadata
        duration_ts = video_stream.duration  # Total duration in time units (PTS)
        time_base = video_stream.time_base  # Time base to convert duration_ts to seconds
        duration_seconds = float(duration_ts * time_base)  # Duration of the video in seconds

        # Print the results
        print(f"Total Frames: {total_frames}")
        print(f"Duration (PTS units): {duration_ts}")
        print(f"Time Base: {time_base}")
        print(f"Duration (seconds): {duration_seconds} \n")

        # Storage for frame intervals
        frame_intervals = []

        # Decode video frames
        for i, frame in enumerate(container.decode(video=0)):
            # TODO: If the video stream is corrupted, it could cause infinitely repeated decoding attempts. Might need if check in case it's unstable during testing

            # Update frame info
            start_time = float(frame.pts * time_base)
            frame_intervals.append(start_time)

            if i < 5:
                print(f"Frame {i + 1}: starttime {start_time}, PTS {frame.pts}, DTS {frame.dts}")
    finally:
        # Always release the file handle, also when decoding fails half-way
        container.close()

    # The metadata frame count can differ from what was decoded (PyAV documents
    # ``frames`` as 0 when the count is unknown). The intervals are built from
    # the decoded frames, so the returned count has to follow them; otherwise
    # the number of bins and labels downstream would not line up.
    decoded_frames = len(frame_intervals)
    if total_frames != decoded_frames:
        print(f"Stream metadata reports {total_frames} frames but {decoded_frames} were decoded; using the decoded count.")
        total_frames = decoded_frames

    # Close the last interval with the end of the video
    end_time = duration_seconds
    frame_intervals.append(end_time)

    return total_frames, frame_intervals

'\ndef frametimes(video_path):\n\n    # Open the video file\n    container = av.open(video_path)\n\n    # Get the video stream (usually index 0 for the first video stream)\n    video_stream = container.streams.video[0]\n\n\n    # Retrieve video info\n    total_frames = video_stream.frames  # Number of frames\n    duration_ts = video_stream.duration  # Total duration in time units (PTS)\n    time_base = video_stream.time_base  # Time base to convert duration_ts to seconds\n    duration_seconds = float(duration_ts * time_base) # Duration of the video in seconds\n\n    # Print the results\n    print(f"Total Frames: {total_frames}")\n    print(f"Duration (PTS units): {duration_ts}")\n    print(f"Time Base: {time_base}")\n    print(f"Duration (seconds): {duration_seconds} \n")\n\n\n    # Storage for frame intervals\n    frame_intervals = []\n\n\n    # Decode video frames\n    for i, frame in enumerate(container.decode(video=0)):\n        # TODO: If the video stream is corrupted, it could ca

### GET IMU DATAFRAME

In [4]:
def getIMU(dataframe_path):
    """Read a CSV file into a DataFrame and print a preview of it.

    The first and the last rows (pandas' default ``head``/``tail`` size) are
    printed before the full frame is returned.
    """
    imu_frame = pd.read_csv(dataframe_path)

    # Print both previews with their captions, top first
    previews = (
        ("Top rows:", imu_frame.head()),
        ("\nBottom rows:", imu_frame.tail()),
    )
    for caption, preview in previews:
        print(caption)
        print(preview)

    return imu_frame

### ADD FRAME_INDEX TO DATAFRAME

In [5]:
def add_frame_index(df, total_frames, frame_intervals):
    """Add a ``FRAME_INDEX`` column that maps every timestamp to a video frame.

    Frame ``k`` (1-based) covers the half-open interval
    ``[frame_intervals[k - 1], frame_intervals[k])``: a sample taken exactly
    at a frame's start time belongs to that frame, not to the previous one.
    The closing edge (``frame_intervals[-1]``) is still counted as part of the
    last frame. Timestamps outside the intervals get a missing value.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``TIMESTAMP`` column. The column is added to this
        frame in place and the same frame is returned.
    total_frames : int
        Number of frames; the labels run from 1 to ``total_frames``.
    frame_intervals : sequence of float
        Interval edges: one start time per frame plus the closing edge, so
        ``total_frames + 1`` values in increasing order.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the categorical ``FRAME_INDEX`` column added.

    Raises
    ------
    ValueError
        If ``TIMESTAMP`` is missing or the number of interval edges does not
        match ``total_frames``.
    """

    # Make a list of all frame indices
    frame_labels = list(range(1, total_frames + 1))

    if "TIMESTAMP" not in df.columns:
        raise ValueError("The dataframe must contain a 'TIMESTAMP' column.")

    # pd.cut needs exactly one label per interval; fail with a clear message
    if len(frame_intervals) != total_frames + 1:
        raise ValueError(
            f"Expected {total_frames + 1} interval edges for {total_frames} frames, "
            f"got {len(frame_intervals)}."
        )

    # Match FRAME_INDEX to TIMESTAMP using left-closed intervals [start, next_start)
    frame_index = pd.cut(df["TIMESTAMP"], bins=frame_intervals, labels=frame_labels, right=False)

    # Left-closed bins leave out the closing edge; keep it in the last frame
    if frame_labels:
        at_closing_edge = (df["TIMESTAMP"] == frame_intervals[-1]).to_numpy()
        frame_index.loc[at_closing_edge] = frame_labels[-1]

    df["FRAME_INDEX"] = frame_index

    return df

### CONVERT LABELED FRAMES TO DATAFRAME 

In [6]:
def dict_to_labeledframes(dict_list):
    """Turn a list of label dictionaries into a DataFrame (one row per dictionary)."""
    return pd.DataFrame(data=dict_list)

### MAKE A DICTIONARY FOR LABELS

In [7]:
# Function that stores frame indices to a label
def sort_frametolabels(df_label):
    """Group frame indices by label.

    Every row of ``df_label`` contributes the inclusive range
    ``frame_start .. frame_end`` to the list stored under its ``label``.
    Rows that share a label are concatenated in row order.
    """
    frames_by_label = {}

    for _, record in df_label.iterrows():
        # +1 because frame_end is part of the labeled segment
        segment = range(record["frame_start"], record["frame_end"] + 1)
        frames_by_label.setdefault(record["label"], []).extend(segment)

    return frames_by_label

### ADD LABEL TO DATAFRAME

In [8]:
# Function to assign labels
def assign_label(frame_index, label_dict):
    """Return the first label whose frame collection contains ``frame_index``.

    Labels are checked in the iteration order of ``label_dict``; ``None`` is
    returned when no label contains the frame.
    """
    matching_labels = (
        name for name, frames in label_dict.items() if frame_index in frames
    )
    return next(matching_labels, None)


# Match LABEL to FRAME_INDEX
def match_labeltoframe(df, label_dict):
    """Add a ``LABEL`` column by looking up every ``FRAME_INDEX`` in ``label_dict``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``FRAME_INDEX`` column. The ``LABEL`` column is added
        to this frame in place and the same frame is returned.
    label_dict : dict
        Maps a label to an iterable of the frame indices it covers.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the ``LABEL`` column added. Frames that are not covered by
        any label get ``None``.

    Raises
    ------
    ValueError
        If ``FRAME_INDEX`` is missing.
    """

    if "FRAME_INDEX" not in df.columns:
        raise ValueError("The dataframe must contain a 'FRAME_INDEX' column.")

    # Build the reverse lookup once instead of scanning every label's frame
    # list for each value. setdefault keeps the first label in dictionary
    # order when a frame is covered by more than one label.
    label_by_frame = {}
    for label, frame_indices in label_dict.items():
        for frame in frame_indices:
            label_by_frame.setdefault(frame, label)

    # dict.get yields None for frames without a label (and for missing values)
    df["LABEL"] = df["FRAME_INDEX"].apply(lambda frame_index: label_by_frame.get(frame_index))

    return df

### FUNCTION TO RUN EVERYTHING

In [9]:
def runner(video_path, dataframe, dataframe_labeled_frames):
    """Run the whole labeling pipeline.

    Reads the frame timing of ``video_path``, adds ``FRAME_INDEX`` and
    ``LABEL`` to ``dataframe`` and drops the rows without a frame index.
    """
    frame_count, interval_edges = frametimes(video_path)

    indexed = add_frame_index(dataframe, frame_count, interval_edges)
    labeled = match_labeltoframe(indexed, sort_frametolabels(dataframe_labeled_frames))

    # Remove data outside video duration (before the first or after the last frame)
    labeled.dropna(subset=["FRAME_INDEX"], inplace=True)

    return labeled

In [10]:
# Path to the video
video_path = "data/GH010041.MP4"

# Path to DataFrame
dataframe_path = "data/IMU_GH010041.csv"
dataframe = getIMU(dataframe_path)
print("")

# Path to labeled DataFrame
#dict_list = [{'label': 'opstappen', 'frame_start': 1, 'frame_end': 1550}, {'label': 'links', 'frame_start': 29225, 'frame_end': 29350}, {'label': 'rechtdoor', 'frame_start': 25416, 'frame_end': 25883}, {'label': 'links', 'frame_start': 25884, 'frame_end': 25937}, {'label': 'rechtdoor', 'frame_start': 28847, 'frame_end': 29224}, {'label': 'rechtdoor', 'frame_start': 28509, 'frame_end': 28728}, {'label': 'rechtdoor', 'frame_start': 29651, 'frame_end': 31357}, {'label': 'rechtdoor', 'frame_start': 1873, 'frame_end': 3765}, {'label': 'rechtdoor', 'frame_start': 7693, 'frame_end': 9644}, {'label': 'rechtdoor', 'frame_start': 5025, 'frame_end': 5993}, {'label': 'links', 'frame_start': 1767, 'frame_end': 1872}, {'label': 'rechtdoor', 'frame_start': 3898, 'frame_end': 4874}, {'label': 'rechts', 'frame_start': 26007, 'frame_end': 26052}, {'label': 'rechts', 'frame_start': 27293, 'frame_end': 27493}, {'label': 'rechts', 'frame_start': 17205, 'frame_end': 17352}, {'label': 'rechts', 'frame_start': 15396, 'frame_end': 15567}, {'label': 'rechts', 'frame_start': 9645, 'frame_end': 9735}, {'label': 'links', 'frame_start': 31358, 'frame_end': 31447}, {'label': 'rechts', 'frame_start': 10508, 'frame_end': 10667}]
#dataframe_labeled_frames = dict_to_labeledframes(dict_list)
#print(dataframe_labeled_frames)
#print("")

# Path to labeled DataFrame
# Forward slash on purpose: "\G" is an invalid escape sequence (Python warns
# about it) and a backslash separator only resolves on Windows.
labeled_dataframe_path = "data/GH010041_anonymized_labeled.csv"
dataframe_labeled_frames = getIMU(labeled_dataframe_path)
print(dataframe_labeled_frames)
print("")

dataframe = runner(video_path, dataframe, dataframe_labeled_frames)

  labeled_dataframe_path = "data\GH010041_anonymized_labeled.csv"


Top rows:
   TIMESTAMP    GYRO_x    GYRO_y    GYRO_z     ACCL_x    ACCL_y    ACCL_z
0   0.000000  2.074608 -1.159340 -4.027179  10.127098  0.901679  0.561151
1   0.005079  1.769518 -0.610179 -3.355983  10.424460  0.489209  0.522782
2   0.010158  1.647482 -0.671197 -4.942447  10.333333  0.956835  0.393285
3   0.015237  1.220357 -1.098322 -3.722090  10.172662  0.954436  0.019185
4   0.020316  0.793232 -1.220357 -2.318679  10.079137  0.127098 -0.395683

Bottom rows:
         TIMESTAMP     GYRO_x     GYRO_y     GYRO_z     ACCL_x     ACCL_y  \
104649  531.507587 -10.006931 -11.898485  59.919548  11.587530 -13.803357   
104650  531.512666  -2.867840 -15.132432 -16.596861   6.935252  -8.314149   
104651  531.517745  -0.244071  -5.979751 -63.336549   0.935252  14.904077   
104652  531.522824 -11.166270   0.549161   0.244071   3.760192   9.726619   
104653  531.527903 -15.620575  19.586736  38.868383   7.592326  -9.237410   

          ACCL_z  
104649 -1.134293  
104650 -9.199041  
104651 -5.15

In [11]:
# Show the first 5095 and the last 1360 rows of the labeled data
display(dataframe.head(5095), dataframe.tail(1360))

Unnamed: 0,TIMESTAMP,GYRO_x,GYRO_y,GYRO_z,ACCL_x,ACCL_y,ACCL_z,FRAME_INDEX,LABEL
0,0.000000,2.074608,-1.159340,-4.027179,10.127098,0.901679,0.561151,1,opstappen
1,0.005079,1.769518,-0.610179,-3.355983,10.424460,0.489209,0.522782,1,opstappen
2,0.010158,1.647482,-0.671197,-4.942447,10.333333,0.956835,0.393285,1,opstappen
3,0.015237,1.220357,-1.098322,-3.722090,10.172662,0.954436,0.019185,1,opstappen
4,0.020316,0.793232,-1.220357,-2.318679,10.079137,0.127098,-0.395683,2,opstappen
...,...,...,...,...,...,...,...,...,...
5090,25.851882,-95.309912,-9.457770,-36.671740,10.050360,1.199041,-0.676259,1550,opstappen
5091,25.856961,-94.089555,-9.274716,-41.797241,8.844125,3.489209,-2.330935,1550,opstappen
5092,25.862040,-94.455662,-4.576340,-33.864918,9.163070,2.609113,-0.486811,1551,rechts
5093,25.867119,-93.296323,-6.406876,-33.498810,9.163070,1.453237,-0.095923,1551,rechts


Unnamed: 0,TIMESTAMP,GYRO_x,GYRO_y,GYRO_z,ACCL_x,ACCL_y,ACCL_z,FRAME_INDEX,LABEL
103294,524.625603,9.518788,-28.617381,-22.393558,10.719424,4.242206,-10.014388,31447,links
103295,524.630682,10.067948,-2.989876,0.305089,12.652278,0.700240,-3.954436,31447,links
103296,524.635761,11.593395,-2.013590,6.589930,12.932854,-5.009592,-0.362110,31447,links
103297,524.640840,15.315485,-14.766324,-16.962968,8.589928,-0.985612,-4.776978,31448,rechtdoor
103298,524.645919,18.366379,-4.210233,-24.773255,6.278177,7.117506,-1.270983,31448,rechtdoor
...,...,...,...,...,...,...,...,...,...
104649,531.507587,-10.006931,-11.898485,59.919548,11.587530,-13.803357,-1.134293,31859,
104650,531.512666,-2.867840,-15.132432,-16.596861,6.935252,-8.314149,-9.199041,31859,
104651,531.517745,-0.244071,-5.979751,-63.336549,0.935252,14.904077,-5.155875,31860,
104652,531.522824,-11.166270,0.549161,0.244071,3.760192,9.726619,-4.673861,31860,


In [12]:
# Show the first 3940 rows of the labeled data
display(dataframe.iloc[:3940])

Unnamed: 0,TIMESTAMP,GYRO_x,GYRO_y,GYRO_z,ACCL_x,ACCL_y,ACCL_z,FRAME_INDEX,LABEL
0,0.000000,2.074608,-1.159340,-4.027179,10.127098,0.901679,0.561151,1,opstappen
1,0.005079,1.769518,-0.610179,-3.355983,10.424460,0.489209,0.522782,1,opstappen
2,0.010158,1.647482,-0.671197,-4.942447,10.333333,0.956835,0.393285,1,opstappen
3,0.015237,1.220357,-1.098322,-3.722090,10.172662,0.954436,0.019185,1,opstappen
4,0.020316,0.793232,-1.220357,-2.318679,10.079137,0.127098,-0.395683,2,opstappen
...,...,...,...,...,...,...,...,...,...
3935,19.985689,26.481755,-21.417272,5.979751,8.215827,-3.858513,3.625899,1198,opstappen
3936,19.990768,26.054630,-22.698647,6.162805,8.460432,-3.685851,2.992806,1199,opstappen
3937,19.995847,26.054630,-21.600326,6.223823,8.702638,-3.613909,3.364508,1199,opstappen
3938,20.000926,26.054630,-22.515594,6.040769,8.688249,-3.496403,3.215827,1199,opstappen


In [13]:
# Write the labeled data to disk without the row index
path = 'data/labeled_GH010041.csv'
dataframe.to_csv(path_or_buf=path, index=False)