# Import 'mergetablestodf' function for merging tables

In [49]:
import pandas as pd

In [2]:
from table_merge import mergetablestodf

# Merging data from dynamoDB tables to a dataframe

In [3]:
df = mergetablestodf()

# Dataframe for analysis

In [5]:
df.head()

Unnamed: 0,original_file_size,video_location,reduced_file_size,reduced_method,reconstructed_method,reconstructed_file_size,yolo_score,mp_score
0,2091398,original-videos/collision04.mp4,,,,,0.5480615942028965,0.1831801242236025
1,2767585,original-videos/collision06.mp4,,,,,0.4073898117386491,0.1957209302325581
2,18178091,original-videos/licenseplate04.mp4,,,,,0.6519646475712425,0.4746809815950918
3,10749496,original-videos/licenseplate08.mp4,,,,,0.7438326545530494,0.4215263157894738
4,2101401,original-videos/collision02.mp4,,,,,0.8432392026578077,0.2805116279069765


In [31]:
## to extract all of the technique names and steps from the video location and save it as a column
def get_technique(video_location):
    if('original-videos') in video_location:
        return "original-videos"
    else:
        return video_location.split("/")[-2]
    
df['technique'] = df['video_location'].apply(get_technique)


In [38]:
def get_step(video_location):
    if('original-videos') in video_location:
        return "original"
    else:
        return video_location.split("/")[-3]
    
df['step'] = df['video_location'].apply(get_step)


# Find the best reduction techniques

In [53]:
#find the mean original file size
df['original_file_size'] = pd.to_numeric(df["original_file_size"])
mean_og = df['original_file_size'].mean(skipna= True)

In [87]:
## find the average file size 

## create df to find the best reduction

df['reduced_file_size'] = pd.to_numeric(df["reduced_file_size"])

dict_technique_reduction_avg = {}

df_reduction = df.loc[df['step'] == 'reduced-videos']

for i in df_reduction['technique'].unique():
    df_reduction_tech = df.loc[df['technique'] == i]
    dict_technique_reduction_avg[i] = mean_og/df_reduction_tech['reduced_file_size'].mean()

In [88]:
from collections import OrderedDict

dict_technique_reduction_avg = OrderedDict(sorted(dict_technique_reduction_avg.items(), key=lambda x: x[1], reverse = True))

In [89]:
dict_technique_reduction_avg

OrderedDict([('ffmpeg_rd-quality_240p-algorithm_lanczos', 30.20393468454852),
             ('ffmpeg_rd-quality_360p-algorithm_lanczos', 18.36709881242929),
             ('fps_bitrate-fps_5x-bitrate_10x', 15.375717637164907),
             ('fps_bitrate-fps_4x-bitrate_10x', 15.272461223314526),
             ('fps_bitrate-fps_3x-bitrate_10x', 14.774038260971315),
             ('fps_bitrate-fps_2x-bitrate_10x', 14.197398509225824),
             ('fps_bitrate-fps_5x-bitrate_9x', 13.67005498821681),
             ('fps_bitrate-fps_4x-bitrate_9x', 13.572424649549886),
             ('fps_bitrate-fps_3x-bitrate_9x', 13.30519818646692),
             ('fps_bitrate-fps_2x-bitrate_9x', 12.657274884795891),
             ('fps_bitrate-fps_5x-bitrate_8x', 11.40760248438665),
             ('fps_bitrate-fps_4x-bitrate_8x', 11.287553263473898),
             ('ffmpeg_rd-quality_480p-algorithm_lanczos', 10.929929242300378),
             ('fps_bitrate-fps_3x-bitrate_8x', 10.90523132429338),
             ('fp

The dictionary above shows the reduction technique, and the factor reduction on video size.

The key represents the technique name and the value represents the X times reduction. ex/ ffmpeg_rd-quality_240p-algorithm_lanczos had a 30x reduction

# Find the best reconstruction technique

In [82]:
# find the original yolo and mp scores
df['yolo_score'] = pd.to_numeric(df["yolo_score"])
df['mp_score'] = pd.to_numeric(df["mp_score"])

df_og = df.loc[df['step'] == 'original']

mean_og_yolo = df_og['yolo_score'].mean()
mean_og_mp = df_og['mp_score'].mean()


In [129]:
# find the reconstruction technique that impacted yolo the least

dict_technique_recon_yolo_percent = {}
dict_technique_recon_mp_percent = {}

df_reconstructed = df.loc[df['step'] == 'reconstructed-videos']

for i in df_reconstructed['technique'].unique():
    df_reconstructed_tech = df.loc[df['technique'] == i]
    dict_technique_recon_yolo_percent[i] = (mean_og_yolo - df_reconstructed_tech['yolo_score'].mean(skipna = True))/mean_og_yolo *100
    dict_technique_recon_mp_percent[i] = (mean_og_mp-df_reconstructed_tech['mp_score'].mean(skipna = True))/mean_og_mp *100




In [130]:
dict_technique_recon_yolo_percent = OrderedDict(sorted(dict_technique_recon_yolo_percent.items(), key=lambda x: x[1]))

dict_technique_recon_yolo_percent

OrderedDict([('opencv_ru-codec_mp4v-resolution_1080X1920-reduction_ffmpeg_rd-quality_480p-algorithm_lanczos',
              nan),
             ('smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_6x',
              1.0376057230366107),
             ('fastsrgan-codec_mp4v', nan),
             ('smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_3x',
              -4.3752548016461565),
             ('smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_5x',
              -3.7765543068815646),
             ('smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_4x',
              -2.9506133498497484),
             ('smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_7x',
              -2.7615986330240605),
             ('smooth_fps-factor2-reduction_fps_bitrate-fps_2x-bitrate_2x',
              -1.7992032947044423),
             ('smooth_fps-factor3-reduction_fps_bitrate-fps_3x-bitrate_2x',
              -1.7423608232650014),
             ('smooth_fps-factor3-reduction

In [131]:
dict_technique_recon_mp_percent = OrderedDict(sorted(dict_technique_recon_mp_percent.items(), key=lambda x: x[1]))
dict_technique_recon_mp_percent

OrderedDict([('opencv_ru-codec_mp4v-resolution_1080X1920-reduction_ffmpeg_rd-quality_480p-algorithm_lanczos',
              nan),
             ('smooth_fps-factor2-reduction_fps_bitrate-fps_2x-bitrate_2x',
              2.051496220188118),
             ('smooth_fps-factor3-reduction_fps_bitrate-fps_3x-bitrate_2x',
              2.242062666821549),
             ('smooth_fps-factor5-reduction_fps_bitrate-fps_5x-bitrate_2x',
              2.250074967096491),
             ('smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_2x',
              2.7302755831867147),
             ('smooth_fps-factor3-reduction_fps_bitrate-fps_3x-bitrate_3x',
              2.7315321267193644),
             ('smooth_fps-factor5-reduction_fps_bitrate-fps_5x-bitrate_4x',
              2.844143405696179),
             ('smooth_fps-factor2-reduction_fps_bitrate-fps_2x-bitrate_3x',
              2.859780381531053),
             ('smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_4x',
              3.919420

Here the mediapipe and yolo results are represented by the keys as technique for reconstruciton, and the value as the % decrease in average confidence score. 