# Import 'mergetablestodf' function for merging tables

In [54]:
import pandas as pd
from tabulate import tabulate

In [3]:
from table_merge import mergetablestodf

# Merging data from dynamoDB tables to a dataframe

In [4]:
# Build the analysis DataFrame with mergetablestodf (imported from table_merge).
# NOTE(review): the section heading says this merges DynamoDB tables, so it
# presumably needs database access at run time — confirm.
df = mergetablestodf()

# Dataframe for analysis

In [6]:
# To extract all of the technique names from the video location and save it as a column.
def get_technique(video_location):
    """Return the technique label encoded in a video's location path.

    Locations containing 'original-videos' are labelled "original-videos";
    for every other location the label is the second-to-last '/'-separated
    component of the path.
    """
    is_original = 'original-videos' in video_location
    return "original-videos" if is_original else video_location.split("/")[-2]
    
# Derive one technique label per row from its video location.
df['technique'] = df['video_location'].map(get_technique)


In [7]:
# Extract the stage (original, reduced-videos, or reconstructed-videos) that the video belongs to.
def get_step(video_location):
    """Return the pipeline stage encoded in a video's location path.

    Locations containing 'original-videos' are labelled "original"; for every
    other location the stage is the third-to-last '/'-separated component of
    the path.
    """
    is_original = 'original-videos' in video_location
    return "original" if is_original else video_location.split("/")[-3]
    
# Derive one stage label per row from its video location.
df['step'] = df['video_location'].map(get_step)


# Find the best reduction techniques

In [8]:
# Coerce the original file sizes to a numeric dtype, then take their mean
# (missing values are skipped, which is the pandas default).
df['original_file_size'] = df['original_file_size'].pipe(pd.to_numeric)
mean_og = df['original_file_size'].mean()

In [9]:
# Rank reduction techniques by reduction factor: the mean original file size
# divided by the mean reduced file size of that technique's reduced videos.

df['reduced_file_size'] = pd.to_numeric(df["reduced_file_size"])

# Only rows from the reduction step take part in the averages.
df_reduction = df.loc[df['step'] == 'reduced-videos']

# Group the already-filtered frame rather than the full df, so rows from other
# steps that share a technique label cannot leak into a technique's mean.
# sort=False keeps techniques in first-appearance order, as unique() did.
mean_reduced_size_by_technique = (
    df_reduction.groupby('technique', sort=False)['reduced_file_size'].mean()
)

# Maps technique name -> reduction factor (higher means smaller reduced files).
dict_technique_reduction_avg = (mean_og / mean_reduced_size_by_technique).to_dict()

In [85]:
# Turn the technique -> factor mapping into a two-column frame and print it,
# largest reduction factor first.
df_technique_reduction_avg = pd.DataFrame(
    list(dict_technique_reduction_avg.items()),
    columns=['reduction_technique', 'reduction_factor_X'],
)
print(
    tabulate(
        df_technique_reduction_avg
        .set_index(['reduction_technique'])
        .sort_values("reduction_factor_X", ascending=False),
        headers='keys',
        tablefmt='pretty',
    )
)

+-------------------------------------------+--------------------+
|            reduction_technique            | reduction_factor_X |
+-------------------------------------------+--------------------+
| ffmpeg_rd-quality_240p-algorithm_lanczos  | 30.20393468454852  |
| ffmpeg_rd-quality_360p-algorithm_lanczos  | 18.36709881242929  |
|      fps_bitrate-fps_5x-bitrate_10x       | 13.814648068934439 |
|      fps_bitrate-fps_4x-bitrate_10x       | 13.623390334070221 |
|      fps_bitrate-fps_3x-bitrate_10x       | 13.184123953098258 |
|      fps_bitrate-fps_2x-bitrate_10x       | 12.585390029565719 |
|       fps_bitrate-fps_5x-bitrate_9x       | 12.278428907772266 |
|       fps_bitrate-fps_4x-bitrate_9x       | 12.122931133054342 |
|       fps_bitrate-fps_3x-bitrate_9x       | 11.847717015137281 |
|       fps_bitrate-fps_2x-bitrate_9x       | 11.260163202169709 |
| ffmpeg_rd-quality_480p-algorithm_lanczos  | 10.929929242300378 |
|       fps_bitrate-fps_5x-bitrate_8x       | 10.8804995383725

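**The table above lists each reduction technique and its reduction factor: the mean original file size divided by the mean reduced file size for that technique (a higher factor means smaller reduced videos).**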

# Find the best reconstruction technique

In [26]:
# Baseline confidence: make both score columns numeric, then average each one
# over the rows that belong to the original (unprocessed) videos.
for score_column in ('yolo_score', 'mp_score'):
    df[score_column] = pd.to_numeric(df[score_column])

df_og = df[df['step'].eq('original')]

mean_og_yolo, mean_og_mp = (
    df_og[score_column].mean() for score_column in ('yolo_score', 'mp_score')
)


In [27]:
# Create and populate dictionaries (one for YOLO, one for MediaPipe) mapping each
# reconstruction technique to its confidence change (%), computed as
#     (original mean - reconstructed mean) / original mean * 100
# so a positive value is a drop in confidence and a negative value is a gain.

df_reconstructed = df.loc[df['step'] == 'reconstructed-videos']

# Average over the already-filtered frame rather than the full df, so rows from
# other steps that share a technique label cannot leak into a technique's mean.
# sort=False keeps techniques in first-appearance order, as unique() did.
mean_recon_scores_by_technique = (
    df_reconstructed.groupby('technique', sort=False)[['yolo_score', 'mp_score']].mean()
)

dict_technique_recon_yolo_percent = (
    (mean_og_yolo - mean_recon_scores_by_technique['yolo_score']) / mean_og_yolo * 100
).to_dict()
dict_technique_recon_mp_percent = (
    (mean_og_mp - mean_recon_scores_by_technique['mp_score']) / mean_og_mp * 100
).to_dict()




In [83]:
# Tabulate the YOLO confidence changes, smallest value first.
df_technique_recon_yolo_percent = pd.DataFrame(
    list(dict_technique_recon_yolo_percent.items()),
    columns=['reconstruction_technique', 'yolo_confidence_change_%'],
)
print(
    tabulate(
        df_technique_recon_yolo_percent
        .set_index(['reconstruction_technique'])
        .sort_values("yolo_confidence_change_%", ascending=True),
        headers='keys',
        tablefmt='pretty',
    )
)

+-----------------------------------------------------------------------------------------------+--------------------------+
|                                   reconstruction_technique                                    | yolo_confidence_change_% |
+-----------------------------------------------------------------------------------------------+--------------------------+
|           fastsrgan-codec_mp4v-reduction_ffmpeg_rd-quality_1080p-algorithm_lanczos            |    -8.173367433771272    |
|                  smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_7x                   |    -6.731613087629423    |
|                  smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_5x                   |    -3.776554306881546    |
|                  smooth_fps-factor4-reduction_fps_bitrate-fps_4x-bitrate_4x                   |   -2.0074868382673947    |
|                  smooth_fps-factor2-reduction_fps_bitrate-fps_2x-bitrate_2x                   |   -1.7992032947044423    |


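**The table above lists each reconstruction technique and the % decrease in the YOLO model's average confidence score relative to the original videos; negative values mean the reconstructed videos scored higher than the originals.**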

In [84]:
# Tabulate the MediaPipe confidence changes, smallest value first.
df_technique_recon_mp_percent = pd.DataFrame(
    list(dict_technique_recon_mp_percent.items()),
    columns=['reconstruction_technique', 'mediapipe_confidence_change_%'],
)
print(
    tabulate(
        df_technique_recon_mp_percent
        .set_index(['reconstruction_technique'])
        .sort_values("mediapipe_confidence_change_%", ascending=True),
        headers='keys',
        tablefmt='pretty',
    )
)

+-----------------------------------------------------------------------------------------------+-------------------------------+
|                                   reconstruction_technique                                    | mediapipe_confidence_change_% |
+-----------------------------------------------------------------------------------------------+-------------------------------+
| opencv_ru-codec_mp4v-resolution_1080X1920-reduction_ffmpeg_rd-quality_1080p-algorithm_lanczos |      1.0391391828600802       |
|                  smooth_fps-factor2-reduction_fps_bitrate-fps_2x-bitrate_2x                   |       2.051496220188118       |
|                  smooth_fps-factor3-reduction_fps_bitrate-fps_3x-bitrate_2x                   |       2.242062666821549       |
|                  smooth_fps-factor5-reduction_fps_bitrate-fps_5x-bitrate_2x                   |       2.250074967096491       |
| opencv_ru-codec_mp4v-resolution_1080X1920-reduction_ffmpeg_rd-quality_480p-algorithm_lan

**Here the MediaPipe results are listed by reconstruction technique; each value is the % decrease in average confidence score relative to the original videos.**