In [25]:
# from lfs_tree import *
import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)
import pydot
import networkx as nx
from string import Template
import pandas as pd
import glob
import pydot
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
import seaborn as sns

from IPython.display import Image, display
import matplotlib.pyplot as plt
# from IPython.display import Image, display
import pickle
import matplotlib as mpl
import matplotlib.patches as mpatches  # For creating custom legend entries


mpl.rcParams.update({
    'font.size': 20,  # Default font size for titles, labels, and tick labels
    'axes.labelsize': 20,  # Font size for axis labels
    'xtick.labelsize': 18,  # Font size for x-axis tick labels
    'ytick.labelsize': 18,  # Font size for y-axis tick labels
    'legend.fontsize': 18  # Font size for the legend
})

In [26]:
# functions

def view_pydot(pdot):
    plt = Image(pdot.create_png())
    display(plt)

def show_trees(directory):
    for f in glob.glob(f'{directory}*tree_*'):
        file = open(f)
        dot_string = file.read()
        print(f)
#             print(dot_string)
        graph = pydot.graph_from_dot_data(dot_string)[0]
        view_pydot(graph)
        print('\n')
            
def show_stats(directory):
    results_files = []
    unsuccessful_cnt = 0
    fs = glob.glob(f'{directory}/**/*experiment_stats')
    df = pd.concat([pd.read_csv(f) for f in fs])
#     print(f"unsuccessful cnt : {unsuccessful_cnt}")
    return df

def determine_shape(x, shape_assignment_dict):
    return shape_assignment_dict[x]

def determine_color(x, color_assignment_dict):
    return color_assignment_dict[x]

def scatter_plots(df, x, y, xlabel, ylabel, title, shape_assignment_dict, color_assignment_dict,
                 first_legend_pos, second_legend_pos, strat_legend_rel_pos, ratio_legend_re_pos):
        
    fig, ax = plt.subplots()
    groups = df.groupby(['strat','complaint_ratio'])
    # Create a scatter plot for each group based on shape and color
    for (strat, ratio), group in groups:
        marker = shape_assignment_dict.get(strat)
        color=color_assignment_dict.get(ratio)
        ax.scatter(group[x], group[y], marker=marker, color=color, label=(strat, ratio))

    # Create separate shape and color legends
    legend_elements_1 = []
    legend_elements_2 = []

    for shape, marker in shape_assignment_dict.items():
        legend_elements_1.append(plt.Line2D([0], [0], marker=marker, color='w', label=shape, markerfacecolor='k'))
    for ratio, color in color_assignment_dict.items():
        legend_elements_2.append(plt.Line2D([0], [0], marker='o', color='w', label=ratio, markerfacecolor=color))

    # Add shape and color legends
    first_legend=ax.legend(handles=legend_elements_1, loc=first_legend_pos, 
                           bbox_to_anchor=strat_legend_rel_pos,
                           title='strategy', 
                           fontsize=13,title_fontsize=14)
    ax.legend(handles=legend_elements_2, loc=second_legend_pos, 
              bbox_to_anchor=ratio_legend_re_pos, title='complaint%',
              fontsize=13, title_fontsize=14)

    ax.add_artist(first_legend)

    ax.set_xlabel(xlabel, fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.set_title(title, fontsize=14)

    plt.show()
    plt.close(fig)
    

def scatter_plot_single_group_value(df, x, y, xlabel, ylabel, title, color_assignment_dict, group_col,
                                   scale, xticks=[], yticks=[]):
    fig, ax = plt.subplots()
    groups = df.groupby([group_col])
    for (g), group in groups:
        color = color_assignment_dict.get(g)
        ax.scatter(group[x], group[y], color=color, label=(g), s=20)  # Set linestyle to an empty string
    if(ylabel=='new_global_accuracy'):
        ax.plot(df[x], df['global_accuracy'], color='black', label='accuracy_before_fix')
    ax.set_xlabel(xlabel, fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.set_title(title, fontsize=14)
    ax.set_yscale(scale)  # Set the y-axis to logarithmic scale
    if(yticks):
        ax.set_yticks(yticks)
    if(xticks):
        ax.set_xticks(xticks)
    # Customize y-axis ticks and labels
#     ax.set_yticks([0.1, 1, 10, 100])
#     ax.set_yticklabels(['10^-1', '10^0', '10^1', '10^2'])
    ax.legend(fontsize=13,title_fontsize=14)
    plt.savefig(f'graph_pdfs/{title}.pdf', format='pdf')
    
    plt.show()
    plt.close(fig)
    plt.show()
    plt.close(fig)

    
def line_plots(df, x, y, xlabel, ylabel, title, color_assignment_dict, legend_pos,
#                group_name,
#                group_value, 
               gtype='general',
               ylim=None,
              ):
    fig, ax = plt.subplots()
#     df=df[df[group_name]==group_value]
    # Create a scatter plot for each group based on shape and color
    groups = df.groupby(['strat'])
    for strat, group in groups:
        color = color_assignment_dict.get(strat)
        print(f'color: ')
        ax.plot(group[x], group[y], color=color, label=strat)
    if(gtype=='accuracy'):
        ax.plot(df[x], df['global_accuracy'], color='blue', label='accuracy_before_fix')
    ax.legend()
    ax.set_xlabel(xlabel, fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)
    ax.set_title(title, fontsize=14)
    if(ylim):
        ax.set_ylim(ylim)
    plt.savefig(f'graph_pdfs/{title}.pdf', format='pdf')
    plt.show()
    plt.close(fig)


def boxplot(df, x, y, xlabel, ylabel, title, yscale=[], xscale=[], yticks=[], xticks=[]):
    plt.figure(figsize=(8, 6))
    
    # Set the y-axis scale
    strat_list = list(df['strat'].unique())
    strat_list.sort()
    
    if(y=='rbbm_runtime' or y=='avg_tree_size_increase'):
        ax=sns.swarmplot(data=df, x=x, y=y, hue='strat', hue_order=strat_list)
    else:
        ax = sns.boxplot(data=df, x=x, y=y, hue='strat', hue_order=strat_list)
        # Change tick label font size for both x and y axes
        
#     ax = sns.boxplot(data=df, x=x, y=y, hue='strat', hue_order=strat_list)
        # Change tick label font size for both x and y axes
    ax.tick_params(axis='both', labelsize=12)
    
    # Add a horizontal line at the specified y-coordinate (line_y)
    if ylabel == 'new_global_accuracy':
#     if line_y is not None:
        ax.axhline(y=float(df['global_accuracy'].mean()), color='red', label='gloabl accuracy before fix',
                  linewidth=5)
        ax.legend(fontsize=14,title_fontsize=15)
        # Create a separate legend for the box plot (customize as needed)
        box_legend = ax.legend(title='Strat',fontsize=13,title_fontsize=15)
    
        # Combine the legends
        ax.add_artist(box_legend)
    else:
        # Create a separate legend for the box plot (customize as needed)
        ax.legend(title='Strat',fontsize=14,title_fontsize=15)
    if yscale:
        ax.set_yscale(yscale)
    
    if xscale:
        ax.set_xscale(xscale)
    
    if yticks:
        ax.set_yticks(yticks)
    
    if xticks:
        ax.set_xticks(xticks)

    plt.title(title,fontsize=20)
    plt.xlabel(xlabel,fontsize=14)
    plt.ylabel(ylabel,fontsize=14)

    plt.savefig(f"graph_pdfs/box_{title}.pdf", format="pdf")
    plt.show()



In [27]:
def draw_deletion_plots(df_del):
    df_del['deletion_absolute_threshold'] = df_del['deletion_absolute_threshold'].astype(str)
    df_del.loc[df_del['deletion_type']=='ratio','deletion_absolute_threshold']='no_deletion'
    # Convert 'deletion_absolute_threshold' to strings
    unique_threshs = list(df_del['deletion_absolute_threshold'].unique())
    unique_threshs.remove('no_deletion')
    unique_threshs = sorted(unique_threshs, key=lambda x: int(x))
    custom_sort_order = unique_threshs + ['no_deletion']
    df_del['deletion_absolute_threshold'] = pd.Categorical(df_del['deletion_absolute_threshold'], \
                                                           categories=custom_sort_order, ordered=True)

    # Sort the DataFrame based on the custom order
    df_del = df_del.sort_values('deletion_absolute_threshold')
#     print(df_del)
    # Create a figure and axis for the bar plot
    fig, ax = plt.subplots()

    # Group data by 'deletion_absolute_threshold'
    groups = df_del.groupby('deletion_absolute_threshold')
#     groups.sort()
#     print([g for g in groups])
    # Create bar plots for 'fix_rate', 'global_accuracy', 'new_global_accuracy', and 'post_fix_num_funcs'
    bar_width = 0.2
    bar_positions = np.arange(len(df_del['deletion_absolute_threshold'].unique()))  # Generate evenly spaced x-positions
    print(f"bar_positions: {bar_positions}")
    # Bar plots for 'fix_rate', 'global_accuracy', 'new_global_accuracy'
    ax.bar(bar_positions - 1.5*bar_width, groups['fix_rate'].mean(), width=bar_width, label='Fix Rate', color='tab:blue')
    ax.bar(bar_positions - 0.5*bar_width, groups['global_accuracy'].mean(), width=bar_width, label='Global Accuracy', color='tab:orange', alpha=0.7)
    ax.bar(bar_positions + 0.5*bar_width, groups['new_global_accuracy'].mean(), width=bar_width, label='New Global Accuracy', color='tab:green', alpha=0.7)

    # Set labels and title for the bar plot
    ax.set_xticks(bar_positions)
#     print(bar_positions)
    ax.set_xticklabels(df_del['deletion_absolute_threshold'].unique())
    ax.set_xlabel('Deletion Absolute Threshold')
    ax.set_ylabel('Measure')
    ax.set_title('deletion vs accuracy')

    # Add legend for the bar plot
    ax.legend(loc='lower right')

    # Show the plot
    plt.show()

    # Convert 'deletion_absolute_threshold' to strings
#     df_del['deletion_absolute_threshold'] = df_del['deletion_absolute_threshold'].astype(str)

    # Create a figure and axis for the bar plot
    fig, ax = plt.subplots()

    # Group data by 'deletion_absolute_threshold'
    groups = df_del.groupby('deletion_absolute_threshold')
#     print([g for g in groups])
    bar_width = 0.2
    # Create a bar plot for 'post_fix_num_funcs'
    bar_positions = df_del['deletion_absolute_threshold'].unique()
    print(f"bar_positions: {bar_positions}")
#     bar_positions.sort()
    bar_heights = groups['post_fix_num_funcs'].mean()
    print(f"bar_heights: {bar_heights}")
    # Bar plot for 'post_fix_num_funcs'
    ax.bar(bar_positions, bar_heights, color='tab:blue')

    # Set labels and title for the bar plot
    ax.set_xlabel('Deletion Absolute Threshold')
    ax.set_ylabel('Post Fix Num Funcs')
    ax.set_title('Bar Plot for Post Fix Num Funcs')

    # Show the plot
    plt.show()


In [28]:
df_del_vary_bad_lfs = show_stats('../experiment_results_folders/deletion_factor_lf_1014_absolute_change_bad_lf_cnt/')


In [29]:
df_del_vary_bad_lfs.shape

(280, 31)

In [30]:
list(df_del_vary_bad_lfs)

['strat',
 'seed',
 'pickle_file_name',
 'table_name',
 'timestamp_str',
 'deletion_type',
 'deletion_absolute_threshold',
 'rbbm_runtime',
 'bbox_runtime',
 'avg_tree_size_increase',
 'user_input_size',
 'complaint_ratio',
 'num_complaints',
 'num_confirmations',
 'global_accuracy',
 'fix_rate',
 'confirm_preserve_rate',
 'new_global_accuracy',
 'prev_signaled_cnt',
 'new_signaled_cnt',
 'num_functions',
 'deletion_factor',
 'post_fix_num_funcs',
 'num_of_funcs_processed_by_algo',
 'complaint_reached_max',
 'confirm_reached_max',
 'lf_source',
 'retrain_after_percent',
 'retrain_accuracy_thresh',
 'load_funcs_from_pickle',
 'pre_deletion_threshold']

In [31]:
time_strs = list(df_del_vary_bad_lfs['timestamp_str'].unique())

In [32]:
time_strs

[20231015132734,
 20231015132756,
 20231015132820,
 20231015132842,
 20231015132907,
 20231015132930,
 20231015132953,
 20231015133016,
 20231015133042,
 20231015133107,
 20231015133140,
 20231015133206,
 20231015133232,
 20231015133300,
 20231015133328,
 20231015133402,
 20231015133438,
 20231015133512,
 20231015133546,
 20231015133621,
 20231015133705,
 20231015133740,
 20231015133812,
 20231015133846,
 20231015133918,
 20231015133951,
 20231015134026,
 20231015134100,
 20231015134134,
 20231015134214,
 20231015134254,
 20231015134332,
 20231015134411,
 20231015134504,
 20231015134543,
 20231015134625,
 20231015134711,
 20231015134758,
 20231015134845,
 20231015134948,
 20231015135035,
 20231015135124,
 20231015135213,
 20231015135247,
 20231015135322,
 20231015135357,
 20231015135432,
 20231015135507,
 20231015135556,
 20231015135633,
 20231015135716,
 20231015135758,
 20231015135841,
 20231015135925,
 20231015140010,
 20231015140055,
 20231015140141,
 20231015140206,
 2023101514023

In [33]:
# ['rule', 'deleted', 'pre_fix_size', 'after_fix_size', 'pre-deleted', 'fixed_treerule_text']


In [36]:
# tree_dir = '../experiment_results_folders/deletion_factor_lf_1014_absolute_change_bad_lf_cnt/_20231015132734/'

def calculate_delet_precision(tree_dir):
    fs = glob.glob(f'{tree_dir}/*fix_book_keeping_dict.pkl')[0]
    with open(fs, 'rb') as f:
        bkeepdict = pickle.load(f)
    return len([k for (k,v) in bkeepdict.items() if (v['rule'].is_good==False and v['deleted'])]),\
sum([v['after_fix_size'] for (k,v) in bkeepdict.items() if not v['deleted']]),\
sum([v['pre_fix_size'] for (k,v) in bkeepdict.items()])

In [37]:
import re
def extract_bad_func_cnt(x):
    return re.sub(r'.*bad_(\d+)', r'\1', x)

In [39]:
cur_tstamp=0
result_cnt=0
folder_dir = '../experiment_results_folders/deletion_factor_lf_1014_absolute_change_bad_lf_cnt/_'
while(cur_tstamp+56<=280):
    deletion_precisions = []
    for t in time_strs[cur_tstamp:cur_tstamp+56]:
        tree_dir= folder_dir+str(t)+'/'
        bad_deleted_cnt, total_size_left, initial_size = calculate_delet_precision(tree_dir)
        deletion_precisions.append({"timestamp_str":t, 'bad_deleted_cnt':bad_deleted_cnt, 
                                    'total_size_left':total_size_left, 'initial_size':initial_size})
    cur_tstamp+=56
    print(deletion_precisions)
    print('\n')
    precision_df = pd.DataFrame(deletion_precisions)
    merged_pd = pd.merge(df_del_vary_bad_lfs, precision_df, on='timestamp_str')
    mcols = ["pickle_file_name","deletion_type","deletion_absolute_threshold",\
             "rbbm_runtime","bbox_runtime","avg_tree_size_increase","post_fix_num_funcs","bad_deleted_cnt",\
             "global_accuracy","new_global_accuracy",
             "fix_rate","confirm_preserve_rate",\
             "prev_signaled_cnt","new_signaled_cnt","timestamp_str", 'total_size_left', 'initial_size']
    merged_pd_smaller = merged_pd[mcols]
    merged_pd_smaller['gt_bad_func_cnt'] = merged_pd_smaller['pickle_file_name'].apply(extract_bad_func_cnt)
    merged_pd_smaller.to_csv(f'../experiments_graphs/csv_deletion_factor_lf_{result_cnt}.csv',index=False)
    result_cnt+=1

[{'timestamp_str': 20231015132734, 'bad_deleted_cnt': 3, 'total_size_left': 110, 'initial_size': 90}, {'timestamp_str': 20231015132756, 'bad_deleted_cnt': 2, 'total_size_left': 128, 'initial_size': 90}, {'timestamp_str': 20231015132820, 'bad_deleted_cnt': 2, 'total_size_left': 150, 'initial_size': 90}, {'timestamp_str': 20231015132842, 'bad_deleted_cnt': 2, 'total_size_left': 176, 'initial_size': 90}, {'timestamp_str': 20231015132907, 'bad_deleted_cnt': 2, 'total_size_left': 206, 'initial_size': 90}, {'timestamp_str': 20231015132930, 'bad_deleted_cnt': 2, 'total_size_left': 206, 'initial_size': 90}, {'timestamp_str': 20231015132953, 'bad_deleted_cnt': 0, 'total_size_left': 250, 'initial_size': 90}, {'timestamp_str': 20231015133016, 'bad_deleted_cnt': 6, 'total_size_left': 71, 'initial_size': 90}, {'timestamp_str': 20231015133042, 'bad_deleted_cnt': 6, 'total_size_left': 80, 'initial_size': 90}, {'timestamp_str': 20231015133107, 'bad_deleted_cnt': 6, 'total_size_left': 124, 'initial_siz

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_pd_smaller['gt_bad_func_cnt'] = merged_pd_smaller['pickle_file_name'].apply(extract_bad_func_cnt)


[{'timestamp_str': 20231015140141, 'bad_deleted_cnt': 3, 'total_size_left': 80, 'initial_size': 90}, {'timestamp_str': 20231015140206, 'bad_deleted_cnt': 3, 'total_size_left': 98, 'initial_size': 90}, {'timestamp_str': 20231015140230, 'bad_deleted_cnt': 3, 'total_size_left': 98, 'initial_size': 90}, {'timestamp_str': 20231015140301, 'bad_deleted_cnt': 2, 'total_size_left': 124, 'initial_size': 90}, {'timestamp_str': 20231015140326, 'bad_deleted_cnt': 2, 'total_size_left': 139, 'initial_size': 90}, {'timestamp_str': 20231015140352, 'bad_deleted_cnt': 2, 'total_size_left': 139, 'initial_size': 90}, {'timestamp_str': 20231015140417, 'bad_deleted_cnt': 0, 'total_size_left': 250, 'initial_size': 90}, {'timestamp_str': 20231015140443, 'bad_deleted_cnt': 6, 'total_size_left': 62, 'initial_size': 90}, {'timestamp_str': 20231015140506, 'bad_deleted_cnt': 6, 'total_size_left': 89, 'initial_size': 90}, {'timestamp_str': 20231015140531, 'bad_deleted_cnt': 5, 'total_size_left': 133, 'initial_size':

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_pd_smaller['gt_bad_func_cnt'] = merged_pd_smaller['pickle_file_name'].apply(extract_bad_func_cnt)


[{'timestamp_str': 20231015143903, 'bad_deleted_cnt': 3, 'total_size_left': 79, 'initial_size': 90}, {'timestamp_str': 20231015143926, 'bad_deleted_cnt': 3, 'total_size_left': 97, 'initial_size': 90}, {'timestamp_str': 20231015143949, 'bad_deleted_cnt': 3, 'total_size_left': 108, 'initial_size': 90}, {'timestamp_str': 20231015144015, 'bad_deleted_cnt': 3, 'total_size_left': 121, 'initial_size': 90}, {'timestamp_str': 20231015144038, 'bad_deleted_cnt': 2, 'total_size_left': 151, 'initial_size': 90}, {'timestamp_str': 20231015144102, 'bad_deleted_cnt': 2, 'total_size_left': 168, 'initial_size': 90}, {'timestamp_str': 20231015144126, 'bad_deleted_cnt': 0, 'total_size_left': 258, 'initial_size': 90}, {'timestamp_str': 20231015144151, 'bad_deleted_cnt': 6, 'total_size_left': 81, 'initial_size': 90}, {'timestamp_str': 20231015144218, 'bad_deleted_cnt': 6, 'total_size_left': 99, 'initial_size': 90}, {'timestamp_str': 20231015144246, 'bad_deleted_cnt': 6, 'total_size_left': 110, 'initial_size'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_pd_smaller['gt_bad_func_cnt'] = merged_pd_smaller['pickle_file_name'].apply(extract_bad_func_cnt)


[{'timestamp_str': 20231015151808, 'bad_deleted_cnt': 3, 'total_size_left': 107, 'initial_size': 90}, {'timestamp_str': 20231015151833, 'bad_deleted_cnt': 3, 'total_size_left': 125, 'initial_size': 90}, {'timestamp_str': 20231015151859, 'bad_deleted_cnt': 3, 'total_size_left': 125, 'initial_size': 90}, {'timestamp_str': 20231015151926, 'bad_deleted_cnt': 3, 'total_size_left': 125, 'initial_size': 90}, {'timestamp_str': 20231015151952, 'bad_deleted_cnt': 2, 'total_size_left': 155, 'initial_size': 90}, {'timestamp_str': 20231015152019, 'bad_deleted_cnt': 2, 'total_size_left': 155, 'initial_size': 90}, {'timestamp_str': 20231015152048, 'bad_deleted_cnt': 0, 'total_size_left': 270, 'initial_size': 90}, {'timestamp_str': 20231015152114, 'bad_deleted_cnt': 6, 'total_size_left': 57, 'initial_size': 90}, {'timestamp_str': 20231015152140, 'bad_deleted_cnt': 6, 'total_size_left': 93, 'initial_size': 90}, {'timestamp_str': 20231015152205, 'bad_deleted_cnt': 5, 'total_size_left': 104, 'initial_siz

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_pd_smaller['gt_bad_func_cnt'] = merged_pd_smaller['pickle_file_name'].apply(extract_bad_func_cnt)


[{'timestamp_str': 20231015155744, 'bad_deleted_cnt': 3, 'total_size_left': 94, 'initial_size': 90}, {'timestamp_str': 20231015155806, 'bad_deleted_cnt': 3, 'total_size_left': 94, 'initial_size': 90}, {'timestamp_str': 20231015155827, 'bad_deleted_cnt': 2, 'total_size_left': 105, 'initial_size': 90}, {'timestamp_str': 20231015155847, 'bad_deleted_cnt': 2, 'total_size_left': 118, 'initial_size': 90}, {'timestamp_str': 20231015155909, 'bad_deleted_cnt': 2, 'total_size_left': 118, 'initial_size': 90}, {'timestamp_str': 20231015155932, 'bad_deleted_cnt': 1, 'total_size_left': 152, 'initial_size': 90}, {'timestamp_str': 20231015155953, 'bad_deleted_cnt': 0, 'total_size_left': 242, 'initial_size': 90}, {'timestamp_str': 20231015160018, 'bad_deleted_cnt': 6, 'total_size_left': 76, 'initial_size': 90}, {'timestamp_str': 20231015160047, 'bad_deleted_cnt': 6, 'total_size_left': 112, 'initial_size': 90}, {'timestamp_str': 20231015160115, 'bad_deleted_cnt': 6, 'total_size_left': 134, 'initial_size

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  merged_pd_smaller['gt_bad_func_cnt'] = merged_pd_smaller['pickle_file_name'].apply(extract_bad_func_cnt)


In [None]:
# merged_pd = pd.merge(df_del_vary_bad_lfs, precision_df, on='timestamp_str')
# mcols = ["pickle_file_name","deletion_type","deletion_absolute_threshold",\
#          "rbbm_runtime","bbox_runtime","avg_tree_size_increase","post_fix_num_funcs","bad_deleted_cnt",\
#          "global_accuracy","new_global_accuracy",
#          "fix_rate","confirm_preserve_rate",\
#          "prev_signaled_cnt","new_signaled_cnt","timestamp_str"]
# merged_pd_smaller = merged_pd[mcols]

In [None]:
# merged_pd_smaller['gt_bad_func_cnt'] = merged_pd_smaller['pickle_file_name'].apply(extract_bad_func_cnt)

In [None]:
# merged_pd_smaller = merged_pd_smaller[["gt_bad_func_cnt","deletion_type","deletion_absolute_threshold",\
#          "rbbm_runtime","bbox_runtime","avg_tree_size_increase","post_fix_num_funcs","bad_deleted_cnt",\
#          "global_accuracy","new_global_accuracy",
#          "fix_rate","confirm_preserve_rate",\
#          "prev_signaled_cnt","new_signaled_cnt","timestamp_str"]]

In [None]:
# merged_pd_smaller.to_csv('../experiments_graphs/csv_deletion_factor_lf.csv',index=False)

In [None]:
x=[1,2,3]
sum(x)