# Data Evaluation

The purpose of this notebook is to streamline the process of manually evaluating the extracted test case data from notebook 1.

For this we will sample test cases from the data (using a fixed seed for reproducibility) and have information about the test cases be displayed for manual evaluation, including the relevant lines of code. 

## Imports

In [30]:
import pandas as pd
import os
import numpy as np

## Setup

In [40]:
# Set library root folder:
dl_library_root = "A:/BachelorThesis/DLL_Testing_Tool/DL_Libraries/"

# For TensorFlow
library_root_tensorflow = dl_library_root + "Tensorflow/tensorflow-master/tensorflow/python/"
save_data_to_tensorflow = "extracted_data/tensorflow_evaluation_data.csv"

# For TensorFlow 1.12.0
library_root_tensorflow_1_12_0 = dl_library_root + "Tensorflow/tensorflow-1.12.0/tensorflow/python/"
save_data_to_tensorflow_1_12_0 = "extracted_data/tensorflow_1.12.0_evaluation_data.csv"

# For Pytorch
library_root_pytorch = dl_library_root + "PyTorch/pytorch-1.9.0/" 
save_data_to_pytorch = "extracted_data/pytorch_evaluation_data.csv"

# For Theano 1.0.3
library_root_theano = dl_library_root + "Theano/Theano-rel-1.0.3/theano/"
save_data_to_theano = "extracted_data/theano_evaluation_data.csv"

# For Keras
library_root_keras = dl_library_root + "Keras/keras-2.6.0/keras/"
save_data_to_keras = "extracted_data/keras_evaluation_data.csv"

# For Scipy
library_root_scipy = dl_library_root + "Scipy/scipy-master/scipy"
save_data_to_scipy = "extracted_data/scipy_evaluation_data.csv"

# For Numpy
library_root_numpy = dl_library_root + "Numpy/numpy-main"
save_data_to_numpy = "extracted_data/numpy_evaluation_data.csv"

## Import the data

In [4]:
# import tensorflow data and preview
df_tensorflow = pd.read_csv('extracted_data/tensorflow_data.csv')
df_tensorflow.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,autograph\converters\break_statements_test.py,61,test_while_loop_preserves_directives,48,assertIs,2,57,object
1,autograph\converters\break_statements_test.py,90,test_for_loop_preserves_directives,79,assertIs,2,87,object
2,autograph\converters\control_flow_test.py,51,assertValuesEqual,47,assertAllEqual,1,48,nest.map_structure
3,autograph\converters\functions_test.py,45,test_basic,33,assertEqual,2,41,self.transform
4,autograph\converters\functions_test.py,127,test_lambda_in_return_value,119,assertTrue,1,126,tr
5,autograph\converters\lists_test.py,45,test_empty_list,35,assertEqual,1,42,tr
6,autograph\converters\variables_test.py,49,test_aug_assign,41,assertEqual,2,49,UNSUPPORTED Binary Operation
7,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,1,39,converter.ConversionOptions
8,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,1,50,converter.ConversionOptions
9,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,2,54,reparsed.f


In [9]:
# import tensorflow 1.12.0 data and preview
df_tensorflow_1_12_0 = pd.read_csv('extracted_data/tensorflow_1.12.0_data.csv')
df_tensorflow_1_12_0.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,autograph\converters\builtin_functions_test.py,42,test_len,33,assertEqual,1,41,result.test_fn
1,autograph\converters\builtin_functions_test.py,42,test_len,33,assertEqual,1,40,array_ops.placeholder
2,autograph\converters\call_trees_test.py,102,test_py_func_no_retval,88,assertFalse,1,99,Dummy
3,autograph\converters\call_trees_test.py,104,test_py_func_no_retval,88,assertEquals,2,99,Dummy
4,autograph\converters\call_trees_test.py,134,test_uncompiled_modules,117,assertEquals,1,133,result.test_fn
5,autograph\converters\lists_test.py,47,test_empty_list,39,assertTrue,1,45,result.test_fn
6,autograph\converters\lists_test.py,48,test_empty_list,39,assertEqual,1,45,result.test_fn
7,autograph\converters\lists_test.py,71,test_list_append,58,assertAllEqual,1,70,list_ops.tensor_list_stack
8,autograph\converters\lists_test.py,71,test_list_append,58,assertAllEqual,1,69,result.test_fn
9,autograph\converters\lists_test.py,94,test_list_pop,73,assertAllEqual,1,93,list_ops.tensor_list_stack


In [10]:
# import pytorch data and preview
df_pytorch = pd.read_csv('extracted_data/pytorch_data.csv')
df_pytorch.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,tools/test/test_test_history.py,61,test_help_examples,59,assertEqual,1,60,parse_description
1,tools/test/test_test_history.py,70,test_help_examples,59,assertEqual,1,66,list
2,tools/test/test_stats.py,331,test_analysis,225,assertEqual,2,326,print_test_stats.analyze
3,tools/test/test_actions_local_runner.py,49,test_step_extraction,24,assertEqual,1,42,actions_local_runner.grab_specific_steps
4,tools/test/test_actions_local_runner.py,49,test_step_extraction,24,assertEqual,1,25,
5,tools/test/test_translate_annotations.py,160,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff
6,tools/test/test_translate_annotations.py,161,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff
7,tools/test/test_translate_annotations.py,166,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff
8,tools/test/test_translate_annotations.py,170,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff
9,tools/test/test_translate_annotations.py,171,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff


In [11]:
# import theano data and preview
df_theano = pd.read_csv('extracted_data/theano_data.csv')
df_theano.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,/gpuarray/tests/test_gemmcorr.py,53,run_conv_valid,20,assert_allclose,1,51,f_ref
1,/gpuarray/tests/test_gemmcorr.py,53,run_conv_valid,20,assert_allclose,2,52,f
2,/gpuarray/tests/test_gemmcorr.py,185,run_gradweight,157,assert_allclose,1,183,f_ref
3,/gpuarray/tests/test_gemmcorr.py,185,run_gradweight,157,assert_allclose,2,184,f
4,/gpuarray/tests/test_gemmcorr.py,236,run_gradinput,205,assert_allclose,1,234,f_ref
5,/gpuarray/tests/test_gemmcorr.py,236,run_gradinput,205,assert_allclose,2,235,f
6,/gpuarray/tests/test_gemmcorr3d.py,45,run_conv_valid,19,assert_allclose,1,43,f_ref
7,/gpuarray/tests/test_gemmcorr3d.py,45,run_conv_valid,19,assert_allclose,2,44,f
8,/gpuarray/tests/test_gemmcorr3d.py,147,run_gradweight,119,assert_allclose,1,145,f_ref
9,/gpuarray/tests/test_gemmcorr3d.py,147,run_gradweight,119,assert_allclose,2,146,f


In [12]:
# import theano data and preview
df_keras = pd.read_csv('extracted_data/keras_data.csv')
df_keras.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,/metrics_functional_test.py,35,test_metrics,29,assertEqual,1,34,metric
1,/metrics_functional_test.py,35,test_metrics,29,assertEqual,1,31,backend.variable
2,/metrics_functional_test.py,35,test_metrics,29,assertEqual,1,32,backend.variable
3,/metrics_functional_test.py,42,test_sparse_categorical_accuracy_int,37,assertEqual,1,40,backend.variable
4,/metrics_functional_test.py,42,test_sparse_categorical_accuracy_int,37,assertEqual,1,41,backend.variable
5,/metrics_functional_test.py,48,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,40,backend.variable
6,/metrics_functional_test.py,48,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,45,backend.variable
7,/metrics_functional_test.py,48,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,41,backend.variable
8,/metrics_functional_test.py,48,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,46,backend.variable
9,/metrics_functional_test.py,55,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,40,backend.variable


In [13]:
# import scipy data and preview
df_scipy = pd.read_csv('extracted_data/scipy_data.csv')
df_scipy.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,\interpolate\tests\test_bsplines.py,267,test_derivative_jumps,246,allclose,1,266,np.asarray
1,\interpolate\tests\test_bsplines.py,267,test_derivative_jumps,246,allclose,2,266,np.asarray
2,\interpolate\tests\test_bsplines.py,271,test_derivative_jumps,246,allclose,1,257,np.asarray
3,\interpolate\tests\test_bsplines.py,271,test_derivative_jumps,246,allclose,2,257,np.asarray
4,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,1,1492,norm
5,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,1,1487,astype
6,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,2,1493,np.linalg.norm
7,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,2,1487,astype
8,\linalg\tests\test_blas.py,861,test_symm_wrong_uplo,853,allclose,1,860,f
9,\linalg\tests\test_blas.py,864,test_symm_wrong_uplo,853,allclose,1,860,f


In [14]:
# import numpy data and preview
df_numpy = pd.read_csv('extracted_data/numpy_data.csv')
df_numpy.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,\core\tests\test_api.py,20,test_array_array,15,assert_equal,1,17,np.ones
1,\core\tests\test_api.py,20,test_array_array,15,assert_equal,2,17,np.ones
2,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,22,sys.getrefcount
3,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,18,type
4,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,17,np.ones
5,\core\tests\test_api.py,24,test_array_array,15,assert_equal,2,18,type
6,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,22,sys.getrefcount
7,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,30,sys.getrefcount
8,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,16,type
9,\core\tests\test_api.py,32,test_array_array,15,assert_equal,2,16,type


## Analyze coverage

To track the progress of our test case extraction we display statistics about how many cases still are still unsupported. This is either denoted by an "UNSUPPORTED ..." statement in the "Differential Test Function" column of the data or by an empty string in this column, i.e. `NaN`.

In [15]:
def print_not_covered(df, name):
    not_covered_df = df[df['Differential_Test_Function'].str.contains('UNSUPPORTED', na=False) | df['Differential_Test_Function'].isna()]
    print(name+":\t\t"+ str(len(not_covered_df)) + " out of " + str(len(df)) +   " cases not covered ({}%).".format(round(len(not_covered_df)/len(df)*100, 2)))
    print(not_covered_df.Differential_Test_Function.value_counts(dropna=False))
    print("\n")

print_not_covered(df_tensorflow, "Tensorflow")
print_not_covered(df_tensorflow_1_12_0, "Tensorflow 1.12.0")
print_not_covered(df_pytorch, "PyTorch")
print_not_covered(df_theano, "Theano")
print_not_covered(df_keras, "Keras")
print_not_covered(df_scipy, "Scipy")
print_not_covered(df_numpy, "Numpy")

Tensorflow:		4619 out of 45336 cases not covered (10.19%).
UNSUPPORTED Binary Operation                                                      3459
NaN                                                                                810
UNSUPPORTED List Comprehension                                                     264
UNSUPPORTED Unary Operation                                                         41
UNSUPPORTED Compare                                                                 10
UNSUPPORTED Name (named variable or defined function: last_assignment)              10
UNSUPPORTED Constant                                                                 8
UNSUPPORTED Name (named variable or defined function: v)                             4
UNSUPPORTED Name (named variable or defined function: mirrored)                      2
UNSUPPORTED Name (named variable or defined function: converted_concrete_func)       2
UNSUPPORTED Name (named variable or defined function: aggregating)     

## Differentiate between data entries and test cases

The number of identified test cases is smaller than the number of data entries, because
each identified test case may have entries for multiple assert statement arguments in the case that the algorithm
can not clearly decide which argument represents the oracle. Additionally, each argument may have multiple differential testing functions that effect it within the test case's code. 


In [16]:
# Since for our definition each test case is defined by its assert statement, we can drop all entries that 
# have the same file path + line number combination. Thus we get the number of test cases identified.
def get_num_test_cases(df): 
    return str(len(df.drop_duplicates(subset=['File_Path', 'Line_Number'])))
    
print("TensorFlow  2.6.0: " + get_num_test_cases(df_tensorflow))
print("TensorFlow 1.12.0: " + get_num_test_cases(df_tensorflow_1_12_0))
print("PyTorch     1.9.0: " + get_num_test_cases(df_pytorch))
print("Theano:     1.0.3: " + get_num_test_cases(df_theano))
print("Keras       2.6.0: " + get_num_test_cases(df_keras))

TensorFlow  2.6.0: 19720
TensorFlow 1.12.0: 14005
PyTorch     1.9.0: 10854
Theano:     1.0.3: 1290
Keras       2.6.0: 3783


## Filter data for external functions

In [91]:
def select_cases_from_package(df, package_name, additional_keywords=[]):
    """additional_keywords can accept multiple keywords to filter the differential test functions for."""
    # for filter keywords we use the '^' regex to mark the start of the string
    searchfor = ['^'+package_name+'\.']
    for keyword in additional_keywords:
        searchfor.append('^'+keyword+'\.')

    column_to_filter = 'Differential_Test_Function'

    # for multiple keywords, join them with the regex "or"
    if len(searchfor) > 1:
        filter_keyword = '|'.join(searchfor)
    else:
        filter_keyword = searchfor[0]

    filtered_df = df[df[column_to_filter].str.contains(filter_keyword, na=False)]

    return filtered_df


df_tensorflow_filtered = select_cases_from_package(df_tensorflow, 'keras', ['pytorch', 'np', 'scipy'])

df_tensorflow_1_12_0_filtered = select_cases_from_package(df_tensorflow_1_12_0, 'keras', ['pytorch', 'np', 'scipy'])

df_pytorch_filtered = select_cases_from_package(df_pytorch, 'keras', ['tensorflow', 'tf', 'np', 'scipy'])

df_keras_filtered = select_cases_from_package(df_keras, 'pytorch', ['tensorflow', 'tf', 'np', 'scipy'])

df_theano_filtered = select_cases_from_package(df_theano, 'pytorch', ['tensorflow', 'tf', 'np', 'scipy'])

# Tool for manual evaluation

This tool is meant to help with quickly evaluating test cases from the dataset. For each test case, it prints all information collected about the case, including the oracle argument position and the extracted function name, as well as the code inside the function where the test case was defined. Then the evaluator is asked for an evaluation of the test case via input. This evaluation is then stored alongside the test case in the data.

Evaluation keys:  
y: Test case correctly identified  
n: Test case is not differential testing  
?: Allows for the entry of a comment. This is meant for situations where the current case is differential testing, but the differential testing function was not extracted correctly (or some other data is incorrect).  

In [103]:
UNEVALUATED_STRING = "UNEVALUATED"

class EvaluationAutomator:
    def __init__(self, df, library_root, save_data_to):
        """Initialize the evaluation automator.
        
        df: Dataframe to evaluate.
        library_root: The root folder of the DL library
        save_data_to: Relative location to load/save the evaluation data
        """
        self.df = df
        self.save_data_to = save_data_to
        self.library_root = library_root
        
        # try importing evaluation data if it already exists
        if os.path.isfile(self.save_data_to): 
            self.eval_df = pd.read_csv(self.save_data_to)
            print("Evaluation data opened.")
        
        # otherwise initialize evaluation df and add new column for the evaluation result
        else:
            self.eval_df = df.copy()
            todo_list = [UNEVALUATED_STRING] * len(self.eval_df.index)
            self.eval_df.insert(len(df.columns), 'Evaluation', todo_list)
            self.eval_df.to_csv(self.save_data_to)
            print("New evaluation data created.")
            
    def getEvalData(self):
        """Returns the data frame containing the evaluation data."""
        return self.eval_df
    
    def evaluate(self, index):
        """Present the data entry at the given index for evaluation."""
        
        # present the data entry
        print(self.df.iloc[index])
        print("\n")
        
        # check if it has already been evaluated
        if self.eval_df.at[index, 'Evaluation'] != UNEVALUATED_STRING:
            print("Already evaluated! Previous evaluation: " + self.eval_df.at[index, 'Evaluation'])
            if input("Re-evaluate? (y / n) ") != "y":
                return
            
        
        # print the relevant source code lines:
        
        # get source file of current test case and open it as an array of lines
        source_file_path = self.df.iloc[index]['File_Path'].replace('\\', '/')
        source = open(self.library_root + source_file_path).readlines()

        # set beginning and end line number for the code section to display
        beginning_line_no = self.df.iloc[index]['Function_Definition_Line_Number']
        end_line_no = self.df.iloc[index]['Line_Number']

        # print these lines 
        for line in range(beginning_line_no, end_line_no+1):
            print(str(line) + ": " + source[line-1])
            
        # check if the last line with the assert statement is complete or if the
        # assert arguments were moved to new lines, in which case: print more lines
        line = end_line_no
        last_line = source[line-1]       
        
        # we can check if the assert statement is complete if it ends with a closed bracket
        while not last_line.rstrip().endswith(")"):
            line += 1
            last_line = source[line-1]
            print(str(line) + ": " + last_line)
            
        # ask for a decision from the evaluator:
        decision_bool = True
        while decision_bool:
            decision = input("Correctly identified? (y / n / ?): ")
            
            if decision in ["y", "n"]:
                decision_bool = False

            elif decision == "?":
                decision = input("Please comment on this case: ")
                decision_bool = False
                
            else:
                print("Error. Please specify y/n/?")
                decision_bool = True
                
        # write the decision to the evaluation data
        self.eval_df.at[index, 'Evaluation'] = decision
        self.eval_df.to_csv(self.save_data_to, index=False)

# initialize automators:
# TensorFlow
evalAutomator_tensorflow = EvaluationAutomator(df_tensorflow, library_root_tensorflow, save_data_to_tensorflow)
evalAutomator_tensorflow_filtered = EvaluationAutomator(df_tensorflow_filtered, library_root_tensorflow, "extracted_data/tensorflow_filtered_evaluation_data.csv")

evalAutomator_tensorflow_1_12_0 = EvaluationAutomator(df_tensorflow_1_12_0, library_root_tensorflow_1_12_0, save_data_to_tensorflow_1_12_0)
evalAutomator_tensorflow_1_12_0_filtered = EvaluationAutomator(df_tensorflow_1_12_0_filtered, library_root_tensorflow_1_12_0, "extracted_data/tensorflow_1_12_0_filtered_evaluation_data.csv")

# PyTorch
evalAutomator_pytorch = EvaluationAutomator(df_pytorch, library_root_pytorch, save_data_to_pytorch)
evalAutomator_pytorch_filtered = EvaluationAutomator(df_pytorch_filtered, library_root_pytorch, "extracted_data/pytorch_filtered_evaluation_data.csv")

# Theano
evalAutomator_theano = EvaluationAutomator(df_theano, library_root_theano, save_data_to_theano)
evalAutomator_theano_filtered = EvaluationAutomator(df_theano_filtered, library_root_theano, "extracted_data/theano_filtered_evaluation_data.csv")

# Keras
evalAutomator_keras = EvaluationAutomator(df_keras, library_root_keras, save_data_to_keras)
evalAutomator_keras_filtered = EvaluationAutomator(df_keras_filtered, library_root_keras, "extracted_data/keras_filtered_evaluation_data.csv")

# Scipy
evalAutomator_scipy = EvaluationAutomator(df_scipy, library_root_scipy, save_data_to_scipy)

# Numpy
evalAutomator_numpy = EvaluationAutomator(df_numpy, library_root_numpy, save_data_to_numpy)

# for debugging: test evaluation on a particular case
#evalAutomator_tensorflow_1_12_0.evaluate(218)

Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.


# Guide for evaluation

For each test case, please try to check the following facts:

- Is the test case a differential test case? 
- Was the correct argument identified? (Check if `Oracle_Arugment_Position` is indeed the oracle)
- Is the extracted function one of the relevant internal or differential functions?

If the answer to all three is questions is yes, then this case was most likely correctly identified (`y`)

## Sampling cases for evaluation

Set a seed and the number of cases you would like to evaluate, as well as the data to evaluate by setting the `evalAutomator` used:

In [104]:
RANDOM_SEED = 42
NUM_CASES = 50

# change this to the evaluator you would like to evaluate the data of:
evalAutomator = evalAutomator_tensorflow_filtered

eval_data = evalAutomator.getEvalData()
eval_data

Unnamed: 0.1,Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function,Evaluation
0,447,client\session_clusterspec_prop_test.py,285,testClusterSpecPropagationThreeServers2Graphs,235,assertAllEqual,1,282,np.zeros,UNEVALUATED
1,451,client\session_clusterspec_prop_test.py,286,testClusterSpecPropagationThreeServers2Graphs,235,assertAllEqual,1,282,np.zeros,UNEVALUATED
2,455,client\session_clusterspec_prop_test.py,288,testClusterSpecPropagationThreeServers2Graphs,235,assertAllEqual,1,283,np.ones,UNEVALUATED
3,459,client\session_clusterspec_prop_test.py,289,testClusterSpecPropagationThreeServers2Graphs,235,assertAllEqual,1,283,np.ones,UNEVALUATED
4,462,client\session_clusterspec_prop_test.py,290,testClusterSpecPropagationThreeServers2Graphs,235,assertAllEqual,1,282,np.zeros,UNEVALUATED
...,...,...,...,...,...,...,...,...,...,...
4068,43831,training\training_ops_test.py,447,_testTypesForAdam,417,assertAllCloseAccordingToType,1,430,np.array,UNEVALUATED
4069,43849,training\warm_starting_util_test.py,149,testWarmStartVarPrevVarPartitioned,133,assertAllClose,1,139,np.concatenate,UNEVALUATED
4070,43853,training\warm_starting_util_test.py,171,testWarmStartVarCurrentVarPartitioned,151,assertAllClose,2,169,np.concatenate,UNEVALUATED
4071,43855,training\warm_starting_util_test.py,198,testWarmStartVarBothVarsPartitioned,173,assertAllClose,1,179,np.concatenate,UNEVALUATED


In [105]:
# sample cases
#sampled_cases = eval_data.sample(n=NUM_CASES, random_state=RANDOM_SEED)
np.random.seed(RANDOM_SEED)
sampled_cases = np.random.randint(0, len(eval_data), size=NUM_CASES)

sample_counter = 0

# iterate over each case and evaluate
#for i, row in sampled_cases.iterrows():
for i in sampled_cases:
    print("\nCase " + str(i) + " (" + str(sample_counter) + " / " + str(len(sampled_cases)) + ")\n")
    evalAutomator.evaluate(i)
    sample_counter += 1


Case 3174 (0 / 50)

File_Path                            kernel_tests\self_adjoint_eig_op_test.py
Line_Number                                                               175
Found_in_Function                                                        Test
Function_Definition_Line_Number                                           142
Assert_Statement_Type                                          assertAllClose
Oracle_Argument_ Position                                                   1
Differential_Function_Line_Number                                         158
Differential_Test_Function                                     np.linalg.eigh
Name: 29763, dtype: object


142:   def Test(self):

143:     np.random.seed(1)

144:     n = shape_[-1]

145:     batch_shape = shape_[:-2]

146:     np_dtype = dtype_.as_numpy_dtype

147:     a = np.random.uniform(

148:         low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)

149:     if dtype_.is_complex:

150:       a += 1j * np.ra

Correctly identified? (y / n / ?): y

Case 1095 (5 / 50)

File_Path                            keras\layers\merge_test.py
Line_Number                                                 209
Found_in_Function                                test_merge_dot
Function_Definition_Line_Number                             197
Assert_Statement_Type                               assertEqual
Oracle_Argument_ Position                                     1
Differential_Function_Line_Number                           198
Differential_Test_Function                   keras.layers.Input
Name: 17106, dtype: object


197:   def test_merge_dot(self):

198:     i1 = keras.layers.Input(shape=(4,))

199:     i2 = keras.layers.Input(shape=(4,))

200:     o = keras.layers.dot([i1, i2], axes=1)

201:     self.assertListEqual(o.shape.as_list(), [None, 1])

202:     model = keras.models.Model([i1, i2], o)

203:     model.run_eagerly = testing_utils.should_run_eagerly()

204:     _ = keras.layers.Dot(axes=1).get_config()

Correctly identified? (y / n / ?): y

Case 1238 (11 / 50)

File_Path                                               keras\layers\recurrent_test.py
Line_Number                                                                        541
Found_in_Function                    test_rnn_cell_with_constants_layer_passing_ini...
Function_Definition_Line_Number                                                    495
Assert_Statement_Type                                                   assertAllClose
Oracle_Argument_ Position                                                            2
Differential_Function_Line_Number                                                  497
Differential_Test_Function                                                 keras.Input
Name: 17346, dtype: object


495:   def test_rnn_cell_with_constants_layer_passing_initial_state(self):

496:     # Test basic case.

497:     x = keras.Input((None, 5))

498:     c = keras.Input((3,))

499:     s = keras.Input((32,))

500:     

KeyboardInterrupt: Interrupted by user

## Analyse evaluations

Now we can gain summary statistics about the performance of our function extraction.

In [101]:
evaluation_counts = eval_data.Evaluation.value_counts()

print(evaluation_counts)

total_cases_evaluated = len(eval_data) - evaluation_counts[UNEVALUATED_STRING] 

try:
    print("\nn: " + str(round((evaluation_counts['n'] / total_cases_evaluated)*100)) + " %")

    print("y: " + str(round((evaluation_counts['y'] / total_cases_evaluated)*100)) + " %")

    print("?: " + str(round(((total_cases_evaluated - evaluation_counts['y'] - evaluation_counts['n']) / total_cases_evaluated)*100)) + " %")
    
except:
    print("\nNo evaluations of 'y' or 'n' found!")

UNEVALUATED    4023
y                27
n                23
Name: Evaluation, dtype: int64

n: 46 %
y: 54 %
?: 0 %



## Show the extracted functions

Here we can gain a glimpse into the functions that were extracted:


In [43]:
# Filter out all unsupported rows
extracted_functions_df = eval_data[~eval_data['Differential_Test_Function'].str.contains('UNSUPPORTED', na=False)]


def filter_functions_of_library(library_name, extracted_functions_df):
    """Filter out all unique functions of a given library and print them."""
    print(library_name + ": ", end='')
    library_name += '\.'
    filtered_df = extracted_functions_df[extracted_functions_df['Differential_Test_Function'].str.contains(library_name, na=False)]
    extracted_functions_df.drop(extracted_functions_df.loc[extracted_functions_df['Differential_Test_Function'].str.contains(library_name, na=False)].index, inplace=True)
    
    print(filtered_df.Differential_Test_Function.unique(), end='\n\n')
    return filtered_df
    
    
filter_functions_of_library('np', extracted_functions_df)
filter_functions_of_library('stats', extracted_functions_df)
filter_functions_of_library('scipy', extracted_functions_df)
filter_functions_of_library('keras', extracted_functions_df)
filter_functions_of_library('theano', extracted_functions_df)
filter_functions_of_library('torch', extracted_functions_df)

extracted_functions = extracted_functions_df.Differential_Test_Function.unique()
print(extracted_functions)

np: ['np.random.random' 'np.zeros' 'np.array' 'np.random.uniform' 'np.tanh'
 'np.exp' 'np.random.normal' 'np.reshape' 'np.transpose' 'np.repeat'
 'np.sum' 'np.multiply' 'np.mean' 'np.asarray' 'np.ones' 'np.arange'
 'np.concatenate' 'inp._to_placeholder' 'np.random.randint'
 'np.random.rand' 'np.full' 'np.expand_dims' 'np.flip' 'np.identity'
 'np.take']

stats: []

scipy: ['scipy.sparse.eye' 'scipy.sparse.coo_matrix']

keras: ['keras.callbacks.ModelCheckpoint' 'keras.callbacks.ProgbarLogger'
 'keras.callbacks.ReduceLROnPlateau' 'keras.models.Sequential'
 'keras.callbacks.CallbackList' 'keras.callbacks.TensorBoard'
 'keras.Sequential' 'keras.Input' 'keras.Model' 'keras.backend.variable'
 'keras.optimizers.serialize' 'keras.optimizers.deserialize'
 'keras.layers.Dense' 'keras.layers.Input' 'tf.keras.optimizers.Adam'
 'tf.keras.optimizers.SGD' 'tf.keras.metrics.AUC' 'keras.models.Model'
 'keras.losses.mean_squared_error' 'keras.metrics.AUC'
 'keras.metrics.FalsePositives' 'keras.models.Seq

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [None]:
# utility: write remaining unique extracted functions to txt file
with open('extracted_data/unique_functions.txt', 'w') as f:
    for item in sorted(extracted_functions.astype(str)):
        f.write("%s\n" % item)