# Data Evaluation

The purpose of this notebook is to streamline the process of manually evaluating the extracted test case data from notebook 1.

For this we will sample test cases from the data (using a fixed seed for reproducibility) and have information about the test cases be displayed for manual evaluation, including the relevant lines of code. 

## Imports

In [1]:
import pandas as pd
import os

## Setup

In [2]:
# Set library root folder:
dl_library_root = "/Users/Alex/Desktop/BachelorThesis/DLL_Testing_Tool/DL_Libraries/"

# For TensorFlow
library_root_tensorflow = dl_library_root + "Tensorflow/tensorflow-2.6.0/tensorflow/python/"
save_data_to_tensorflow = "extracted_data/tensorflow_evaluation_data.csv"

# For TensorFlow 1.12.0
library_root_tensorflow_1_12_0 = dl_library_root + "Tensorflow/tensorflow-1.12.0/tensorflow/python/"
save_data_to_tensorflow_1_12_0 = "extracted_data/tensorflow_1.12.0_evaluation_data.csv"

# For Pytorch
library_root_pytorch = dl_library_root + "PyTorch/pytorch-1.9.0/" 
save_data_to_pytorch = "extracted_data/pytorch_evaluation_data.csv"

# For Theano 1.0.3
library_root_theano = dl_library_root + "Theano/Theano-rel-1.0.3/theano/"
save_data_to_theano = "extracted_data/theano_evaluation_data.csv"

# For Keras
library_root_keras = dl_library_root + "Keras/keras-2.6.0/keras/"
save_data_to_keras = "extracted_data/keras_evaluation_data.csv"

# For Scipy
library_root_scipy = dl_library_root + "Scipy/scipy-master/scipy"
save_data_to_scipy = "extracted_data/scipy_evaluation_data.csv"

# For Numpy
library_root_numpy = dl_library_root + "Numpy/numpy-main"
save_data_to_numpy = "extracted_data/numpy_evaluation_data.csv"

## Import the data

In [8]:
# import tensorflow data and preview
df_tensorflow = pd.read_csv('extracted_data/tensorflow_data.csv')
df_tensorflow.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,autograph\converters\break_statements_test.py,61,test_while_loop_preserves_directives,48,assertIs,2,57,object
1,autograph\converters\break_statements_test.py,90,test_for_loop_preserves_directives,79,assertIs,2,87,object
2,autograph\converters\control_flow_test.py,51,assertValuesEqual,47,assertAllEqual,1,48,nest.map_structure
3,autograph\converters\functions_test.py,45,test_basic,33,assertEqual,2,41,self.transform
4,autograph\converters\functions_test.py,127,test_lambda_in_return_value,119,assertTrue,1,126,tr
5,autograph\converters\lists_test.py,45,test_empty_list,35,assertEqual,1,42,tr
6,autograph\converters\variables_test.py,49,test_aug_assign,41,assertEqual,2,49,UNSUPPORTED Binary Operation
7,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,1,39,converter.ConversionOptions
8,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,1,50,converter.ConversionOptions
9,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,2,54,reparsed.f


In [35]:
# import tensorflow 1.12.0 data and preview
df_tensorflow_1_12_0 = pd.read_csv('extracted_data/tensorflow_1.12.0_data.csv')
df_tensorflow_1_12_0.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,autograph\converters\builtin_functions_test.py,42,test_len,33,assertEqual,1,41,result.test_fn
1,autograph\converters\builtin_functions_test.py,42,test_len,33,assertEqual,1,40,array_ops.placeholder
2,autograph\converters\call_trees_test.py,102,test_py_func_no_retval,88,assertFalse,1,99,Dummy
3,autograph\converters\call_trees_test.py,104,test_py_func_no_retval,88,assertEquals,2,99,Dummy
4,autograph\converters\call_trees_test.py,134,test_uncompiled_modules,117,assertEquals,1,133,result.test_fn
5,autograph\converters\lists_test.py,47,test_empty_list,39,assertTrue,1,45,result.test_fn
6,autograph\converters\lists_test.py,48,test_empty_list,39,assertEqual,1,45,result.test_fn
7,autograph\converters\lists_test.py,71,test_list_append,58,assertAllEqual,1,70,list_ops.tensor_list_stack
8,autograph\converters\lists_test.py,71,test_list_append,58,assertAllEqual,1,69,result.test_fn
9,autograph\converters\lists_test.py,94,test_list_pop,73,assertAllEqual,1,93,list_ops.tensor_list_stack


In [10]:
# import pytorch data and preview
df_pytorch = pd.read_csv('extracted_data/pytorch_data.csv')
df_pytorch.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,tools/test/test_test_history.py,61,test_help_examples,59,assertEqual,1,60,parse_description
1,tools/test/test_test_history.py,70,test_help_examples,59,assertEqual,1,66,list
2,tools/test/test_stats.py,331,test_analysis,225,assertEqual,2,326,print_test_stats.analyze
3,tools/test/test_actions_local_runner.py,49,test_step_extraction,24,assertEqual,1,42,actions_local_runner.grab_specific_steps
4,tools/test/test_actions_local_runner.py,49,test_step_extraction,24,assertEqual,1,25,
5,tools/test/test_translate_annotations.py,160,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff
6,tools/test/test_translate_annotations.py,161,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff
7,tools/test/test_translate_annotations.py,166,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff
8,tools/test/test_translate_annotations.py,170,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff
9,tools/test/test_translate_annotations.py,171,test_translate_lao_tzu,154,assertEqual,1,157,parse_diff


In [11]:
# import theano data and preview
df_theano = pd.read_csv('extracted_data/theano_data.csv')
df_theano.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,/gpuarray/tests/test_gemmcorr.py,53,run_conv_valid,20,assert_allclose,1,51,f_ref
1,/gpuarray/tests/test_gemmcorr.py,53,run_conv_valid,20,assert_allclose,2,52,f
2,/gpuarray/tests/test_gemmcorr.py,185,run_gradweight,157,assert_allclose,1,183,f_ref
3,/gpuarray/tests/test_gemmcorr.py,185,run_gradweight,157,assert_allclose,2,184,f
4,/gpuarray/tests/test_gemmcorr.py,236,run_gradinput,205,assert_allclose,1,234,f_ref
5,/gpuarray/tests/test_gemmcorr.py,236,run_gradinput,205,assert_allclose,2,235,f
6,/gpuarray/tests/test_gemmcorr3d.py,45,run_conv_valid,19,assert_allclose,1,43,f_ref
7,/gpuarray/tests/test_gemmcorr3d.py,45,run_conv_valid,19,assert_allclose,2,44,f
8,/gpuarray/tests/test_gemmcorr3d.py,147,run_gradweight,119,assert_allclose,1,145,f_ref
9,/gpuarray/tests/test_gemmcorr3d.py,147,run_gradweight,119,assert_allclose,2,146,f


In [12]:
# import theano data and preview
df_keras = pd.read_csv('extracted_data/keras_data.csv')
df_keras.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,/metrics_functional_test.py,35,test_metrics,29,assertEqual,1,34,metric
1,/metrics_functional_test.py,35,test_metrics,29,assertEqual,1,31,backend.variable
2,/metrics_functional_test.py,35,test_metrics,29,assertEqual,1,32,backend.variable
3,/metrics_functional_test.py,42,test_sparse_categorical_accuracy_int,37,assertEqual,1,40,backend.variable
4,/metrics_functional_test.py,42,test_sparse_categorical_accuracy_int,37,assertEqual,1,41,backend.variable
5,/metrics_functional_test.py,48,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,40,backend.variable
6,/metrics_functional_test.py,48,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,45,backend.variable
7,/metrics_functional_test.py,48,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,41,backend.variable
8,/metrics_functional_test.py,48,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,46,backend.variable
9,/metrics_functional_test.py,55,test_sparse_categorical_accuracy_int,37,assertAllEqual,1,40,backend.variable


In [13]:
# import scipy data and preview
df_scipy = pd.read_csv('extracted_data/scipy_data.csv')
df_scipy.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,\interpolate\tests\test_bsplines.py,267,test_derivative_jumps,246,allclose,1,266,np.asarray
1,\interpolate\tests\test_bsplines.py,267,test_derivative_jumps,246,allclose,2,266,np.asarray
2,\interpolate\tests\test_bsplines.py,271,test_derivative_jumps,246,allclose,1,257,np.asarray
3,\interpolate\tests\test_bsplines.py,271,test_derivative_jumps,246,allclose,2,257,np.asarray
4,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,1,1492,norm
5,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,1,1487,astype
6,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,2,1493,np.linalg.norm
7,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,2,1487,astype
8,\linalg\tests\test_blas.py,861,test_symm_wrong_uplo,853,allclose,1,860,f
9,\linalg\tests\test_blas.py,864,test_symm_wrong_uplo,853,allclose,1,860,f


In [14]:
# import numpy data and preview
df_numpy = pd.read_csv('extracted_data/numpy_data.csv')
df_numpy.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,\core\tests\test_api.py,20,test_array_array,15,assert_equal,1,17,np.ones
1,\core\tests\test_api.py,20,test_array_array,15,assert_equal,2,17,np.ones
2,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,22,sys.getrefcount
3,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,18,type
4,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,17,np.ones
5,\core\tests\test_api.py,24,test_array_array,15,assert_equal,2,18,type
6,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,22,sys.getrefcount
7,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,30,sys.getrefcount
8,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,16,type
9,\core\tests\test_api.py,32,test_array_array,15,assert_equal,2,16,type


## Analyze coverage

To track the progress of our test case extraction we display statistics about how many cases still are still unsupported. This is either denoted by an "UNSUPPORTED ..." statement in the "Differential Test Function" column of the data or by an empty string in this column, i.e. `NaN`.

In [36]:
def print_not_covered(df, name):
    not_covered_df = df[df['Differential_Test_Function'].str.contains('UNSUPPORTED', na=False) | df['Differential_Test_Function'].isna()]
    print(name+":\t\t"+ str(len(not_covered_df)) + " out of " + str(len(df)) +   " cases not covered ({}%).".format(round(len(not_covered_df)/len(df)*100, 2)))
    print(not_covered_df.Differential_Test_Function.value_counts(dropna=False))
    print("\n")

print_not_covered(df_tensorflow, "Tensorflow")
print_not_covered(df_tensorflow_1_12_0, "Tensorflow 1.12.0")
print_not_covered(df_pytorch, "PyTorch")
print_not_covered(df_theano, "Theano")
print_not_covered(df_keras, "Keras")
print_not_covered(df_scipy, "Scipy")
print_not_covered(df_numpy, "Numpy")

Tensorflow:		4619 out of 45336 cases not covered (10.19%).
UNSUPPORTED Binary Operation                                                      3459
NaN                                                                                810
UNSUPPORTED List Comprehension                                                     264
UNSUPPORTED Unary Operation                                                         41
UNSUPPORTED Name (named variable or defined function: last_assignment)              10
UNSUPPORTED Compare                                                                 10
UNSUPPORTED Constant                                                                 8
UNSUPPORTED Name (named variable or defined function: v)                             4
UNSUPPORTED Name (named variable or defined function: converted_concrete_func)       2
UNSUPPORTED Name (named variable or defined function: orig_dict)                     2
UNSUPPORTED Name (named variable or defined function: tpu)             

## Differentiate between data entries and test cases

The number of identified test cases is smaller than the number of data entries, because
each identified test case may have entries for multiple assert statement arguments in the case that the algorithm
can not clearly decide which argument represents the oracle. Additionally, each argument may have multiple differential testing functions that effect it within the test case's code. 


In [37]:
# Since for our definition each test case is defined by its assert statement, we can drop all entries that 
# have the same file path + line number combination. Thus we get the number of test cases identified.
def get_num_test_cases(df): 
    return str(len(df.drop_duplicates(subset=['File_Path', 'Line_Number'])))
    
print("TensorFlow  2.6.0: " + get_num_test_cases(df_tensorflow))
print("TensorFlow 1.12.0: " + get_num_test_cases(df_tensorflow_1_12_0))
print("PyTorch     1.9.0: " + get_num_test_cases(df_pytorch))
print("Theano:     1.0.3: " + get_num_test_cases(df_theano))
print("Keras       2.6.0: " + get_num_test_cases(df_keras))

TensorFlow  2.6.0: 19720
TensorFlow 1.12.0: 14005
PyTorch     1.9.0: 10854
Theano:     1.0.3: 1290
Keras       2.6.0: 3783


# Tool for manual evaluation

This tool is meant to help with quickly evaluating test cases from the dataset. For each test case, it prints all information collected about the case, including the oracle argument position and the extracted function name, as well as the code inside the function where the test case was defined. Then the evaluator is asked for an evaluation of the test case via input. This evaluation is then stored alongside the test case in the data.

Evaluation keys:  
y: Test case correctly identified  
n: Test case is not differential testing  
?: Allows for the entry of a comment. This is meant for situations where the current case is differential testing, but the differential testing function was not extracted correctly (or some other data is incorrect).  

In [38]:
UNEVALUATED_STRING = "UNEVALUATED"

class EvaluationAutomator:
    def __init__(self, df, library_root, save_data_to):
        """Initialize the evaluation automator.
        
        df: Dataframe to evaluate.
        library_root: The root folder of the DL library
        save_data_to: Relative location to load/save the evaluation data
        """
        self.df = df
        self.save_data_to = save_data_to
        self.library_root = library_root
        
        # try importing evaluation data if it already exists
        if os.path.isfile(self.save_data_to): 
            self.eval_df = pd.read_csv(self.save_data_to)
            print("Evaluation data opened.")
        
        # otherwise initialize evaluation df and add new column for the evaluation result
        else:
            self.eval_df = df.copy()
            todo_list = [UNEVALUATED_STRING] * len(self.eval_df.index)
            self.eval_df.insert(len(df.columns), 'Evaluation', todo_list)
            self.eval_df.to_csv(self.save_data_to)
            print("New evaluation data created.")
            
    def getEvalData(self):
        """Returns the data frame containing the evaluation data."""
        return self.eval_df
    
    def evaluate(self, index):
        """Present the data entry at the given index for evaluation."""
        
        # present the data entry
        print(self.df.iloc[index])
        print("\n")
        
        # check if it has already been evaluated
        if self.eval_df.at[index, 'Evaluation'] != UNEVALUATED_STRING:
            print("Already evaluated! Previous evaluation: " + self.eval_df.at[index, 'Evaluation'])
            if input("Re-evaluate? (y / n) ") != "y":
                return
            
        
        # print the relevant source code lines:
        
        # get source file of current test case and open it as an array of lines
        source_file_path = self.df.iloc[index]['File_Path'].replace('\\', '/')
        source = open(self.library_root + source_file_path).readlines()

        # set beginning and end line number for the code section to display
        beginning_line_no = self.df.iloc[index]['Function_Definition_Line_Number']
        end_line_no = self.df.iloc[index]['Line_Number']

        # print these lines 
        for line in range(beginning_line_no, end_line_no+1):
            print(str(line) + ": " + source[line-1])
            
        # check if the last line with the assert statement is complete or if the
        # assert arguments were moved to new lines, in which case: print more lines
        line = end_line_no
        last_line = source[line-1]       
        
        # we can check if the assert statement is complete if it ends with a closed bracket
        while not last_line.rstrip().endswith(")"):
            line += 1
            last_line = source[line-1]
            print(str(line) + ": " + last_line)
            
        # ask for a decision from the evaluator:
        decision_bool = True
        while decision_bool:
            decision = input("Correctly identified? (y / n / ?): ")
            
            if decision in ["y", "n"]:
                decision_bool = False

            elif decision == "?":
                decision = input("Please comment on this case: ")
                decision_bool = False
                
            else:
                print("Error. Please specify y/n/?")
                decision_bool = True
                
        # write the decision to the evaluation data
        self.eval_df.at[index, 'Evaluation'] = decision
        self.eval_df.to_csv(self.save_data_to, index=False)

# initialize automators:
# TensorFlow
evalAutomator_tensorflow = EvaluationAutomator(df_tensorflow, library_root_tensorflow, save_data_to_tensorflow)

evalAutomator_tensorflow_1_12_0 = EvaluationAutomator(df_tensorflow_1_12_0, library_root_tensorflow_1_12_0, save_data_to_tensorflow_1_12_0)

# PyTorch
evalAutomator_pytorch = EvaluationAutomator(df_pytorch, library_root_pytorch, save_data_to_pytorch)

# Theano
evalAutomator_theano = EvaluationAutomator(df_theano, library_root_theano, save_data_to_theano)

# Keras
evalAutomator_keras = EvaluationAutomator(df_keras, library_root_keras, save_data_to_keras)

# Scipy
evalAutomator_scipy = EvaluationAutomator(df_scipy, library_root_scipy, save_data_to_scipy)

# Numpy
evalAutomator_numpy = EvaluationAutomator(df_numpy, library_root_numpy, save_data_to_numpy)

# for debugging: test evaluation on a particular case
#evalAutomator_tensorflow_1_12_0.evaluate(218)

Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.


# Guide for evaluation

For each test case, please try to check the following facts:

- Is the test case a differential test case? 
- Was the correct argument identified? (Check if `Oracle_Arugment_Position` is indeed the oracle)
- Is the extracted function one of the relevant internal or differential functions?

If the answer to all three is questions is yes, then this case was most likely correctly identified (`y`)

## Sampling cases for evaluation

Set a seed and the number of cases you would like to evaluate, as well as the data to evaluate by setting the `evalAutomator` used:

In [39]:
RANDOM_SEED = 42
NUM_CASES = 50

# change this to the evaluator you would like to evaluate the data of:
evalAutomator = evalAutomator_tensorflow_1_12_0

eval_data = evalAutomator.getEvalData()
eval_data

Unnamed: 0.1,Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function,Evaluation
0,0,autograph\converters\builtin_functions_test.py,42,test_len,33,assertEqual,1,41,result.test_fn,UNEVALUATED
1,1,autograph\converters\builtin_functions_test.py,42,test_len,33,assertEqual,1,40,array_ops.placeholder,UNEVALUATED
2,2,autograph\converters\call_trees_test.py,102,test_py_func_no_retval,88,assertFalse,1,99,Dummy,UNEVALUATED
3,3,autograph\converters\call_trees_test.py,104,test_py_func_no_retval,88,assertEquals,2,99,Dummy,UNEVALUATED
4,4,autograph\converters\call_trees_test.py,134,test_uncompiled_modules,117,assertEquals,1,133,result.test_fn,UNEVALUATED
...,...,...,...,...,...,...,...,...,...,...
43563,43563,util\protobuf\compare_test.py,226,testNormalizesInts,214,assertTrue,1,215,compare_test_pb2.Large,UNEVALUATED
43564,43564,util\protobuf\compare_test.py,244,testNormalizesFloats,236,assertEqual,1,237,compare_test_pb2.Large,UNEVALUATED
43565,43565,util\protobuf\compare_test.py,244,testNormalizesFloats,236,assertEqual,2,239,compare_test_pb2.Large,UNEVALUATED
43566,43566,util\protobuf\compare_test.py,261,testNormalizesDoubles,253,assertEqual,1,254,compare_test_pb2.Large,UNEVALUATED


In [33]:
# sample cases
sampled_cases = eval_data.sample(n=NUM_CASES, random_state=RANDOM_SEED)

sample_counter = 0

# iterate over each case and evaluate
for i, row in sampled_cases.iterrows():
    print("\nCase " + str(i) + " (" + str(sample_counter) + " / " + str(len(sampled_cases)) + ")\n")
    evalAutomator.evaluate(i)
    sample_counter += 1


Case 32130 (0 / 50)

File_Path                            client/session_test.py
Line_Number                                             672
Found_in_Function                     testFetchSparseTensor
Function_Definition_Line_Number                         618
Assert_Statement_Type                        assertAllEqual
Oracle_Argument_ Position                                 2
Differential_Function_Line_Number                       622
Differential_Test_Function                           astype
Name: 32130, dtype: object


618:   def testFetchSparseTensor(self):

619:     with session.Session() as s:

620:       indices = np.array([[3, 2, 0], [4, 5, 1]]).astype(np.int64)

621:       values = np.array([1.0, 2.0]).astype(np.float32)

622:       shape = np.array([7, 9, 2]).astype(np.int64)

623:       sp = sparse_tensor.SparseTensor(

624:           constant_op.constant(indices), constant_op.constant(values),

625:           constant_op.constant(shape))

626:       # Single fetch, use a

Correctly identified? (y / n / ?): n

Case 31081 (5 / 50)

File_Path                            data/kernel_tests/shuffle_dataset_op_test.py
Line_Number                                                                   159
Found_in_Function                                                    testSeedZero
Function_Definition_Line_Number                                               136
Assert_Statement_Type                                                 assertEqual
Oracle_Argument_ Position                                                       2
Differential_Function_Line_Number                                             141
Differential_Test_Function                                      iterator.get_next
Name: 31081, dtype: object


136:   def testSeedZero(self):

137:     """Test for same behavior when the seed is a Python or Tensor zero."""

138:     iterator = (

139:         dataset_ops.Dataset.range(10).shuffle(10, seed=0)

140:         .make_one_shot_iterator())

141:     get_ne

Correctly identified? (y / n / ?): n

Case 32450 (11 / 50)

File_Path                            client/session_test.py
Line_Number                                            1043
Found_in_Function                    testOperationRunMethod
Function_Definition_Line_Number                        1027
Assert_Statement_Type                        assertAllEqual
Oracle_Argument_ Position                                 2
Differential_Function_Line_Number                      1042
Differential_Test_Function                           v.eval
Name: 32450, dtype: object


1027:   def testOperationRunMethod(self):

1028:     with session.Session():

1029:       a = constant_op.constant(1.0, shape=[1, 2])

1030:       b = constant_op.constant(2.0, shape=[1, 2], name='b')

1031:       v = variables.VariableV1(a, a.dtype)

1032:       assign_a_to_v = state_ops.assign(v, a)

1033: 

1034:       assign_a_to_v.eval()

1035: 

1036:       v_val = v.eval()

1037:       self.assertAllEqual([[1.0, 1.0]], v

Correctly identified? (y / n / ?): n

Case 8193 (17 / 50)

File_Path                               kernel_tests/cwise_ops_test.py
Line_Number                                                        625
Found_in_Function                                    _compareGradientY
Function_Definition_Line_Number                                    616
Assert_Statement_Type                                   assertAllClose
Oracle_Argument_ Position                                            2
Differential_Function_Line_Number                                  622
Differential_Test_Function           gradient_checker.compute_gradient
Name: 8193, dtype: object


616:   def _compareGradientY(self, func, x, y):

617:     with self.cached_session():

618:       inx = ops.convert_to_tensor(x)

619:       iny = ops.convert_to_tensor(y)

620:       out = func(inx, iny)

621:       s = list(np.shape(x))

622:       jacob_t, jacob_n = gradient_checker.compute_gradient(

623:           iny, s, out, s, x_init_v

Correctly identified? (y / n / ?): n

Case 4165 (22 / 50)

File_Path                            framework/function_test.py
Line_Number                                                 860
Found_in_Function                             testSignatureHash
Function_Definition_Line_Number                             830
Assert_Statement_Type                            assertAllEqual
Oracle_Argument_ Position                                     1
Differential_Function_Line_Number                           856
Differential_Test_Function                                  Bar
Name: 4165, dtype: object


830:   def testSignatureHash(self):

831:     # Foo.Inner and Bar.Inner have identical function body but have

832:     # different signatures. They should be treated as two different functions.

833: 

834:     @function.Defun()

835:     def Foo(x):

836: 

837:       @function.Defun()

838:       def Inner(x):

839:         return x + 10.

840: 

841:       return Inner(x)

842: 

843:     @func

Correctly identified? (y / n / ?): n

Case 30743 (28 / 50)

File_Path                            data/util/nest_test.py
Line_Number                                             367
Found_in_Function                           testFlattenUpTo
Function_Definition_Line_Number                         305
Assert_Statement_Type                           assertEqual
Oracle_Argument_ Position                                 1
Differential_Function_Line_Number                       326
Differential_Test_Function               nest.flatten_up_to
Name: 30743, dtype: object


305:   def testFlattenUpTo(self):

306:     input_tree = (((2, 2), (3, 3)), ((4, 9), (5, 5)))

307:     shallow_tree = ((True, True), (False, True))

308:     flattened_input_tree = nest.flatten_up_to(shallow_tree, input_tree)

309:     flattened_shallow_tree = nest.flatten_up_to(shallow_tree, shallow_tree)

310:     self.assertEqual(flattened_input_tree, [(2, 2), (3, 3), (4, 9), (5, 5)])

311:     self.assertEqual(flattened_sh

Correctly identified? (y / n / ?): n

Case 34193 (32 / 50)

File_Path                                    debug/cli/curses_ui_test.py
Line_Number                                                          402
Found_in_Function                    testRunUIScrollTallOutputPageDownUp
Function_Definition_Line_Number                                      386
Assert_Statement_Type                                        assertEqual
Oracle_Argument_ Position                                              2
Differential_Function_Line_Number                                    390
Differential_Test_Function                                  MockCursesUI
Name: 34193, dtype: object


386:   def testRunUIScrollTallOutputPageDownUp(self):

387:     """Scroll tall output with PageDown and PageUp."""

388: 

389:     # Use PageDown and PageUp to scroll back and forth a little before exiting.

390:     ui = MockCursesUI(

391:         40,

392:         80,

393:         command_sequence=[string_to_codes("babbl

Correctly identified? (y / n / ?): n

Case 13544 (37 / 50)

File_Path                            kernel_tests/pad_op_test.py
Line_Number                                                  291
Found_in_Function                    testPartialShapeInformation
Function_Definition_Line_Number                              280
Assert_Statement_Type                                assertEqual
Oracle_Argument_ Position                                      2
Differential_Function_Line_Number                            289
Differential_Test_Function                  constant_op.constant
Name: 13544, dtype: object


280:   def testPartialShapeInformation(self):

281:     unknown = array_ops.placeholder(dtypes.int32)

282: 

283:     # Known input shape, partial unknown padding (one dimension).

284:     inp = constant_op.constant(0.0, shape=[4, 4])

285:     padded = array_ops.pad(inp, [[1, 2], unknown])

286:     self.assertEqual([7, None], padded.get_shape().as_list())

287: 

288:     # Known inpu

Correctly identified? (y / n / ?): n

Case 15299 (43 / 50)

File_Path                            kernel_tests/tensor_array_ops_test.py
Line_Number                                                           1173
Found_in_Function                                           testSplitShape
Function_Definition_Line_Number                                       1151
Assert_Statement_Type                                          assertEqual
Oracle_Argument_ Position                                                2
Differential_Function_Line_Number                                     1160
Differential_Test_Function                                        ta.split
Name: 15299, dtype: object


1151:   def testSplitShape(self):

1152:     with self.test_session(use_gpu=True):

1153:       ta = tensor_array_ops.TensorArray(

1154:           dtype=dtypes.float32,

1155:           tensor_array_name="foo",

1156:           size=0,

1157:           dynamic_size=True,

1158:           infer_shape=True)

11

Correctly identified? (y / n / ?): n

Case 4639 (48 / 50)

File_Path                            framework/importer_test.py
Line_Number                                                 988
Found_in_Function                                testWithDevice
Function_Definition_Line_Number                             965
Assert_Statement_Type                               assertEqual
Oracle_Argument_ Position                                     2
Differential_Function_Line_Number                           986
Differential_Test_Function            importer.import_graph_def
Name: 4639, dtype: object


965:   def testWithDevice(self):

966:     with ops.Graph().as_default() as g:

967:       # No device.

968:       a = constant_op.constant(3.0, name="a")

969: 

970:       with ops.device("/cpu:0"):

971:         b = constant_op.constant(4.0, name="b")

972:       with ops.device("/job:worker"):

973:         c = constant_op.constant(5.0, name="c")

974: 

975:     gdef = g.as_graph_def()

976: 

## Analyse evaluations

Now we can gain summary statistics about the performance of our function extraction.

In [40]:
evaluation_counts = eval_data.Evaluation.value_counts()

print(evaluation_counts)

total_cases_evaluated = len(eval_data) - evaluation_counts[UNEVALUATED_STRING] 

try:
    print("\nn: " + str(round((evaluation_counts['n'] / total_cases_evaluated)*100)) + " %")

    print("y: " + str(round((evaluation_counts['y'] / total_cases_evaluated)*100)) + " %")

    print("?: " + str(round(((total_cases_evaluated - evaluation_counts['y'] - evaluation_counts['n']) / total_cases_evaluated)*100)) + " %")
    
except:
    print("\nNo evaluations of 'y' or 'n' found!")

UNEVALUATED    43518
n                 47
y                  3
Name: Evaluation, dtype: int64

n: 94 %
y: 6 %
?: 0 %



## Show the extracted functions

Here we can gain a glimpse into the functions that were extracted:


In [43]:
# Filter out all unsupported rows
extracted_functions_df = eval_data[~eval_data['Differential_Test_Function'].str.contains('UNSUPPORTED', na=False)]


def filter_functions_of_library(library_name, extracted_functions_df):
    """Filter out all unique functions of a given library and print them."""
    print(library_name + ": ", end='')
    library_name += '\.'
    filtered_df = extracted_functions_df[extracted_functions_df['Differential_Test_Function'].str.contains(library_name, na=False)]
    extracted_functions_df.drop(extracted_functions_df.loc[extracted_functions_df['Differential_Test_Function'].str.contains(library_name, na=False)].index, inplace=True)
    
    print(filtered_df.Differential_Test_Function.unique(), end='\n\n')
    return filtered_df
    
    
filter_functions_of_library('np', extracted_functions_df)
filter_functions_of_library('stats', extracted_functions_df)
filter_functions_of_library('scipy', extracted_functions_df)
filter_functions_of_library('keras', extracted_functions_df)
filter_functions_of_library('theano', extracted_functions_df)
filter_functions_of_library('torch', extracted_functions_df)

extracted_functions = extracted_functions_df.Differential_Test_Function.unique()
print(extracted_functions)

np: ['np.random.random' 'np.zeros' 'np.array' 'np.random.uniform' 'np.tanh'
 'np.exp' 'np.random.normal' 'np.reshape' 'np.transpose' 'np.repeat'
 'np.sum' 'np.multiply' 'np.mean' 'np.asarray' 'np.ones' 'np.arange'
 'np.concatenate' 'inp._to_placeholder' 'np.random.randint'
 'np.random.rand' 'np.full' 'np.expand_dims' 'np.flip' 'np.identity'
 'np.take']

stats: []

scipy: ['scipy.sparse.eye' 'scipy.sparse.coo_matrix']

keras: ['keras.callbacks.ModelCheckpoint' 'keras.callbacks.ProgbarLogger'
 'keras.callbacks.ReduceLROnPlateau' 'keras.models.Sequential'
 'keras.callbacks.CallbackList' 'keras.callbacks.TensorBoard'
 'keras.Sequential' 'keras.Input' 'keras.Model' 'keras.backend.variable'
 'keras.optimizers.serialize' 'keras.optimizers.deserialize'
 'keras.layers.Dense' 'keras.layers.Input' 'tf.keras.optimizers.Adam'
 'tf.keras.optimizers.SGD' 'tf.keras.metrics.AUC' 'keras.models.Model'
 'keras.losses.mean_squared_error' 'keras.metrics.AUC'
 'keras.metrics.FalsePositives' 'keras.models.Seq

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [None]:
# utility: write remaining unique extracted functions to txt file
with open('extracted_data/unique_functions.txt', 'w') as f:
    for item in sorted(extracted_functions.astype(str)):
        f.write("%s\n" % item)