# Data Evaluation

The purpose of this notebook is to streamline the process of manually evaluating the extracted test case data from notebook 1.

For this we will sample test cases from the data (using a fixed seed for reproducibility) and have information about the test cases be displayed for manual evaluation, including the relevant lines of code. 

## Imports

In [22]:
import pandas as pd
import os

## Setup

In [21]:
# Set library root folder:

# For TensorFlow
library_root_tensorflow = "A:/BachelorThesis/DLL_Testing_Tool/DL_Libraries/Tensorflow/tensorflow-master/tensorflow/python/" 
save_data_to_tensorflow = "extracted_data/tensorflow_evaluation_data.csv"

# For TensorFlow 1.12.0
library_root_tensorflow_1_12_0 = "A:/BachelorThesis/DLL_Testing_Tool/DL_Libraries/Tensorflow/tensorflow-1.12.0/tensorflow/python/" 
save_data_to_tensorflow_1_12_0 = "extracted_data/tensorflow_1.12.0_evaluation_data.csv"

# For Pytorch
library_root_pytorch = "A:/BachelorThesis/DLL_Testing_Tool/DL_Libraries/PyTorch/pytorch-master/" 
save_data_to_pytorch = "extracted_data/pytorch_evaluation_data.csv"

# For Theano 1.0.3
library_root_theano = "A:/BachelorThesis/DLL_Testing_Tool/DL_Libraries/Theano-rel-1.0.3/theano/"
save_data_to_theano = "extracted_data/theano_evaluation_data.csv"

# For Keras
library_root_keras = "A:/BachelorThesis/DLL_Testing_Tool/DL_Libraries/Keras/keras-master/keras/"
save_data_to_keras = "extracted_data/keras_evaluation_data.csv"

# For Scipy
library_root_scipy = "A:/BachelorThesis/DLL_Testing_Tool/DL_Libraries/Scipy/scipy-master/scipy"
save_data_to_scipy = "extracted_data/scipy_evaluation_data.csv"

# For Numpy
library_root_numpy = "A:/BachelorThesis/DLL_Testing_Tool/DL_Libraries/Numpy/numpy-main"
save_data_to_numpy = "extracted_data/numpy_evaluation_data.csv"



## Import the data

In [3]:
# import tensorflow data and preview
df_tensorflow = pd.read_csv('extracted_data/tensorflow_data.csv')
df_tensorflow.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,autograph\converters\break_statements_test.py,61,test_while_loop_preserves_directives,48,assertIs,2,57,object
1,autograph\converters\break_statements_test.py,90,test_for_loop_preserves_directives,79,assertIs,2,87,object
2,autograph\converters\control_flow_test.py,51,assertValuesEqual,47,assertAllEqual,1,48,nest.map_structure
3,autograph\converters\functions_test.py,45,test_basic,33,assertEqual,2,41,self.transform
4,autograph\converters\functions_test.py,127,test_lambda_in_return_value,119,assertTrue,1,126,tr
5,autograph\converters\lists_test.py,45,test_empty_list,35,assertEqual,1,42,tr
6,autograph\converters\variables_test.py,49,test_aug_assign,41,assertEqual,2,49,UNSUPPORTED Binary Operation
7,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,1,39,converter.ConversionOptions
8,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,1,50,converter.ConversionOptions
9,autograph\core\converter_test.py,56,test_to_ast,38,assertEqual,2,54,reparsed.f


In [4]:
# import tensorflow 1.12.0 data and preview
df_tensorflow_1_12_0 = pd.read_csv('extracted_data/tensorflow_1.12.0_data.csv')
df_tensorflow_1_12_0.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,autograph\converters\builtin_functions_test.py,42,test_len,33,assertEqual,1,41,result.test_fn
1,autograph\converters\builtin_functions_test.py,42,test_len,33,assertEqual,1,40,array_ops.placeholder
2,autograph\converters\call_trees_test.py,102,test_py_func_no_retval,88,assertFalse,1,99,Dummy
3,autograph\converters\call_trees_test.py,104,test_py_func_no_retval,88,assertEquals,2,99,Dummy
4,autograph\converters\call_trees_test.py,134,test_uncompiled_modules,117,assertEquals,1,133,result.test_fn
5,autograph\converters\lists_test.py,47,test_empty_list,39,assertTrue,1,45,result.test_fn
6,autograph\converters\lists_test.py,48,test_empty_list,39,assertEqual,1,45,result.test_fn
7,autograph\converters\lists_test.py,71,test_list_append,58,assertAllEqual,1,70,list_ops.tensor_list_stack
8,autograph\converters\lists_test.py,71,test_list_append,58,assertAllEqual,1,69,result.test_fn
9,autograph\converters\lists_test.py,94,test_list_pop,73,assertAllEqual,1,93,list_ops.tensor_list_stack


In [76]:
# import pytorch data and preview
df_pytorch = pd.read_csv('extracted_data/pytorch_1.9.0_data.csv')
#df_pytorch = pd.read_csv('extracted_data/pytorch_data.csv')
df_pytorch.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,benchmarks\operator_benchmark\pt_extension\cpp...,23,test_jit_consume_op,8,assertEqual,1,22,r
1,benchmarks\operator_benchmark\pt_extension\cpp...,23,test_jit_consume_op,8,assertEqual,1,21,torch.rand
2,benchmarks\operator_benchmark\pt_extension\cpp...,23,test_jit_consume_op,8,assertEqual,2,21,torch.rand
3,benchmarks\operator_benchmark\pt_extension\cpp...,24,test_jit_consume_op,8,assertEqual,1,19,graph.count
4,benchmarks\operator_benchmark\pt_extension\cpp...,24,test_jit_consume_op,8,assertEqual,1,18,str
5,benchmarks\operator_benchmark\pt_extension\cpp...,24,test_jit_consume_op,8,assertEqual,1,16,torch.jit.trace
6,benchmarks\operator_benchmark\pt_extension\cpp...,45,test_jit_consume_op_for_list_input,26,assertEqual,1,37,graph.count
7,benchmarks\operator_benchmark\pt_extension\cpp...,45,test_jit_consume_op_for_list_input,26,assertEqual,1,36,str
8,benchmarks\operator_benchmark\pt_extension\cpp...,45,test_jit_consume_op_for_list_input,26,assertEqual,1,34,torch.jit.trace
9,caffe2\contrib\fakelowp\test\test_batchmatmul_...,73,test_batch_matmul,29,assert_equal,1,71,sum


In [16]:
# import theano data and preview
df_theano = pd.read_csv('extracted_data/theano_data.csv')
df_theano.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,\compile\tests\test_builders.py,85,test_grad_grad,74,allclose,2,82,np.ones
1,\compile\tests\test_builders.py,102,test_shared,88,allclose,2,97,np.ones
2,\compile\tests\test_builders.py,103,test_shared,88,allclose,2,97,np.ones
3,\compile\tests\test_builders.py,117,test_shared_grad,106,allclose,1,117,UNSUPPORTED Binary Operation
4,\compile\tests\test_builders.py,117,test_shared_grad,106,allclose,2,114,np.ones
5,\compile\tests\test_builders.py,123,test_shared_grad,106,allclose,1,123,UNSUPPORTED Binary Operation
6,\compile\tests\test_builders.py,123,test_shared_grad,106,allclose,2,114,np.ones
7,\compile\tests\test_builders.py,150,test_grad_override,127,allclose,1,150,UNSUPPORTED Binary Operation
8,\compile\tests\test_builders.py,150,test_grad_override,127,allclose,2,149,fn
9,\compile\tests\test_builders.py,150,test_grad_override,127,allclose,2,147,astype


In [18]:
# import theano data and preview
df_keras = pd.read_csv('extracted_data/keras_data.csv')
df_keras.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,\activations_test.py,67,test_serialization_with_layers,60,assertEqual,1,65,serialization.deserialize
1,\activations_test.py,67,test_serialization_with_layers,60,assertEqual,1,63,serialization.serialize
2,\activations_test.py,67,test_serialization_with_layers,60,assertEqual,1,62,core.Dense
3,\activations_test.py,67,test_serialization_with_layers,60,assertEqual,2,62,core.Dense
4,\activations_test.py,69,test_serialization_with_layers,60,assertEqual,1,65,serialization.deserialize
5,\activations_test.py,69,test_serialization_with_layers,60,assertEqual,2,61,advanced_activations.LeakyReLU
6,\activations_test.py,73,test_serialization_with_layers,60,assertEqual,1,65,serialization.deserialize
7,\activations_test.py,73,test_serialization_with_layers,60,assertEqual,1,72,serialization.deserialize
8,\activations_test.py,73,test_serialization_with_layers,60,assertEqual,1,63,serialization.serialize
9,\activations_test.py,73,test_serialization_with_layers,60,assertEqual,2,62,core.Dense


In [7]:
# import scipy data and preview
df_scipy = pd.read_csv('extracted_data/scipy_data.csv')
df_scipy.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,\interpolate\tests\test_bsplines.py,267,test_derivative_jumps,246,allclose,1,266,np.asarray
1,\interpolate\tests\test_bsplines.py,267,test_derivative_jumps,246,allclose,2,266,np.asarray
2,\interpolate\tests\test_bsplines.py,271,test_derivative_jumps,246,allclose,1,257,np.asarray
3,\interpolate\tests\test_bsplines.py,271,test_derivative_jumps,246,allclose,2,257,np.asarray
4,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,1,1492,norm
5,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,1,1487,astype
6,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,2,1493,np.linalg.norm
7,\linalg\tests\test_basic.py,1496,test_matrix_norms,1480,allclose,2,1487,astype
8,\linalg\tests\test_blas.py,861,test_symm_wrong_uplo,853,allclose,1,860,f
9,\linalg\tests\test_blas.py,864,test_symm_wrong_uplo,853,allclose,1,860,f


In [8]:
# import numpy data and preview
df_numpy = pd.read_csv('extracted_data/numpy_data.csv')
df_numpy.head(10)

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
0,\core\tests\test_api.py,20,test_array_array,15,assert_equal,1,17,np.ones
1,\core\tests\test_api.py,20,test_array_array,15,assert_equal,2,17,np.ones
2,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,22,sys.getrefcount
3,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,18,type
4,\core\tests\test_api.py,24,test_array_array,15,assert_equal,1,17,np.ones
5,\core\tests\test_api.py,24,test_array_array,15,assert_equal,2,18,type
6,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,22,sys.getrefcount
7,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,30,sys.getrefcount
8,\core\tests\test_api.py,32,test_array_array,15,assert_equal,1,16,type
9,\core\tests\test_api.py,32,test_array_array,15,assert_equal,2,16,type


## Analyze coverage

To track the progress of our test case extraction we display statistics about how many cases still are still unsupported. This is either denoted by an "UNSUPPORTED ..." statement in the "Differential Test Function" column of the data or by an empty string in this column, i.e. `NaN`.

In [77]:
def print_not_covered(df, name):
    not_covered_df = df[df['Differential_Test_Function'].str.contains('UNSUPPORTED', na=False) | df['Differential_Test_Function'].isna()]
    print(name+":\t\t"+ str(len(not_covered_df)) + " out of " + str(len(df)) +   " cases not covered ({}%).".format(round(len(not_covered_df)/len(df)*100, 2)))
    print(not_covered_df.Differential_Test_Function.value_counts(dropna=False))
    print("\n")

print_not_covered(df_tensorflow, "Tensorflow")
print_not_covered(df_tensorflow_1_12_0, "Tensorflow 1.12.0")
print_not_covered(df_pytorch, "PyTorch")
print_not_covered(df_theano, "Theano")
print_not_covered(df_keras, "Keras")
print_not_covered(df_scipy, "Scipy")
print_not_covered(df_numpy, "Numpy")

Tensorflow:		4619 out of 45336 cases not covered (10.19%).
UNSUPPORTED Binary Operation                                                      3459
NaN                                                                                810
UNSUPPORTED List Comprehension                                                     264
UNSUPPORTED Unary Operation                                                         41
UNSUPPORTED Compare                                                                 10
UNSUPPORTED Name (named variable or defined function: last_assignment)              10
UNSUPPORTED Constant                                                                 8
UNSUPPORTED Name (named variable or defined function: v)                             4
UNSUPPORTED Name (named variable or defined function: tpu)                           2
UNSUPPORTED Name (named variable or defined function: orig_dict)                     2
UNSUPPORTED Name (named variable or defined function: converted_concret

In [10]:
#not_covered_tensorflow[not_covered_tensorflow['Differential_Test_Function'].str.contains('UNSUPPORTED', na=False)]

not_covered_theano[not_covered_theano['Differential_Test_Function'].isna()]
#not_covered_tensorflow[not_covered_tensorflow['Differential_Test_Function'].isna()]

NameError: name 'not_covered_theano' is not defined

In [11]:
df_tensorflow_1_12_0[df_tensorflow_1_12_0['File_Path'].str.contains('rnn_test.py', na=False)]
df_tensorflow_1_12_0[df_tensorflow_1_12_0['Differential_Test_Function'].str.contains('keras.', na=False) & df_tensorflow_1_12_0['File_Path'].str.contains('kernel_test', na=False)]

df_theano[df_theano['Differential_Test_Function'].str.contains('scipy.', na=False)]# & df_theano['File_Path'].str.contains('kernel_test', na=False)]



Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function
1268,\sparse\tests\test_basic.py,320,test_transpose_csc,315,assertTrue,2,316,scipy.sparse.csc_matrix
1270,\sparse\tests\test_basic.py,321,test_transpose_csc,315,assertTrue,2,316,scipy.sparse.csc_matrix
1273,\sparse\tests\test_basic.py,323,test_transpose_csc,315,assertTrue,2,316,scipy.sparse.csc_matrix
1276,\sparse\tests\test_basic.py,324,test_transpose_csc,315,assertTrue,2,316,scipy.sparse.csc_matrix
1362,\sparse\tests\test_basic.py,785,__generalized_ss_test,768,assertTrue,1,782,scipyType
1363,\sparse\tests\test_basic.py,785,__generalized_ss_test,768,assertTrue,1,783,scipyType
1364,\sparse\tests\test_basic.py,804,__generalized_sd_test,787,assertTrue,1,801,scipyType
1367,\sparse\tests\test_basic.py,823,__generalized_ds_test,806,assertTrue,1,820,scipyType
3775,\tensor\tests\test_slinalg.py,250,test_solve_correctness,220,allclose,1,249,scipy.linalg.cholesky
3780,\tensor\tests\test_slinalg.py,250,test_solve_correctness,220,allclose,2,249,scipy.linalg.cholesky


# Tool for manual evaluation

This tool is meant to help with quickly evaluating test cases from the dataset. For each test case, it prints all information collected about the case, including the oracle argument position and the extracted function name, as well as the code inside the function where the test case was defined. Then the evaluator is asked for an evaluation of the test case via input. This evaluation is then stored alongside the test case in the data.

Evaluation keys:  
y: Test case correctly identified  
n: Test case is not differential testing  
?: Allows for the entry of a comment. This is meant for situations where the current case is differential testing, but the differential testing function was not extracted correctly (or some other data is incorrect).  

In [24]:
class EvaluationAutomator:
    def __init__(self, df, library_root, save_data_to):
        """Initialize the evaluation automator.
        
        df: Dataframe to evaluate.
        library_root: The root folder of the DL library
        save_data_to: Relative location to load/save the evaluation data
        """
        self.df = df
        self.save_data_to = save_data_to
        self.library_root = library_root
        
        # try importing evaluation data if it already exists
        if os.path.isfile(self.save_data_to): 
            self.eval_df = pd.read_csv(self.save_data_to)
            print("Evaluation data opened.")
        
        # otherwise initialize evaluation df and add new column for the evaluation result
        else:
            self.eval_df = df.copy()
            todo_list = ["TODO"] * len(self.eval_df.index)
            self.eval_df.insert(len(df.columns), 'Evaluation', todo_list)
            self.eval_df.to_csv(self.save_data_to)
            print("New evaluation data created.")
            
    def getEvalData(self):
        """Returns the data frame containing the evaluation data."""
        return self.eval_df
    
    def evaluate(self, index):
        """Present the data entry at the given index for evaluation."""
        
        # present the data entry
        print(self.df.iloc[index])
        print("\n")
        
        # check if it has already been evaluated
        if self.eval_df.at[index, 'Evaluation'] != "TODO":
            print("Already evaluated! Previous evaluation: " + self.eval_df.at[index, 'Evaluation'])
            if input("Re-evaluate? (y / n) ") != "y":
                return
            
        
        # print the relevant source code lines:
        
        # get source file of current test case and open it as an array of lines
        source = open(self.library_root + self.df.iloc[index]['File_Path']).readlines()

        # set beginning and end line number for the code section to display
        beginning_line_no = self.df.iloc[index]['Function_Definition_Line_Number']
        end_line_no = self.df.iloc[index]['Line_Number']

        # print these lines 
        for line in range(beginning_line_no, end_line_no+1):
            print(str(line) + ": " + source[line-1])
            
        # check if the last line with the assert statement is complete or if the
        # assert arguments were moved to new lines, in which case: print more lines
        line = end_line_no
        last_line = source[line-1]       
        
        # we can check if the assert statement is complete if it ends with a closed bracket
        while not last_line.rstrip().endswith(")"):
            line += 1
            last_line = source[line-1]
            print(str(line) + ": " + last_line)
            
        # ask for a decision from the evaluator:
        decision_bool = True
        while decision_bool:
            decision = input("Correctly identified? (y / n / ?): ")
            
            if decision in ["y", "n"]:
                decision_bool = False

            elif decision == "?":
                decision = input("Please comment on this case: ")
                decision_bool = False
                
            else:
                print("Error. Please specify y/n/?")
                decision_bool = True
                
        # write the decision to the evaluation data
        self.eval_df.at[index, 'Evaluation'] = decision
        self.eval_df.to_csv(self.save_data_to, index=False)

# initialize automators:
# TensorFlow
evalAutomator_tensorflow = EvaluationAutomator(df_tensorflow, library_root_tensorflow, save_data_to_tensorflow)

evalAutomator_tensorflow_1_12_0 = EvaluationAutomator(df_tensorflow_1_12_0, library_root_tensorflow_1_12_0, save_data_to_tensorflow_1_12_0)

# PyTorch
evalAutomator_pytorch = EvaluationAutomator(df_pytorch, library_root_pytorch, save_data_to_pytorch)

# Theano
evalAutomator_theano = EvaluationAutomator(df_theano, library_root_theano, save_data_to_theano)

# Keras
evalAutomator_keras = EvaluationAutomator(df_keras, library_root_keras, save_data_to_keras)

# Scipy
evalAutomator_scipy = EvaluationAutomator(df_scipy, library_root_scipy, save_data_to_scipy)

# Numpy
evalAutomator_numpy = EvaluationAutomator(df_numpy, library_root_numpy, save_data_to_numpy)

# test evaluation on a particular case
#evalAutomator_tensorflow_1_12_0.evaluate(218)

New evaluation data created.
Evaluation data opened.
Evaluation data opened.
Evaluation data opened.
New evaluation data created.
Evaluation data opened.
Evaluation data opened.


# Guide for evaluation

For each test case, please try to check the following facts:

- Is the test case a differential test case? 
- Was the correct argument identified? (Check if `Oracle_Arugment_Position` is indeed the oracle)
- Is the extracted function one of the relevant internal or differential functions?

If the answer to all three is questions is yes, then this case was most likely correctly identified (`y`)

## Sampling cases for evaluation

Set a seed and the number of cases you would like to evaluate, as well as the data to evaluate by setting the `evalAutomator` used:

In [42]:
RANDOM_SEED = 42 + 3
NUM_CASES = 10

evalAutomator = evalAutomator_keras
eval_data = evalAutomator.getEvalData()
eval_data

Unnamed: 0,File_Path,Line_Number,Found_in_Function,Function_Definition_Line_Number,Assert_Statement_Type,Oracle_Argument_ Position,Differential_Function_Line_Number,Differential_Test_Function,Evaluation
0,\activations_test.py,67,test_serialization_with_layers,60,assertEqual,1,65,serialization.deserialize,TODO
1,\activations_test.py,67,test_serialization_with_layers,60,assertEqual,1,63,serialization.serialize,TODO
2,\activations_test.py,67,test_serialization_with_layers,60,assertEqual,1,62,core.Dense,TODO
3,\activations_test.py,67,test_serialization_with_layers,60,assertEqual,2,62,core.Dense,TODO
4,\activations_test.py,69,test_serialization_with_layers,60,assertEqual,1,65,serialization.deserialize,TODO
...,...,...,...,...,...,...,...,...,...
10145,utils\vis_utils_test.py,143,test_dot_layer_range,136,assertAllEqual,1,137,efficientnet.EfficientNetB0,TODO
10146,utils\vis_utils_test.py,143,test_dot_layer_range,136,assertAllEqual,2,142,get_layer_ids_from_dot,TODO
10147,utils\vis_utils_test.py,143,test_dot_layer_range,136,assertAllEqual,2,141,dot.get_edges,TODO
10148,utils\vis_utils_test.py,143,test_dot_layer_range,136,assertAllEqual,2,140,vis_utils.model_to_dot,TODO


In [None]:
# create custom evaluator for only gamma_test cases in tf 1.12.0
filtered_eval_cases = df_tensorflow_1_12_0[df_tensorflow_1_12_0.File_Path.str.contains('\\\\rnn_test', na=False)]
evalAutomator = EvaluationAutomator(filtered_eval_cases, library_root_tensorflow_1_12_0, "extracted_data/tf_1_12_0_rnn_test_evaluation_data.csv")
eval_data = evalAutomator.getEvalData()
eval_data

In [68]:
# sample cases

sampled_cases = eval_data.sample(n=NUM_CASES, random_state=RANDOM_SEED)
#display(sampled_cases)
#print(len(sampled_cases))



sample_counter = 0

# iterate over each case and evaluate
for i, row in sampled_cases.iterrows():
    print("\nCase " + str(i) + " (" + str(sample_counter) + " / " + str(len(sampled_cases)) + ")\n")
    evalAutomator.evaluate(i)
    sample_counter += 1


Case 104 (0 / 10)

File_Path                            kernel_tests\rnn_test.py
Line_Number                                               421
Found_in_Function                    testRNNWithKerasLSTMCell
Function_Definition_Line_Number                           389
Assert_Statement_Type                             assertEqual
Oracle_Argument_ Position                                   1
Differential_Function_Line_Number                         405
Differential_Test_Function              array_ops.placeholder
Name: 26218, dtype: object


389:   def testRNNWithKerasLSTMCell(self):

390:     with self.cached_session() as sess:

391:       input_shape = 10

392:       output_shape = 5

393:       timestep = 4

394:       batch = 100

395:       (x_train, y_train), _ = testing_utils.get_test_data(

396:           train_samples=batch,

397:           test_samples=0,

398:           input_shape=(timestep, input_shape),

399:           num_classes=output_shape)

400:       y_train = keras.ut

KeyboardInterrupt: Interrupted by user

## Analyse evaluations

Now we can gain summary statistics about the performance of our function extraction.

In [38]:
evaluation_counts = eval_data.Evaluation.value_counts()

print(evaluation_counts)

total_cases_evaluated = len(eval_data) - evaluation_counts['TODO'] 

print("\nn: " + str(round((evaluation_counts['n'] / total_cases_evaluated)*100)) + " %")

print("y: " + str(round((evaluation_counts['y'] / total_cases_evaluated)*100)) + " %")

print("?: " + str(round(((total_cases_evaluated - evaluation_counts['y'] - evaluation_counts['n']) / total_cases_evaluated)*100)) + " %")

TODO    10150
Name: Evaluation, dtype: int64


KeyError: 'n'

## Show the extracted functions

Here we can gain a glimpse into the functions that were extracted:


In [43]:
# Filter out all unsupported rows
extracted_functions_df = eval_data[~eval_data['Differential_Test_Function'].str.contains('UNSUPPORTED', na=False)]


def filter_functions_of_library(library_name, extracted_functions_df):
    """Filter out all unique functions of a given library and print them."""
    print(library_name + ": ", end='')
    library_name += '\.'
    filtered_df = extracted_functions_df[extracted_functions_df['Differential_Test_Function'].str.contains(library_name, na=False)]
    extracted_functions_df.drop(extracted_functions_df.loc[extracted_functions_df['Differential_Test_Function'].str.contains(library_name, na=False)].index, inplace=True)
    
    print(filtered_df.Differential_Test_Function.unique(), end='\n\n')
    return filtered_df
    
    
filter_functions_of_library('np', extracted_functions_df)
filter_functions_of_library('stats', extracted_functions_df)
filter_functions_of_library('scipy', extracted_functions_df)
filter_functions_of_library('keras', extracted_functions_df)
filter_functions_of_library('theano', extracted_functions_df)
filter_functions_of_library('torch', extracted_functions_df)

extracted_functions = extracted_functions_df.Differential_Test_Function.unique()
print(extracted_functions)

with open('extracted_data/unique_functions.txt', 'w') as f:
    for item in sorted(extracted_functions.astype(str)):
        f.write("%s\n" % item)

np: ['np.random.random' 'np.zeros' 'np.array' 'np.random.uniform' 'np.tanh'
 'np.exp' 'np.random.normal' 'np.reshape' 'np.transpose' 'np.repeat'
 'np.sum' 'np.multiply' 'np.mean' 'np.asarray' 'np.ones' 'np.arange'
 'np.concatenate' 'inp._to_placeholder' 'np.random.randint'
 'np.random.rand' 'np.full' 'np.expand_dims' 'np.flip' 'np.identity'
 'np.take']

stats: []

scipy: ['scipy.sparse.eye' 'scipy.sparse.coo_matrix']

keras: ['keras.callbacks.ModelCheckpoint' 'keras.callbacks.ProgbarLogger'
 'keras.callbacks.ReduceLROnPlateau' 'keras.models.Sequential'
 'keras.callbacks.CallbackList' 'keras.callbacks.TensorBoard'
 'keras.Sequential' 'keras.Input' 'keras.Model' 'keras.backend.variable'
 'keras.optimizers.serialize' 'keras.optimizers.deserialize'
 'keras.layers.Dense' 'keras.layers.Input' 'tf.keras.optimizers.Adam'
 'tf.keras.optimizers.SGD' 'tf.keras.metrics.AUC' 'keras.models.Model'
 'keras.losses.mean_squared_error' 'keras.metrics.AUC'
 'keras.metrics.FalsePositives' 'keras.models.Seq

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(
