In [1]:
import sys
import numpy as np
import re

In [2]:
sys.path.append('../')

In [3]:
from test_function_generator import GenerateFunctions

In [4]:
gf = GenerateFunctions(100)

In [5]:
data = gf.generate_functions()
targets = gf.generate_test_functions()

In [6]:
data[:2]

['\n\ndef wahixrnw_hya_bpcn(oejq, mb, hu, wudd = 54):\n    """\n    Comment\n    :test: wahixrnw_hya_bpcn(38, 44, 67) is 54\n    """\n    return wudd',
 '\n\ndef amc(vya, shzu, hoodmls = True):\n    """\n    Comment\n    :test: amc(30, 70) != False\n    """\n    return hoodmls']

In [7]:
targets[:2]

['\n\n    def test_wahixrnw_hya_bpcn(self):\n        self.assertEqual(wahixrnw_hya_bpcn(38, 44, 67),54)',
 '\n\n    def test_amc(self):\n        self.assertTrue(amc(30, 70))']

The main task is to learn how to transform the intent stated in the `:test:` comment into an assertion. The comment is therefore semi-structured text that has to be parsed first.

In [54]:
# Parse the ":test:" line found in the docstring of the second generated function.
# Regex groups: 1 = the call (function name plus parenthesised arguments),
#               2 = the comparison written between the call and the value (the intent),
#               3 = the trailing word, i.e. the expected result.
# NOTE: \w already matches digits, so the \d+ alternative in group 3 never adds a match.
s = re.search(r'^\s+:test: (\w+\(.*\)) (.*) (\w+|\d+)$', data[1], re.MULTILINE)
intent = s.group(2)  # for data[1] this is '!='
intent

'!='

In [56]:
expected_result = s.group(3)
expected_result

'False'

In [57]:
function_signature = s.group(1)
function_signature

'amc(30, 70)'

In [58]:
function_name = function_signature.split('(')[0]
function_name

'amc'

In [59]:
test_function_name = '    def test_'+function_name+'(self):'
test_function_name

'    def test_amc(self):'

In [62]:
#target
print(test_function_name + '\n        self.' + 'ASSERTION' + '(' + function_signature + '[,' + expected_result + '])')

    def test_amc(self):
        self.ASSERTION(amc(30, 70)[,False])


Thus the inputs are the intent and the expected result, and the targets are ASSERTION and OPTIONAL_VALUE. ASSERTION is categorical, while OPTIONAL_VALUE is binary: it tells whether or not the expected result is included in the assertion call.

In [67]:
def parse_intents(data):
    """Extract the test intent from the docstring of each generated function.

    Every item of ``data`` is expected to contain a docstring line of the form
    ``:test: <call> <comparison> <value>``, for example
    ``:test: amc(30, 70) != False``.

    Parameters
    ----------
    data : sequence of str
        Source code of the generated functions, one function per item.

    Returns
    -------
    tuple of four lists, aligned by index
        ``(function_name, function_signature, intent, expected_result)``;
        for the example above: ``(['amc'], ['amc(30, 70)'], ['!='], ['False'])``.

    Raises
    ------
    ValueError
        If an item contains no parsable ``:test:`` line.
    """
    # Compiled once instead of on every iteration.
    # Groups: 1 = call, 2 = comparison, 3 = expected value.
    # \w already covers digits, so the value needs no separate \d+ branch.
    test_line = re.compile(r'^\s+:test: (\w+\(.*\)) (.*) (\w+)$', re.MULTILINE)

    function_signature = []
    function_name = []
    intent = []
    expected_result = []
    for index, source in enumerate(data):
        match = test_line.search(source)
        if match is None:
            # Fail with the offending item instead of an AttributeError on None.
            raise ValueError(
                'no ":test:" line found in item %d: %r' % (index, source))
        call, comparison, value = match.groups()
        function_signature.append(call)
        # The function name is everything before the first opening parenthesis.
        function_name.append(call.split('(')[0])
        intent.append(comparison)
        expected_result.append(value)
    return function_name, function_signature, intent, expected_result

In [68]:
test_names, fn_signatures, intents, expected_results = parse_intents(data)

In [69]:
intents[:5]

['is', '!=', 'is not', 'is not', '!=']

In [70]:
expected_results[:5]

['54', 'False', 'True', 'True', '0']

### Parse targets

In [71]:
targets[:3]

['\n\n    def test_wahixrnw_hya_bpcn(self):\n        self.assertEqual(wahixrnw_hya_bpcn(38, 44, 67),54)',
 '\n\n    def test_amc(self):\n        self.assertTrue(amc(30, 70))',
 '\n\n    def test_l_fo_pr(self):\n        self.assertFalse(l_fo_pr(95, 3, 5, 68, 63))']

In [77]:
# `\.` makes the dot after `self` literal; an unescaped `.` would match any character.
# Groups: 1 = assertion method name, 2 = text between the tested call and the
# closing parenthesis (e.g. ',54', or '' when there is no second argument).
a = re.search(r'^\s+self\.(\w+)\(.*\)(.*)\)$', targets[1], re.MULTILINE)

In [78]:
assertion = a.group(1)
assertion

'assertTrue'

In [79]:
optional_result = a.group(2)
optional_result

''

In [80]:
def parse_targets(targets):
    assertion = []
    optional_result = []
    for line in targets:
        a = re.search(r'^\s+self.(\w+)\(.*\)(.*)\)$', line, re.MULTILINE)
        assertion.append(a.group(1))
        optional_result.append(a.group(2))
    return assertion, optional_result

In [81]:
assertions, optional_results = parse_targets(targets)

In [82]:
assertions[:5]

['assertEqual', 'assertTrue', 'assertFalse', 'assertFalse', 'assertNotEqual']

In [83]:
optional_results[:5]

[',54', '', '', '', ',0']

## Encoding inputs

In [84]:
len(set(intents))

4

In [85]:
len(set(expected_results))

17

In [99]:
# binary encoding expected results
def encode_expected_results(expected_results):
    """Flag which expected results are integer literals.

    Returns a list with one entry per input item: 1 when ``int(item)``
    succeeds, 0 when it raises ``ValueError`` (e.g. for 'True' / 'False').
    """
    def _int_flag(text):
        # int() is the single source of truth for "looks like an integer".
        try:
            int(text)
        except ValueError:
            return 0
        return 1

    return [_int_flag(entry) for entry in expected_results]

In [100]:
bin_exp_results = encode_expected_results(expected_results)

In [101]:
bin_exp_results[:10]

[1, 0, 0, 0, 1, 1, 1, 0, 0, 1]

In [102]:
# Map every distinct intent to its column index, in alphabetical order.
intent_dict = {label: index for index, label in enumerate(sorted(set(intents)))}

In [103]:
intent_dict

{'!=': 0, '=': 1, 'is': 2, 'is not': 3}

In [110]:
def one_hot_encoding_inputs(intents, expected_results):
    """Encode the parsed inputs as float32 arrays.

    Returns ``(ohe, exp_res)``: ``ohe`` has one row per intent and one column
    per distinct intent (columns in sorted order) with a single 1 per row;
    ``exp_res`` is the 0/1 output of ``encode_expected_results`` as float32.
    """
    vocabulary = sorted(set(intents))
    column_of = {label: column for column, label in enumerate(vocabulary)}
    n_rows = len(intents)

    # Column index of every row, then set all the ones in a single assignment.
    columns = np.fromiter((column_of[label] for label in intents),
                          dtype=np.intp, count=n_rows)
    ohe = np.zeros((n_rows, len(vocabulary)), dtype='float32')
    ohe[np.arange(n_rows), columns] = 1.

    exp_res = np.array(encode_expected_results(expected_results), dtype='float32')
    return ohe, exp_res

In [111]:
inputs_data, optional_input = one_hot_encoding_inputs(intents, expected_results)

In [112]:
inputs_data[:5]

array([[0., 0., 1., 0.],
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.],
       [1., 0., 0., 0.]], dtype=float32)

In [113]:
optional_input[:5]

array([1., 0., 0., 0., 1.], dtype=float32)

## Encoding outputs

In [107]:
def one_hot_encoding_targets(assertions, optional_results):
    """Encode the parsed targets as float32 arrays.

    Returns ``(ohe, opt_res)``: ``ohe`` has one row per assertion and one
    column per distinct assertion name (columns in sorted order) with a single
    1 per row; ``opt_res`` holds 1. where the optional result string is
    non-empty and 0. where it is empty.
    """
    classes = sorted(set(assertions))
    column_of = {name: column for column, name in enumerate(classes)}
    n_rows = len(assertions)

    # Column index of every row, then set all the ones in a single assignment.
    columns = np.fromiter((column_of[name] for name in assertions),
                          dtype=np.intp, count=n_rows)
    ohe = np.zeros((n_rows, len(classes)), dtype='float32')
    ohe[np.arange(n_rows), columns] = 1.

    opt_res = np.array([float(bool(text)) for text in optional_results],
                       dtype='float32')
    return ohe, opt_res

In [114]:
output_data, optional_output = one_hot_encoding_targets(assertions, optional_results)

In [115]:
output_data[:5]

array([[1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.]], dtype=float32)

In [116]:
optional_output[:5]

array([1., 0., 0., 0., 1.])

## Model double inputs and double outputs

In [118]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, concatenate

In [137]:
# Layer widths follow the encoded arrays instead of a hard-coded 4, so the model
# still builds when a generated sample contains a different number of distinct
# intents or assertions.
n_intents = inputs_data.shape[1]
n_assertions = output_data.shape[1]

# Two inputs: the one-hot intent and the 0/1 "expected result is an integer" flag.
main_input = Input(shape=(n_intents,), name='main_input')
aux_input = Input(shape=(1,), name='aux_input')
x = concatenate([main_input, aux_input])
x = Dense(32, activation='relu')(x)
x = Dense(32, activation='relu')(x)
x = Dense(32, activation='relu')(x)
# Exactly one assertion applies per sample (the targets are one-hot rows), so this
# head is a softmax over the assertion classes rather than independent sigmoids.
main_output = Dense(n_assertions, activation='softmax', name='main_output')(x)
# Single yes/no decision: include the expected result as second argument or not.
aux_output = Dense(1, activation='sigmoid', name='aux_output')(x)
model = Model(inputs=[main_input, aux_input], outputs=[main_output, aux_output])
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         (None, 4)            0                                            
__________________________________________________________________________________________________
aux_input (InputLayer)          (None, 1)            0                                            
__________________________________________________________________________________________________
concatenate_5 (Concatenate)     (None, 5)            0           main_input[0][0]                 
                                                                 aux_input[0][0]                  
__________________________________________________________________________________________________
dense_15 (Dense)                (None, 32)           192         concatenate_5[0][0]              
__________

In [138]:
# Each head gets the loss that matches its target encoding:
# - main_output is trained on one-hot rows (one assertion class per sample),
#   so it uses categorical cross-entropy; 'acc' then reports how often the
#   arg-max class is right instead of an element-wise binary accuracy;
# - aux_output is trained on a single 0/1 flag, so it keeps binary cross-entropy.
# Losses and weights are keyed by output layer name so the pairing does not
# depend on list order.
model.compile(optimizer='rmsprop',
              loss={'main_output': 'categorical_crossentropy',
                    'aux_output': 'binary_crossentropy'},
              loss_weights={'main_output': 1., 'aux_output': 0.2},
              metrics=['acc'])

In [139]:
# Train both heads jointly. Inputs and targets are listed in the same order as
# the model's inputs=[main_input, aux_input] and outputs=[main_output, aux_output].
# With the 100 generated samples, validation_split=0.2 yields the 80 / 20
# train / validation split reported in the training log below.
model.fit([inputs_data, optional_input], [output_data, optional_output], 
          epochs=100, 
          batch_size=8, 
          validation_split=0.2)

Train on 80 samples, validate on 20 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epo

<tensorflow.python.keras.callbacks.History at 0x28e7b5e7be0>

### Inference

In [140]:
assertion, option = model.predict([inputs_data[-1:], optional_input[-1:]])

In [141]:
assertion

array([[9.8692752e-08, 2.0761082e-12, 9.9999988e-01, 3.2857939e-10]],
      dtype=float32)

In [142]:
option

array([[1.]], dtype=float32)

In [147]:
# Column index -> assertion name, using the same sorted order as the one-hot encoding.
rev_assertion_dict = dict(enumerate(sorted(set(assertions))))

In [148]:
rev_assertion_dict[np.argmax(assertion)]

'assertNotEqual'

In [155]:
# Build the optional second argument from the parsed *input* (expected_results),
# not from optional_results: that list is parsed from the ground-truth targets,
# which are not available when predicting for new functions.
[',' + x if np.round(option[0, 0]) else '' for x in expected_results[-1:]]

[',0']

In [159]:
# target
# Assemble the predicted test method from the model outputs and the parsed
# inputs only. The optional second argument comes from expected_results (parsed
# from the function docstring) rather than from optional_results, which is
# parsed from the ground-truth targets and would leak the label at inference.
print('    def test_' +
      test_names[-1] +
      '(self):' +
      '\n        self.' +
      rev_assertion_dict[np.argmax(assertion)] +
      '(' + fn_signatures[-1] +
      (',' + expected_results[-1] if np.round(option[0, 0]) else '') + ')')

    def test_fxxtija_vmy(self):
        self.assertNotEqual(fxxtija_vmy(45),0)
