In [None]:
#imports 
import json
import numpy as np
import pandas as pd
from scipy.spatial import distance_matrix as get_dm
from numba import njit, prange

In [None]:
# import JSON data

# Read the raw API descriptions from disk; `apis` holds the parsed JSON document
# (later cells index it as a sequence of dicts that have an "endpoints" entry).
with open("2021_05_25_apis/allEndpoints.json","r",encoding="utf-8") as json_file:
     apis = json.load(json_file)
        

In [None]:
# Basic size statistics of the loaded data set.
print("Number of APIs: ",len(apis))
# number of endpoints per API, in the same order as `apis`
endpoint_lens = [len(apis[i]["endpoints"]) for i in range(len(apis))]
print("Total number of Endpoints: ",sum(endpoint_lens))
# NOTE: divides by len(apis), so this line fails if no API was loaded
print("AVG number of Endpoints: ", sum(endpoint_lens)/len(apis))
# show one raw API entry as the cell output
apis[2]

## Define Data Structures

In [None]:
import operator


def string_to_list(value):
    """Split an identifier, path or xpath into lower-case word tokens.

    Every character contained in the punctuation set below (including tab and
    newline) acts as a separator.  An upper-case character also starts a new
    token and is lower-cased, so camelCase names are split into their words.
    Empty tokens are dropped.

    Example: "/pets/{petId}" -> ["pets", "pet", "id"]
    """
    separators = '!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'

    pieces = []
    for char in value:
        if char in separators:
            pieces.append('.')
        elif char.isupper():
            # word boundary in camelCase: insert a separator before the letter
            pieces.append('.')
            pieces.append(char.lower())
        else:
            pieces.append(char)

    return [token for token in "".join(pieces).split('.') if token]
    

class Api:
    """One API description together with its endpoints.

    Wraps the raw dictionary of a single API and exposes the entries "key",
    "name", "versionKey" and "versionName" as attributes.  Every item of
    api_data["endpoints"] is turned into an Endpoint that also receives its
    position in that list.
    """

    def __init__(self,api_data):
        self._raw_data = api_data
        self.key = api_data["key"]
        self.name = api_data["name"]
        self.version_key = api_data["versionKey"]
        self.version_name = api_data["versionName"]
        self.endpoints = [
            Endpoint(endpoint_data, position)
            for position, endpoint_data in enumerate(api_data["endpoints"])
        ]

    def get_property(self,name):
        """Return the entry `name` of the raw API dictionary."""
        return self._raw_data[name]

    def has_endpoints(self):
        """Return True if the API has at least one endpoint."""
        return len(self.endpoints) > 0

    def __str__(self):
        """Return a JSON summary (names, keys and number of endpoints)."""
        return json.dumps({
            "name": self.name,
            "key": self.key,
            "version_name": self.version_name,
            "version_key": self.version_key,
            "endpoints_size": len(self.endpoints),
        })
    
        
    
    
        
class Endpoint:
    """One endpoint (path + method) of an API with its parameters.

    The entries "requestParameters" and "responseParameters" of the raw
    dictionary are wrapped into Parameter objects.  `path_list` holds the path
    split into word tokens and `num` the number passed in by the caller.
    """

    def __init__(self,endpoint_data,endpoint_num):
        self._raw_data = endpoint_data
        self.path = endpoint_data["path"]
        self.method = endpoint_data["method"]
        self.request_parameters = [Parameter(raw) for raw in endpoint_data["requestParameters"]]
        self.response_parameters = [Parameter(raw) for raw in endpoint_data["responseParameters"]]
        self.path_list = self.path_to_list()
        self.num = endpoint_num

    def get_property(self,name):
        """Return the entry `name` of the raw endpoint dictionary."""
        return self._raw_data[name]

    def has_parameters(self):
        """Return True if there is at least one request or response parameter."""
        return bool(self.request_parameters or self.response_parameters)

    def path_to_list(self):
        """Return the path split into word tokens (see string_to_list)."""
        return string_to_list(self.path)

    def __str__(self):
        """Return a JSON summary (method, path and parameter counts)."""
        return json.dumps({
            "method": self.method,
            "path": self.path,
            "request_parameters_size": len(self.request_parameters),
            "response_parameters_size": len(self.response_parameters),
        })
    
class Parameter:
    """A single request or response parameter of an endpoint.

    Keeps the raw dictionary and exposes its "xpath" and "name" entries, both
    as strings and as lists of word tokens (`xpath_list`, `name_list`).
    """

    def __init__(self,parameter_data):
        self._raw_data = parameter_data
        self.xpath = parameter_data["xpath"]
        self.name = parameter_data["name"]

        # an empty xpath is replaced by the parameter name
        self.xpath = self.xpath or self.name

        self.xpath_list = self.xpath_to_list()
        self.name_list = self.name_to_list()

    def get_property(self,name):
        """Return the entry `name` of the raw parameter dictionary."""
        return self._raw_data[name]

    def name_to_list(self):
        """Return the name split into word tokens (see string_to_list)."""
        return string_to_list(self.name)

    def xpath_to_list(self):
        """Return the xpath split into word tokens (see string_to_list)."""
        return string_to_list(self.xpath)

    def __str__(self):
        """Return a JSON summary (name, xpath and both token lists)."""
        return json.dumps({
            "name": self.name,
            "xpath": self.xpath,
            "name_list": self.name_list,
            "xpath_list": self.xpath_list,
        })
    
    
        

## Load Data

In [None]:
apis_list = [Api(apis[i]) for i in range(len(apis))]

In [None]:
# Smoke test: print the parsed data of one API (summary, endpoints, parameters)
api = 2  # index into apis_list

# The objects are printed through their __str__ methods, i.e. as JSON summaries.
print("API:")
print(apis_list[api])
print()
print("Endpoints:")

for endpoint in apis_list[api].endpoints:
    print("Endpoint:")
    print(endpoint)
    print()
    print("request parameters:")
    for rp in endpoint.request_parameters:
        print(rp)
    print()
    print("response parameters:")
    for rp in endpoint.response_parameters:
        print(rp)
    print()
    print()
    
    

## Load Embeddings

In [None]:
# functions to load embeddings from file

def load_embedding_from_json(file_path:str):
    """Load a word embedding from a JSON file.

    The file is expected to contain one JSON object that maps each word to its
    vector (a list of numbers).  Returns a dict word -> numpy array.
    """
    with open(file_path,"r",encoding="utf-8") as json_file:
        raw_embedding = json.load(json_file)

    # json delivers plain lists; downstream code works on numpy arrays
    return {word: np.array(values) for word, values in raw_embedding.items()}

def load_glove_embedding_from_file(file_path):
    """Load a GloVe text file into a dict word -> numpy vector.

    Every line of the file holds a word followed by its vector components,
    separated by single spaces.

    Parameters:
        file_path: path of the GloVe text file.

    Returns:
        dict mapping each word (first column) to a 1-d numpy array with the
        remaining columns of its line.
    """
    df = pd.read_csv(
        file_path,
        sep=" ",
        quoting=3,              # csv.QUOTE_NONE: quote characters are ordinary text
        header=None,
        index_col=0,
        # Without this, vocabulary tokens such as "null", "nan" or "NA" are
        # parsed as missing values and end up as NaN keys instead of words.
        keep_default_na=False,
    )
    return dict(zip(df.index, df.to_numpy()))

In [None]:
# load open api embedding 
api_embedding = load_embedding_from_json("saved_embeddings/open_api_embedding_5d_314_words.json")

In [None]:
#load glove embedding
glove_embedding = load_glove_embedding_from_file("saved_embeddings/glove.6B.50d.txt")

### Reduce Glove Embedding to same Vocabulary as OpenAPI Embedding

In [None]:
def extract_words_and_vectors_from_embedding(embedding: dict):
    """Split an embedding dict into parallel word and vector containers.

    Returns a tuple (words, vectors): `words` is the list of keys in dict
    order and `vectors` a numpy array built from the values in the same order,
    so vectors[i] belongs to words[i].
    """
    words = list(embedding.keys())
    vectors = np.array(list(embedding.values()))
    return words, vectors

In [None]:
api_embedding_words, api_embedding_vectors = extract_words_and_vectors_from_embedding(api_embedding)

In [None]:
def reduce_embedding_to_words(embedding: dict, words: list):
    """Return the part of `embedding` that covers the given words.

    Parameters:
        embedding: dict word -> vector.
        words: words to keep.

    Returns:
        A new dict with the entries of `embedding` whose key occurs in
        `words`, in the order of `words`.  Words without an entry in
        `embedding` are skipped silently.
    """
    # An explicit membership test replaces the former bare `except: pass`, which
    # hid every error (not only missing words) and even swallowed KeyboardInterrupt.
    return {word: embedding[word] for word in words if word in embedding}

In [None]:
# reduce glove embedding to the same words as api embedding
glove_embedding = reduce_embedding_to_words(glove_embedding, api_embedding_words)

In [None]:
print("Size own embedding: "+str(len(api_embedding)))
print("Size glove embedding: "+str(len(glove_embedding)))

## Define Data Structure for Requests

In [None]:
class Request:
    """A request sample: endpoint name, HTTP method and a list of parameter words."""

    def __init__(self, endpoint_name:str, method:str, parameters:list):
        self.endpoint, self.method, self.parameters = endpoint_name, method, parameters

    def __str__(self):
        """Return the request as a JSON object string."""
        return json.dumps({
            "endpoint": self.endpoint,
            "method": self.method,
            "parameters": self.parameters,
        })

In [None]:
a = Request("mein endpoint", "post", ["das", "hier", "ist", "schoen"])

print(a)

### Create Request Object for requests that are completely embedded 

In [None]:


def create_request_obj_if_embedded(embedding:dict, endpoint, min_words:int, max_words:int):
    """Build a Request for an endpoint whose request parameters are fully embedded.

    An endpoint is suitable if every word in the `xpath_list` of all its
    request parameters is a key of `embedding` and the number of distinct
    words lies in [min_words, max_words].

    Parameters:
        embedding: dict word -> vector; only its keys are used.
        endpoint: object providing `request_parameters` (each with an
            `xpath_list`), `path` and `method`.
        min_words: minimal number of distinct parameter words (inclusive).
        max_words: maximal number of distinct parameter words (inclusive).

    Returns:
        (True, Request) for a suitable endpoint, otherwise (False, None).
        The Request gets the path without its first character, the method and
        the distinct parameter words.
    """
    parameter_words = []
    for request_parameter in endpoint.request_parameters:
        for word in request_parameter.xpath_list:
            if word not in embedding:
                # a single unknown word disqualifies the whole endpoint
                return False, None
            parameter_words.append(word)

    # De-duplicate while keeping first-occurrence order.  list(set(...)) would
    # order the words by their hash, which changes between interpreter runs
    # (string hash randomization) and made the generated data irreproducible.
    parameter_words = list(dict.fromkeys(parameter_words))

    if not min_words <= len(parameter_words) <= max_words:
        return False, None

    return True, Request(endpoint.path[1:], endpoint.method, parameter_words)

In [None]:
# Collect a Request for every endpoint whose request parameters are completely
# covered by the (reduced) GloVe embedding.
requests = []

for api in apis_list:
    for endpoint in api.endpoints:
        # min_words == max_words == 4: only endpoints with exactly four distinct parameter words are kept
        is_embedded, request_obj = create_request_obj_if_embedded(glove_embedding, endpoint, min_words=4, max_words=4)
        
        if is_embedded:
            requests.append(request_obj)
        
# cell output: number of collected requests
len(requests)

In [None]:
print(requests[5])

In [None]:
from functools import cmp_to_key
def compare(item1, item2):
    """cmp-style comparison that orders items by descending number of parameters.

    Returns 1 if item1 has fewer parameters than item2, -1 if it has more and
    0 if both have the same number.
    """
    size1, size2 = len(item1.parameters), len(item2.parameters)
    if size1 == size2:
        return 0
    return 1 if size1 < size2 else -1
    
requests.sort(key=cmp_to_key(compare))

In [None]:
print(requests[0])

In [None]:
# nearest words in vector space

from scipy.spatial import distance
from functools import cmp_to_key

def get_nearest_words(word_vector, embedding: dict):
    """Rank all words of an embedding by their distance to a point.

    Returns a list of (distance, word) tuples for every entry of `embedding`,
    sorted by ascending Euclidean distance between the word's vector and
    `word_vector`.  Words with equal distance keep their dict order.
    """
    ranked = [
        (distance.euclidean(word_vector, vector), word)
        for word, vector in embedding.items()
    ]
    # sort on the distance only, so ties are not broken by comparing the words
    ranked.sort(key=lambda entry: entry[0])
    return ranked

# Prediction with NN 

## Reduce Glove to 5 Dimensions with PCA

In [None]:
glove_embedding_words, glove_embedding_vectors = extract_words_and_vectors_from_embedding(glove_embedding)

In [None]:
from sklearn.decomposition import PCA
# Project the GloVe vectors onto their first 5 principal components.
pca = PCA(n_components = 5)
# NOTE: overwrites its own input, so re-running this cell applies PCA again
# to the already reduced vectors.
glove_embedding_vectors = pca.fit_transform(glove_embedding_vectors)

In [None]:
def create_embedding_dict(embedding_words, embedding_vectors):
    """Build a dict word -> vector from two parallel sequences.

    embedding_vectors[i] is stored under embedding_words[i]; if a word occurs
    more than once, its last vector wins.
    """
    return {
        word: embedding_vectors[position]
        for position, word in enumerate(embedding_words)
    }

In [None]:
glove_embedding = create_embedding_dict(glove_embedding_words, glove_embedding_vectors)

## Split Requests into Training and Test

In [None]:
from sklearn.model_selection import train_test_split
# Shuffle the requests and hold out 20 % of them as test set; the fixed
# random_state keeps the split the same for an unchanged `requests` list.
requests_train, requests_test = train_test_split(requests, test_size = 0.2, random_state = 0, shuffle=True)

# sanity check of the split sizes
print("len all requests "+str(len(requests)))
print("len requests train "+str(len(requests_train)))
print("len requests test "+str(len(requests_test)))

## Create Training/Test Data

In [None]:
# all possible subsets of size k
def get_all_possible_subsets_of_length_k_by_binomial_coefficient(number_to_choose_k:int, size_of_list_to_choose_from_n:int):
    '''Return all combinations of k indices out of range(n).

    Parameters:
        number_to_choose_k: number of indices per combination (k).
        size_of_list_to_choose_from_n: size of the list the indices refer to (n).

    Returns:
        numpy array with one row per combination.  Each row is strictly
        increasing and the rows are in lexicographic order, e.g. k=3, n=4
        gives [[0,1,2],[0,1,3],[0,2,3],[1,2,3]].  If k > n there is no
        combination and an empty array of shape (0, k) is returned.

    Raises:
        ValueError: if number_to_choose_k is negative.
    '''
    from itertools import combinations

    if number_to_choose_k < 0:
        raise ValueError("number_to_choose_k must be non-negative")

    # The former hand-written enumeration never reached its stop condition for
    # k > n and looped forever; handle that case explicitly.
    if number_to_choose_k > size_of_list_to_choose_from_n:
        return np.empty((0, number_to_choose_k), dtype=int)

    return np.array(list(combinations(range(size_of_list_to_choose_from_n), number_to_choose_k)))

In [None]:
get_all_possible_subsets_of_length_k_by_binomial_coefficient(3, 5)

In [None]:
# get_data_and_wanted results from requests

def get_data_for_training(requests, embedding:dict):
    """Create (input, label) samples for predicting a missing parameter word.

    For every request, each choice of three parameter words acts as the known
    input and the first of the remaining parameter words is the word to
    predict.  Every choice contributes six samples, one per ordering of the
    three input vectors, all with the same label.

    Parameters:
        requests: iterable of objects with a `parameters` list of words.
        embedding: dict word -> vector.  Input words missing from the
            embedding are replaced by np.zeros(5); the word to predict must be
            present.

    Returns:
        (inputs, labels) as numpy arrays.  Each input row is the concatenation
        of three word vectors, the matching label row is the vector of the
        word to predict.  Both arrays are empty if no sample was produced.

    Raises:
        KeyError: if the word to predict is not contained in `embedding`.
    """
    from itertools import combinations, permutations

    def get_input(words:list, embedding:dict):
        """Return the 6 orderings of the three word vectors, one concatenated row each."""
        vectors = []
        for word in words:
            try:
                vectors.append(embedding[word])
            except KeyError:
                # unknown word: zero vector of the 5 dimensions used throughout this notebook
                vectors.append(np.zeros(5))

        # permutations() yields each ordering of the three positions exactly once
        # (the hand-written list contained (1, 2, 1) instead of (1, 2, 0)).
        return np.array([
            np.concatenate([vectors[position] for position in order])
            for order in permutations(range(3))
        ])

    input_data = []
    labels = []

    for req in requests:
        param = req.parameters

        # all index triples of known words, in lexicographic order
        for indexes in combinations(range(len(param)), 3):
            words = [param[index] for index in indexes]

            words_to_predict = param.copy()
            for word in words:
                words_to_predict.remove(word)

            if not words_to_predict:
                # exactly three parameters: nothing is left to predict
                continue

            wanted_output = embedding[words_to_predict[0]]

            for new_input in get_input(words, embedding):
                input_data.append(new_input)
                labels.append(wanted_output)

    return np.array(input_data), np.array(labels)
    

In [None]:
# training / test data built with the OpenAPI embedding
input_train_api, output_train_api = get_data_for_training(requests_train, api_embedding)
input_test_api, output_test_api = get_data_for_training(requests_test, api_embedding)

# shapes: inputs and labels of one split must have the same number of rows
print(input_train_api.shape)
print(output_train_api.shape)
print()
print(input_test_api.shape)
print(output_test_api.shape)

In [None]:
# training / test data built with the (PCA-reduced) GloVe embedding
input_train_glove, output_train_glove = get_data_for_training(requests_train, glove_embedding)
input_test_glove, output_test_glove = get_data_for_training(requests_test, glove_embedding)

# shapes: inputs and labels of one split must have the same number of rows
print(input_train_glove.shape)
print(output_train_glove.shape)
print()
print(input_test_glove.shape)
print(output_test_glove.shape)

In [None]:
# shuffle api data (in place; re-running this cell shuffles the arrays again)
random_seed = 42  # re-seeding before every shuffle is meant to shuffle inputs and labels in the same way

# train inputs and train labels: same seed -> intended to give the same row permutation
np.random.seed(random_seed)
np.random.shuffle(input_train_api)

np.random.seed(random_seed)
np.random.shuffle(output_train_api)

# test inputs and test labels
np.random.seed(random_seed)
np.random.shuffle(input_test_api)

np.random.seed(random_seed)
np.random.shuffle(output_test_api)

In [None]:
# shuffle glove data (in place; re-running this cell shuffles the arrays again)
random_seed = 42  # re-seeding before every shuffle is meant to shuffle inputs and labels in the same way

# train inputs and train labels: same seed -> intended to give the same row permutation
np.random.seed(random_seed)
np.random.shuffle(input_train_glove)

np.random.seed(random_seed)
np.random.shuffle(output_train_glove)

# test inputs and test labels
np.random.seed(random_seed)
np.random.shuffle(input_test_glove)

np.random.seed(random_seed)
np.random.shuffle(output_test_glove)

# Build Neural Network

In [None]:
import tensorflow as tf
import tensorflow.keras.backend as kb

## Model API Embedding

In [None]:
# Fully connected network for the OpenAPI embedding: 15 -> 32 -> 5 units.
# No input shape is declared here.
model_with_api_embedding = tf.keras.models.Sequential()

model_with_api_embedding.add(tf.keras.layers.Dense(units=15, activation='linear')) # first layer: 15 units, linear activation
model_with_api_embedding.add(tf.keras.layers.Dense(units=32, activation='relu'))   # hidden layer
model_with_api_embedding.add(tf.keras.layers.Dense(units=5, activation='linear')) # output layer: one value per label component

In [None]:
model_with_api_embedding.compile(optimizer = 'adam', loss = 'mean_squared_error')

## Model Glove Embedding

In [None]:
# Fully connected network for the GloVe embedding: 15 -> 32 -> 5 units,
# i.e. the same layer sizes as the model for the OpenAPI embedding.
model_with_glove_embedding = tf.keras.models.Sequential()

model_with_glove_embedding.add(tf.keras.layers.Dense(units=15, activation='linear')) # first layer: 15 units, linear activation
model_with_glove_embedding.add(tf.keras.layers.Dense(units=32, activation='relu'))   # hidden layer
model_with_glove_embedding.add(tf.keras.layers.Dense(units=5, activation='linear')) # output layer: one value per label component

In [None]:
model_with_glove_embedding.compile(optimizer = 'adam', loss = 'mean_squared_error')

## Test / Train Classifiers

In [None]:
def get_vector_to_word_dict(embedding:dict):
    """Build the reverse lookup of an embedding.

    Returns a dict that maps the raw bytes of every vector (vector.tobytes())
    to its word.  If several words share identical bytes, the last one wins.
    """
    return {vector.tobytes(): word for word, vector in embedding.items()}

In [None]:
# create vector to word embeddings
api_vector_to_word_dict = get_vector_to_word_dict(api_embedding)
glove_vector_to_word_dict = get_vector_to_word_dict(glove_embedding)

In [None]:
def get_delay_of_prediction(res_predicted, res_actual, input_data, embedding:dict, vector_to_word_dict:dict)->int:
    """Return the rank of the expected word among the words nearest to a prediction.

    All words of `embedding` are ranked by their distance to `res_predicted`
    (see get_nearest_words).  The words that were fed into the model are
    removed from this ranking; the position of the expected word in the
    remaining list is the delay (0 = the expected word is the nearest one).

    Parameters:
        res_predicted: vector predicted by the model.
        res_actual: vector of the expected word (numpy array, exactly as
            stored in the embedding).
        input_data: model input row, a concatenation of word vectors that each
            have the length of `res_actual`.
        embedding: dict word -> vector.
        vector_to_word_dict: reverse lookup vector bytes -> word
            (see get_vector_to_word_dict).

    Returns:
        The zero-based rank of the expected word, or None if it does not occur
        in the ranking (e.g. because it is one of the input words).

    Raises:
        KeyError: if an input slice or `res_actual` is not a vector of
            `vector_to_word_dict`.
    """
    nearest_words = get_nearest_words(res_predicted, embedding)

    # Recover the input words from the equally sized slices of the input row.
    vector_size = len(res_actual)
    input_words = {
        vector_to_word_dict[input_data[start:start + vector_size].tobytes()]
        for start in range(0, len(input_data), vector_size)
    }

    word_to_predict = vector_to_word_dict[res_actual.tobytes()]

    # Remove the input words from the ranking.  (The former loop removed the
    # collected words from their own list, so the ranking was never filtered.)
    candidates = [word for _, word in nearest_words if word not in input_words]

    # get delay
    for rank, word in enumerate(candidates):
        if word == word_to_predict:
            return rank

    return None

In [None]:
def get_avg_prediction_delay(predictions_outputs, wanted_outputs, input_data, embedding, vector_to_word_dict):
    """Return the average delay (see get_delay_of_prediction) over all predictions.

    Parameters:
        predictions_outputs: predicted vectors, one per sample.
        wanted_outputs: expected vectors, parallel to `predictions_outputs`.
        input_data: model input rows, parallel to `predictions_outputs`.
        embedding: dict word -> vector.
        vector_to_word_dict: reverse lookup vector bytes -> word.

    Returns:
        The mean of all delays that could be determined.  Samples whose delay
        is None are reported with a printed message and left out of the mean.
        The result is NaN if there is no usable delay at all.
    """
    delays = [
        get_delay_of_prediction(predictions_outputs[i], wanted_outputs[i], input_data[i], embedding, vector_to_word_dict)
        for i in range(len(predictions_outputs))
    ]

    # Check for missing delays BEFORE averaging: summing a list that contains
    # None raises a TypeError, so the former check after the sum never ran.
    found_delays = [delay for delay in delays if delay is not None]
    if len(found_delays) != len(delays):
        print("a delay is None")

    if not found_delays:
        return float("nan")

    return sum(found_delays) / len(found_delays)

## Result without Training

In [None]:
# Baseline before any call to fit(): predict with both models and report the
# average delay (see get_avg_prediction_delay) on test and training data.
prediction_test_api = model_with_api_embedding.predict(input_test_api)
prediction_train_api = model_with_api_embedding.predict(input_train_api)

prediction_test_glove = model_with_glove_embedding.predict(input_test_glove)
prediction_train_glove = model_with_glove_embedding.predict(input_train_glove)

print("Result without Training")
avg_delay_api_test = get_avg_prediction_delay(prediction_test_api, output_test_api, input_test_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_test: "+str(avg_delay_api_test))
avg_delay_glove_test = get_avg_prediction_delay(prediction_test_glove, output_test_glove, input_test_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_test: "+str(avg_delay_glove_test))
avg_delay_api_train = get_avg_prediction_delay(prediction_train_api, output_train_api, input_train_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_train: "+str(avg_delay_api_train))
avg_delay_glove_train = get_avg_prediction_delay(prediction_train_glove, output_train_glove, input_train_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_train: "+str(avg_delay_glove_train))

## Training

In [None]:
model_with_glove_embedding.fit(input_train_glove, output_train_glove, batch_size = 4, epochs = 1)

In [None]:
model_with_api_embedding.fit(input_train_api, output_train_api, batch_size = 4, epochs = 1)

In [None]:
# Same evaluation as before, now after the first training epoch of both models.
# NOTE(review): this cell is repeated verbatim after every training step; a helper function would avoid the copies.
prediction_test_api = model_with_api_embedding.predict(input_test_api)
prediction_train_api = model_with_api_embedding.predict(input_train_api)

prediction_test_glove = model_with_glove_embedding.predict(input_test_glove)
prediction_train_glove = model_with_glove_embedding.predict(input_train_glove)

print("Result after 1 Epochs of Training")
avg_delay_api_test = get_avg_prediction_delay(prediction_test_api, output_test_api, input_test_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_test: "+str(avg_delay_api_test))
avg_delay_glove_test = get_avg_prediction_delay(prediction_test_glove, output_test_glove, input_test_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_test: "+str(avg_delay_glove_test))
avg_delay_api_train = get_avg_prediction_delay(prediction_train_api, output_train_api, input_train_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_train: "+str(avg_delay_api_train))
avg_delay_glove_train = get_avg_prediction_delay(prediction_train_glove, output_train_glove, input_train_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_train: "+str(avg_delay_glove_train))

In [None]:
model_with_glove_embedding.fit(input_train_glove, output_train_glove, batch_size = 4, epochs = 1)
model_with_api_embedding.fit(input_train_api, output_train_api, batch_size = 4, epochs = 1)

In [None]:
# Evaluation of both models after the second training epoch (test and training data).
prediction_test_api = model_with_api_embedding.predict(input_test_api)
prediction_train_api = model_with_api_embedding.predict(input_train_api)

prediction_test_glove = model_with_glove_embedding.predict(input_test_glove)
prediction_train_glove = model_with_glove_embedding.predict(input_train_glove)

print("Result after 2 Epochs of Training")
avg_delay_api_test = get_avg_prediction_delay(prediction_test_api, output_test_api, input_test_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_test: "+str(avg_delay_api_test))
avg_delay_glove_test = get_avg_prediction_delay(prediction_test_glove, output_test_glove, input_test_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_test: "+str(avg_delay_glove_test))
avg_delay_api_train = get_avg_prediction_delay(prediction_train_api, output_train_api, input_train_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_train: "+str(avg_delay_api_train))
avg_delay_glove_train = get_avg_prediction_delay(prediction_train_glove, output_train_glove, input_train_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_train: "+str(avg_delay_glove_train))

In [None]:
model_with_glove_embedding.fit(input_train_glove, output_train_glove, batch_size = 4, epochs = 1)
model_with_api_embedding.fit(input_train_api, output_train_api, batch_size = 4, epochs = 1)

In [None]:
# Evaluation of both models after the third training epoch (test and training data).
prediction_test_api = model_with_api_embedding.predict(input_test_api)
prediction_train_api = model_with_api_embedding.predict(input_train_api)

prediction_test_glove = model_with_glove_embedding.predict(input_test_glove)
prediction_train_glove = model_with_glove_embedding.predict(input_train_glove)

print("Result after 3 Epochs of Training")
avg_delay_api_test = get_avg_prediction_delay(prediction_test_api, output_test_api, input_test_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_test: "+str(avg_delay_api_test))
avg_delay_glove_test = get_avg_prediction_delay(prediction_test_glove, output_test_glove, input_test_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_test: "+str(avg_delay_glove_test))
avg_delay_api_train = get_avg_prediction_delay(prediction_train_api, output_train_api, input_train_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_train: "+str(avg_delay_api_train))
avg_delay_glove_train = get_avg_prediction_delay(prediction_train_glove, output_train_glove, input_train_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_train: "+str(avg_delay_glove_train))

In [None]:
model_with_glove_embedding.fit(input_train_glove, output_train_glove, batch_size = 4, epochs = 1)
model_with_api_embedding.fit(input_train_api, output_train_api, batch_size = 4, epochs = 1)

In [None]:
# Evaluation of both models after the fourth training epoch (test and training data).
prediction_test_api = model_with_api_embedding.predict(input_test_api)
prediction_train_api = model_with_api_embedding.predict(input_train_api)

prediction_test_glove = model_with_glove_embedding.predict(input_test_glove)
prediction_train_glove = model_with_glove_embedding.predict(input_train_glove)

print("Result after 4 Epochs of Training")
avg_delay_api_test = get_avg_prediction_delay(prediction_test_api, output_test_api, input_test_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_test: "+str(avg_delay_api_test))
avg_delay_glove_test = get_avg_prediction_delay(prediction_test_glove, output_test_glove, input_test_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_test: "+str(avg_delay_glove_test))
avg_delay_api_train = get_avg_prediction_delay(prediction_train_api, output_train_api, input_train_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_train: "+str(avg_delay_api_train))
avg_delay_glove_train = get_avg_prediction_delay(prediction_train_glove, output_train_glove, input_train_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_train: "+str(avg_delay_glove_train))

In [None]:
model_with_glove_embedding.fit(input_train_glove, output_train_glove, batch_size = 4, epochs = 1)
model_with_api_embedding.fit(input_train_api, output_train_api, batch_size = 4, epochs = 1)

In [None]:
# Evaluation of both models after the fifth training epoch (test and training data).
prediction_test_api = model_with_api_embedding.predict(input_test_api)
prediction_train_api = model_with_api_embedding.predict(input_train_api)

prediction_test_glove = model_with_glove_embedding.predict(input_test_glove)
prediction_train_glove = model_with_glove_embedding.predict(input_train_glove)

print("Result after 5 Epochs of Training")
avg_delay_api_test = get_avg_prediction_delay(prediction_test_api, output_test_api, input_test_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_test: "+str(avg_delay_api_test))
avg_delay_glove_test = get_avg_prediction_delay(prediction_test_glove, output_test_glove, input_test_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_test: "+str(avg_delay_glove_test))
avg_delay_api_train = get_avg_prediction_delay(prediction_train_api, output_train_api, input_train_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_train: "+str(avg_delay_api_train))
avg_delay_glove_train = get_avg_prediction_delay(prediction_train_glove, output_train_glove, input_train_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_train: "+str(avg_delay_glove_train))

## Result after 10 Epochs Training

In [None]:
model_with_glove_embedding.fit(input_train_glove, output_train_glove, batch_size = 4, epochs = 5)

In [None]:
model_with_api_embedding.fit(input_train_api, output_train_api, batch_size = 4, epochs = 5)

In [None]:
# Evaluation after 10 epochs in total: five single-epoch fits plus the
# five-epoch fits in the cells directly above.
prediction_test_api = model_with_api_embedding.predict(input_test_api)
prediction_train_api = model_with_api_embedding.predict(input_train_api)

prediction_test_glove = model_with_glove_embedding.predict(input_test_glove)
prediction_train_glove = model_with_glove_embedding.predict(input_train_glove)

print("Result after 10 Epochs of Training")
avg_delay_api_test = get_avg_prediction_delay(prediction_test_api, output_test_api, input_test_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_test: "+str(avg_delay_api_test))
avg_delay_glove_test = get_avg_prediction_delay(prediction_test_glove, output_test_glove, input_test_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_test: "+str(avg_delay_glove_test))
avg_delay_api_train = get_avg_prediction_delay(prediction_train_api, output_train_api, input_train_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_train: "+str(avg_delay_api_train))
avg_delay_glove_train = get_avg_prediction_delay(prediction_train_glove, output_train_glove, input_train_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_train: "+str(avg_delay_glove_train))

In [None]:
model_with_glove_embedding.fit(input_train_glove, output_train_glove, batch_size = 4, epochs = 10)
model_with_api_embedding.fit(input_train_api, output_train_api, batch_size = 4, epochs = 10)

In [None]:
# Evaluation after 20 epochs in total: the previous 10 epochs plus the
# ten-epoch fits in the cell directly above.
prediction_test_api = model_with_api_embedding.predict(input_test_api)
prediction_train_api = model_with_api_embedding.predict(input_train_api)

prediction_test_glove = model_with_glove_embedding.predict(input_test_glove)
prediction_train_glove = model_with_glove_embedding.predict(input_train_glove)

print("Result after 20 Epochs of Training")
avg_delay_api_test = get_avg_prediction_delay(prediction_test_api, output_test_api, input_test_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_test: "+str(avg_delay_api_test))
avg_delay_glove_test = get_avg_prediction_delay(prediction_test_glove, output_test_glove, input_test_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_test: "+str(avg_delay_glove_test))
avg_delay_api_train = get_avg_prediction_delay(prediction_train_api, output_train_api, input_train_api, api_embedding, api_vector_to_word_dict)
print("avg_delay_api_train: "+str(avg_delay_api_train))
avg_delay_glove_train = get_avg_prediction_delay(prediction_train_glove, output_train_glove, input_train_glove, glove_embedding, glove_vector_to_word_dict)
print("avg_delay_glove_train: "+str(avg_delay_glove_train))

# Inspect redundancy and inconsistency in training and test data

In [None]:
# Count how many input rows (train + test, OpenAPI embedding) are distinct.
number_data_input = len(input_train_api) + len(input_test_api)

input_data_list = np.concatenate((input_train_api,input_test_api)).tolist()

# lists are not hashable, so every row becomes a tuple before it goes into a set
input_data_list_of_tuples = []
for el in input_data_list:
    input_data_list_of_tuples.append(tuple(el))

input_data_unique = list(set(input_data_list_of_tuples))

# convert tuples back to list
for i, el in enumerate(input_data_unique):
    input_data_unique[i] = [k for k in el]


# same value as len(input_data_unique); the set is built a second time here
number_data_input_unique = len(list(set(input_data_list_of_tuples)))

In [None]:
print(number_data_input)
print(number_data_input_unique)