In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import json

In [2]:
#pathToCombined = "D:/Tmp/Coding/datasets/RICO/unique_uis/test_dataset"
traces_prefix = "D:/Tmp/Coding/datasets/RICO/traces/test_traces"
path_to_gesture_json = f"{traces_prefix}/*/trace_*/gestures.json"

In [3]:
import glob
#from pathlib import Path
import preprocessing as pp

In [4]:
gesture_json_paths = glob.glob(path_to_gesture_json)
gesture_json_paths

['D:/Tmp/Coding/datasets/RICO/traces/test_traces\\muchoapps.frasesdelavida\\trace_1\\gestures.json',
 'D:/Tmp/Coding/datasets/RICO/traces/test_traces\\muchoapps.frasesdelavida\\trace_2\\gestures.json',
 'D:/Tmp/Coding/datasets/RICO/traces/test_traces\\yourapp24.android.tools.alice_lite\\trace_0\\gestures.json',
 'D:/Tmp/Coding/datasets/RICO/traces/test_traces\\yourapp24.android.tools.alice_lite\\trace_1\\gestures.json',
 'D:/Tmp/Coding/datasets/RICO/traces/test_traces\\yourapp24.android.tools.alice_lite\\trace_2\\gestures.json']

In [5]:
def get_features_from_gestures(gesture_path):
    with tf.io.gfile.GFile(gesture_path) as f:
        gesture_json = json.load(f)
    trace_path = gesture_path.replace('gestures.json', '')
    # Throw away all gestures except the first
    gesture_dict = {gesture[0]:gesture[1][0] for gesture in gesture_json.items()}
    gesture_df = pd.DataFrame(gesture_dict).transpose()
    gesture_df.columns = ['x', 'y']
    return gesture_df, trace_path

In [25]:
keys_to_dtype = {
    #'developer_token_id': tf.int64,
    #'resource_token_id': tf.int64,
    #'screen_caption_token_ids': tf.int64,
    #'appdesc_token_id': tf.int64,
    'clickable_seq': tf.int64,
    'type_id_seq': tf.int64,
    'cord_x_seq': tf.float32,
    'cord_y_seq': tf.float32,
    'visibility_to_user_seq': tf.int64,
    'visibility_seq': tf.int64,
    #'attended_objects': tf.int64,
    #'label_flag': tf.int64,  # 0: padding 1: node
    # 'obj_img_mat': tf.int64,
    'obj_dom_pos': tf.int64,
    #'attention_boxes': tf.float32,
    #'gold_caption': tf.string,
}

In [26]:
def get_features_tensor_from_tree(frame_index, trace_path):
    file_prefix = f"{trace_path}view_hierarchies/{frame_index}"
    features_dict = pp.create_simple_features(file_prefix)
    del features_dict['node_id']
    feature_items = list(features_dict.items())
    features_adapt = to_var_len_feature_dict(feature_items)
    tree_feature_df = pd.DataFrame(features_adapt.items(), columns=['feat', frame_index])
    tree_feature_df = tree_feature_df.set_index('feat')
    tree_feature_df = tree_feature_df.transpose()
    return tree_feature_df
    #return pd.DataFrame(features_adapt, index=[frame_index])

In [27]:
def to_var_len_feature_dict(entries):
    dict = {}
    for feature_item in entries:
        dict[feature_item[0]] = tf.convert_to_tensor(feature_item[1], dtype=keys_to_dtype[feature_item[0]])
    return dict

In [43]:
gesture_path = gesture_json_paths[1]
gesture_df, trace_path = get_features_from_gestures(gesture_path)
frameIndex = gesture_df.index.values[1]
feature_df = get_features_tensor_from_tree(frameIndex, trace_path)
feature_df
#feature_item = features[0]
#feature_item
#tf.convert_to_tensor(feature_item[1], dtype=keys_to_dtype[feature_item[0]])

KeyboardInterrupt: 

In [29]:
# gesture_df['x'].dtype # float64
# gesture_df['y'].dtype # float64

In [30]:
for t,v in keys_to_dtype.items():
    print(feature_df[t]['4758'][0].dtype)

<dtype: 'int64'>
<dtype: 'int64'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'int64'>
<dtype: 'int64'>
<dtype: 'int64'>


In [None]:
all_feat_df = pd.concat([gesture_df, feature_df], axis=1)
all_feat_df

In [39]:
feature_df['type_id_seq'].iloc[0][0].dtype

tf.int64

In [34]:
 # Get set of frames for each screen, but don't mix them during training LSTM
all_features_df = pd.DataFrame()
for trace_id, gesture_path in enumerate(gesture_json_paths):
    gesture_df, trace_path = get_features_from_gestures(gesture_path)
    tree_features_df = pd.DataFrame()
    for frame_id in gesture_df.index.values:
        tree_feature_df = get_features_tensor_from_tree(frame_id, trace_path)
        tree_features_df = pd.concat([tree_features_df, tree_feature_df], axis=0)
    trace_features_df = pd.concat([gesture_df, tree_features_df], axis=1)
    trace_features_df['trace_id'] = trace_id
    all_features_df = pd.concat([all_features_df, trace_features_df], axis=0)
    
all_features_df.head()
        

Unnamed: 0,x,y,obj_dom_pos,type_id_seq,visibility_seq,visibility_to_user_seq,clickable_seq,cord_x_seq,cord_y_seq,trace_id
2,0.094011,0.083688,"(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(4, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(0, shape=(), dtype=int64), tf.Tenso...","((tf.Tensor(0.0, shape=(), dtype=float32), tf....","((tf.Tensor(0.0, shape=(), dtype=float32), tf....",0
457,0.969031,0.02695,"(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(4, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(0, shape=(), dtype=int64), tf.Tenso...","((tf.Tensor(0.0, shape=(), dtype=float32), tf....","((tf.Tensor(0.0, shape=(), dtype=float32), tf....",0
2160,0.45461,0.648227,"(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(4, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(0, shape=(), dtype=int64), tf.Tenso...","((tf.Tensor(0.11111111, shape=(), dtype=float3...","((tf.Tensor(0.15585938, shape=(), dtype=float3...",0
2677,0.068794,0.070922,"(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(4, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(0, shape=(), dtype=int64), tf.Tenso...","((tf.Tensor(0.0, shape=(), dtype=float32), tf....","((tf.Tensor(0.0, shape=(), dtype=float32), tf....",0
4998,0.880772,0.821277,"(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(4, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(1, shape=(), dtype=int64), tf.Tenso...","(tf.Tensor(0, shape=(), dtype=int64), tf.Tenso...","((tf.Tensor(0.0, shape=(), dtype=float32), tf....","((tf.Tensor(0.0, shape=(), dtype=float32), tf....",0


In [35]:
# TODO: split by trace id
df = all_features_df
test_size = 6
test_ind = len(df) - test_size

train = df.iloc[:test_ind]
test = df.iloc[test_ind:]

In [36]:
#df['obj_dom_pos'].dtype

In [37]:
from keras.layers import Normalization

scaler = Normalization()
scaler_invert = Normalization(invert=True)

scaler.adapt(train)
scaler_invert.adapt(train)

scaled_train = scaler(train)
scaled_test = scaler(test)

print(scaler_invert(scaled_train))

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).

ValueError: setting an array element with a sequence.

defaultdict(list,
            {'obj_dom_pos': array([ 1,  1, 46,  2,  2, 45,  2, 45,  2,  2, 46,  1,  3,  3, 44,  3,  4,
                    43,  4,  5, 42,  5,  6, 41,  5, 44,  3,  6,  7, 40,  7,  8, 39,  7,
                    41,  6,  8,  9, 38,  8, 13, 34,  8, 36, 11,  8, 40,  7,  8, 42,  5,
                     8, 43,  4,  9, 10, 37,  9, 11, 36,  9, 12, 35,  9, 14, 33,  9, 37,
                    10, 10, 15, 32, 10, 17, 30, 10, 38,  9, 11, 16, 31, 11, 18, 29, 11,
                    19, 28, 11, 39,  8, 12, 20, 27, 12, 24, 23, 12, 28, 19, 12, 32, 15,
                    13, 21, 26, 13, 22, 25, 13, 23, 24, 13, 25, 22, 13, 26, 21, 13, 27,
                    20, 13, 29, 18, 13, 30, 17, 13, 31, 16, 13, 33, 14, 13, 34, 13, 13,
                    35, 12]),
             'node_id': array([b'0', b'0.0', b'0.1', b'0.2', b'0.0.0', b'0.0.1', b'0.0.1.0',
                    b'0.0.1.0.0', b'0.0.1.0.1', b'0.0.1.0.0.0', b'0.0.1.0.0.0.0',
                    b'0.0.1.0.0.0.1', b'0.0.1.0.0.0.0.0', 

In [None]:
for baseName in baseNames:
    file_prefix = f"{pathToCombined}/{baseName}.json"
    features = pp.create_simple_features(file_prefix)