In [3]:
import glob
#from pathlib import Path
import json
import os

import keras
import numpy as np
import pandas as pd
import tensorflow as tf

import preprocessing as pp

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

In [4]:
length = 5
n_features=3
max_embed_length=500
out_cat_embed_dim=4 # This may can be increased, default for screen2words: 2
out_nested_embed_dim=4 # This may should be increased, as this is a heavy dimension reduction
label_length=1
training_frames = length + label_length
batch_size = 32
dropout_fraction = 0.2

train_test_split = 0.25 # 1/4 the data is for testing
dataset_share = 0.2 # Should be 1.0 in production

In [5]:
def get_features_from_gestures(gesture_path):
  with tf.io.gfile.GFile(gesture_path) as f:
    gesture_json = json.load(f)
  trace_path = gesture_path.replace('gestures.json', '')
  # Throw away all gestures except the first
  gesture_items = list(gesture_json.items())
  # Remove empty gestures
  gesture_items = [gesture for gesture in gesture_items if gesture[1]]
  # TODO: Add column: isSwipe, if gesture has more than one entry
  if gesture_items and gesture_items[0]:
    gesture_dict = {gesture[0]:gesture[1][0] for gesture in gesture_items}
  else:
    gesture_dict = {}

  first_gesture_item = list(gesture_dict.items())
  if (not gesture_dict) or (not first_gesture_item[0]) or (not first_gesture_item[0][1]):
    # Return empty dataframe, if no data is available
    return pd.DataFrame(columns=['x', 'y']), trace_path
  
  gesture_df = pd.DataFrame(gesture_dict).transpose()
  gesture_df.columns = ['x', 'y']
  return gesture_df, trace_path

In [6]:
traces_prefix = "../sources/datasets/RICO/traces/filtered_traces"
path_to_gesture_json = f"{traces_prefix}/*/trace_*/gestures.json"
path_to_gesture_json

'../sources/datasets/RICO/traces/filtered_traces/*/trace_*/gestures.json'

In [7]:
gesture_json_paths = glob.glob(path_to_gesture_json)
len(gesture_json_paths)

10292

In [8]:
def get_features_from_tree(frame_index, trace_path):
  file_prefix = f"{trace_path}view_hierarchies/{frame_index}"
  features_dict = pp.create_simple_features(file_prefix)
  if features_dict is None:
    return None
  del features_dict['node_id']
  feature_items = list(features_dict.items())
  #feature_items = to_var_len_feature_dict(feature_items).items()
  tree_feature_df = pd.DataFrame(feature_items, columns=['feat', frame_index])
  tree_feature_df = tree_feature_df.set_index('feat')
  tree_feature_df = tree_feature_df.transpose()
  return tree_feature_df
  #return pd.DataFrame(features_adapt, index=[frame_index])

In [9]:
gesture_path = gesture_json_paths[43]
gesture_df, trace_path = get_features_from_gestures(gesture_path)
frameIndex = gesture_df.index.values[0]
feature_df = get_features_from_tree(frameIndex, trace_path)
feature_df

feat,obj_dom_pos,type_id_seq,visibility_seq,visibility_to_user_seq,clickable_seq,cord_x_seq_start,cord_x_seq_end,cord_y_seq_start,cord_y_seq_end
391,"[1, 1, 7, 2, 2, 6, 2, 6, 2, 2, 7, 1, 3, 3, 5, ...","[4, 4, 4, 4, 4, 4, 4]","[1, 1, 1, 1, 0, 1, 1]","[1, 1, 1, 1, 0, 1, 1]","[0, 0, 0, 0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0]","[0.0, 0.0, 0.934375, 0.0, 0.0, 0.0328125, 0.03...","[1.0, 0.934375, 1.0, 0.0328125, 0.0, 0.934375,..."


In [10]:
all_feat_df = pd.concat([gesture_df, feature_df], axis=1)
all_feat_df

Unnamed: 0,x,y,obj_dom_pos,type_id_seq,visibility_seq,visibility_to_user_seq,clickable_seq,cord_x_seq_start,cord_x_seq_end,cord_y_seq_start,cord_y_seq_end
391,0.515094,0.732075,"[1, 1, 7, 2, 2, 6, 2, 6, 2, 2, 7, 1, 3, 3, 5, ...","[4, 4, 4, 4, 4, 4, 4]","[1, 1, 1, 1, 0, 1, 1]","[1, 1, 1, 1, 0, 1, 1]","[0, 0, 0, 0, 0, 0, 1]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0]","[0.0, 0.0, 0.934375, 0.0, 0.0, 0.0328125, 0.03...","[1.0, 0.934375, 1.0, 0.0328125, 0.0, 0.934375,..."


In [11]:
#feature_df['type_id_seq'].iloc[0][0].dtype

In [None]:
 # Get set of frames for each screen, but don't mix them during training LSTM
feature_traces = pd.DataFrame()
use_embeddings = True

trace_progress = 0
for trace_id, gesture_path in enumerate(gesture_json_paths):
    trace_progress = trace_progress + 1
    if trace_progress % 100 == 0:
      print(f'Processed {trace_progress} of {len(gesture_json_paths)} ({trace_progress/len(gesture_json_paths):.0%}) traces.\r', end="")
    gesture_df, trace_path = get_features_from_gestures(gesture_path)
    # Traces with too few frames will be dropped
    if len(gesture_df) < training_frames:
        #print(f'Gesture (traceID: {trace_id}) has only {len(gesture_df)} of {training_frames} required frames: {gesture_path}')
        continue
    if use_embeddings: # Try without features for now, only gestures
        tree_features_df = pd.DataFrame()
        for frame_id in gesture_df.index.values:
            tree_feature_df = get_features_from_tree(frame_id, trace_path)
            if tree_feature_df is None:
              print(f'Frame {frame_id} has no feature data')
              continue
            tree_features_df = pd.concat([tree_features_df, tree_feature_df], axis=0)
        trace_features_df = pd.concat([gesture_df, tree_features_df], axis=1)
    else:
        trace_features_df = gesture_df
    trace_features_df['trace_id'] = trace_id
    feature_traces = pd.concat([feature_traces, trace_features_df], axis=0)

len(feature_traces)
        

Frame 7384 has no feature datatraces.
Processed 3500 of 10292 (34%) traces.

In [None]:
feature_traces.head()

In [None]:
feature_traces.describe()

In [None]:
gestures_only = False # Model only uses gestures / clicks, to determine the next click.
correct_gesture_shift = True # Shifts the gesture / click sequence one to the right, as the gesture of the current screen should be predicted, not the one on the next screen

# TODO set different input_dim for each feature
embedding_specs = [
  # feat_name, is_cat, input_dim, output_dim, # maybe needed: vocab_size
  ('x', False, 1, None, None),
  ('y', False, 1, None, None),
]

if not gestures_only:
  cat_embedding_specs = [
    ('cord_x_seq_start', False, max_embed_length, None, None),
    ('cord_x_seq_end', False, max_embed_length, None, None),
    ('cord_y_seq_start', False, max_embed_length, None, None),
    ('cord_y_seq_end', False, max_embed_length, None, None),
    ('obj_dom_pos', True, max_embed_length, out_cat_embed_dim, 255),
    ('type_id_seq', True, max_embed_length, out_cat_embed_dim, 10),
    ('visibility_seq', False, max_embed_length, None, None),
    ('visibility_to_user_seq', False, max_embed_length, None, None),
    ('clickable_seq', False, max_embed_length, None, None),
    # ('obj_dom_pos_2', True, max_embed_length, out_cat_embed_dim),
  ]
  embedding_specs.extend(cat_embedding_specs)
  
considered_features = [specs[0] for specs in embedding_specs]
considered_cat_features = [specs[0] for specs in embedding_specs if specs[1]]

print(considered_features)
print(considered_cat_features)

In [None]:
feature_traces_preprocessed = feature_traces
print(len(feature_traces_preprocessed))
feature_traces_preprocessed = feature_traces_preprocessed.dropna()
feature_traces_preprocessed = feature_traces_preprocessed[considered_features + ['trace_id']]
print(len(feature_traces_preprocessed))
feature_traces_preprocessed.head()

In [None]:
import math
import statistics

# Preprocess data
# for t,v in keys_to_dtype.items():
#   # try to convert float to list
#   feature_traces_preprocessed[t] = feature_traces_preprocessed[t].apply(lambda x : [np.nan if (y is None or isinstance(y, float)) else y for y in x] if isinstance(x, list) else [x])
# 
# #lens of list
# print(feature_traces_preprocessed.isna().sum())
# feature_traces_preprocessed

# List of median value count for each feature.
feat_median_array_length = {}

# Check if feature is an array
isFeatList = (feature_traces_preprocessed.map(type) == list).all(axis='rows')
max_dom_pos = 500 # see screen2words

def print_feat_infos(feat_name, isList):
  print(f'Handle Feature "{feat_name}":')
  print(f'Shape of Feature "{np.array(feature_traces_preprocessed[feat_name]).shape}":')
  if isList:
    lengths_of_feature = feature_traces_preprocessed[feat_name].map(len)
    # FIXME: check why header is given as first row
    # print(lengths_of_feature)
    # for f_length in lengths_of_feature:
    #   if not isinstance(f_length, float):
    #     print(f_length)
    try:
      median_value = statistics.median(lengths_of_feature)
      feat_median_array_length[feat_name] = median_value
      print(f'\thas median array length of {median_value}')
      print(f'\thas max array length of {max(lengths_of_feature)}')
    except:
      print(f'Failed to process lengths:\n {lengths_of_feature}')
      raise
  
    # Remove all NaN inner values (?): TODO check!!!
    feature_traces_preprocessed[feat_name] = feature_traces_preprocessed[feat_name].map(lambda row: [x for x in row if not math.isnan(x)])
  
    # Extract the maximum value:
    allvals = [x for xs in feature_traces_preprocessed[feat_name] for x in xs]
    print(f'\thas median value of {statistics.median(allvals)}')
    print(f'\thas max value of {max(allvals)}')
  
    # Pad all values to same length
    # TODO need to pad to higher values per input
    feat_pad = keras.utils.pad_sequences(feature_traces_preprocessed[feat_name], value=0, padding='post', maxlen=max_dom_pos)
    feature_traces_preprocessed[feat_name] = feat_pad.tolist()
  else:
    print(f'\tis single value only')

for feat_name, isList in isFeatList.items():
  print_feat_infos(feat_name, isList)

feat_median_array_length

In [None]:
# feature_traces_flatten = feature_traces_preprocessed
# for feat_name, isList in isFeatList.items():
#   if isList:
#     max_len = len(feature_traces_flatten[feat_name])
#     feature_traces_flatten = feature_traces_flatten.drop(feat_name, axis= 1)
#     feature_traces_flatten[[f'{feat_name}_{i}' for i in max_len]] = pd.DataFrame(feature_traces_preprocessed[feat_name].tolist(), index= feature_traces_preprocessed.index)
#     #feature_traces_preprocessed = feature_traces_preprocessed + feature_traces_preprocessed.teams.apply(pd.Series)
#     #df3.columns = ['team1', 'team2']

In [None]:
feature_traces_preprocessed.info()

In [None]:
# The data is split by trace and not by frame windows as otherwise the model would be trained with similar apps before
feature_traces_list = [pd.DataFrame(x[1]).drop('trace_id', axis=1) for x in feature_traces_preprocessed.groupby('trace_id')]

In [None]:
# Shift click sequence by one to predict click of current screen
def shift_click_sequence(trace_df):
  trace_df['x'] = trace_df['x'].shift(1, fill_value=0.5) # Choose 0.5 to use the screen center as entry value
  trace_df['y'] = trace_df['y'].shift(1, fill_value=0.5)
  return trace_df
  
if correct_gesture_shift:
  feature_traces_list = list(map(shift_click_sequence,feature_traces_list))

In [None]:
feature_traces_list[0]
# len(feature_traces_preprocessed[0].iloc[0]['obj_dom_pos'])

In [None]:
# TODO: add validation split and use test split only to evaluation

all_size = len(feature_traces_list)
print(f'All size: {all_size}')
share_size = math.floor(all_size * dataset_share)
print(f'Share size: {share_size}')
test_size = math.floor(share_size * train_test_split)
test_ind = share_size - test_size
print(f'Train size: {test_ind}')
print(f'Test size: {test_size}')

train_traces = feature_traces_list[:test_ind]
test_traces = feature_traces_list[test_ind:share_size]

In [None]:
#df['obj_dom_pos'].dtype

In [None]:
from keras.layers import Normalization

isScale = False # Currently all values are already scaled, as far as known
if isScale:
  scaler = Normalization()
  scaler_invert = Normalization(invert=True)
  
  print('Adapt scaler')
  for train in train_traces:
    scaler.adapt(train[['x','y']])
    scaler_invert.adapt(train[['x','y']])
  
  print('Scale train data')
  scaled_train_traces = train_traces
  for train in train_traces[['x','y']]:
    scaled_train_traces[['x','y']] = scaler(train)
  
  print('Scale test data')
  scaled_test_traces = test_traces
  for test in test_traces[['x','y']]:
    scaled_test_traces[['x','y']] = scaler(test)
  
  print(scaler_invert(scaled_train_traces[0]))
else:
  scaled_train_traces = train_traces
  scaled_test_traces = test_traces
  

In [None]:
scaled_train_traces[0]


In [None]:
# from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
# # Replace by timeseries_dataset_from_array
# generator = TimeseriesGenerator(scaled_train, scaled_train, length=length, batch_size=1)
# validation_generator = TimeseriesGenerator(scaled_test,scaled_test, length=length, batch_size=1)
# len(generator)

In [None]:
# X,y = generator[0]
# X

In [None]:
# Convert to list of dicts
scaled_train_traces = [v.to_dict(orient='records') for v in scaled_train_traces]
scaled_test_traces = [v.to_dict(orient='records') for v in scaled_test_traces]

In [None]:
def create_fake_dataset(length=5):
  my_arr = []
  for i in range(0, length):
    my_dict = {
      'x': np.random.uniform(0, 1),
      'y': np.random.uniform(0, 1),
      'obj_dom_pos': np.random.randint(0, 255, size=max_embed_length),
    }
    my_arr.append(my_dict)
  return my_arr

override_with_fake = False
if override_with_fake:
  scaled_train_traces = [create_fake_dataset(30) for i in range(0, 3)]
  scaled_test_traces = [create_fake_dataset(20) for i in range(0, 3)]

scaled_train_traces[0]
#for x in scaled_train_traces[0]:
#  print(x)

In [None]:
from keras.preprocessing.sequence import TimeseriesGenerator

In [None]:
from functools import reduce
def make_dataset_from_generator(data):
  generator = TimeseriesGenerator(data, data, length=length, batch_size=1)
  # shuffle=False, # TODO: Shuffle if possible
  return generator

# def reduce_generators(a,b):
#   a = list(map(lambda a1: a1, a))
#   b = list(map(lambda b1: b1, b))
#   return np.concatenate((a, b))

train_generators = list(map(make_dataset_from_generator,scaled_train_traces))
validation_generators = list(map(make_dataset_from_generator,scaled_test_traces))
print(f'#train generators: {len(train_generators)}')
print(f'#test generators: {len(validation_generators)}')
generator = train_generators[0]
validation_generator = validation_generators[0]
generator[0]
# train_dataset = reduce(reduce_generators, train_generators)
# print(train_dataset)

# print(generator)
# X,y = generator[0]
# print('X')
# print(X)
# print('y')
# print(y)

In [None]:
from keras.models import Model
from keras.layers import Dense,LSTM,Embedding, Input, Dropout
from keras.callbacks import EarlyStopping

In [None]:
generator[0][1]#[0][0]#['y'][:]

In [None]:
def merge_generators(generators):
  generators = [[v for v in g] for g in generators]
  return reduce(lambda a,b: a + b, generators)

def transform_generators_to_tuple(generators):
  timeseries = merge_generators(generators)
  # TODO: find out, why these arrays are nested in [0] and [0,0]
  return [xY[0][0] for xY in timeseries], [xY[1][0] for xY in timeseries]

# Filter out a feature from a time window to its own array
def filter_var(window, feat_name):
  return [step[feat_name] for step in window]

# def filter_var(generator, feat_name):
#   return [[[filter_var_row(row, feat_name) for row in g[0]],filter_var_row(g[1], feat_name)] for g in generator]

In [None]:
train_X, train_y = transform_generators_to_tuple(train_generators)
print(len(train_X))
print(len(train_y))

In [None]:
train_X[0]

In [None]:
train_y[0]

In [None]:
test_X, test_y = transform_generators_to_tuple(validation_generators)

In [None]:
def transform_window_to_input(window, feat_name, is_list):
  if is_list:
    return filter_var(window, feat_name)
  else:
    feat_vals = filter_var(window, feat_name)
    feat_vals = np.expand_dims(feat_vals, axis=1) # Bring to same shape as cats, just wrap the inner value in an array
    return feat_vals

# Transform list of windows of features to list of features of windows
def transform_X_to_inputs(X):
  inputs = []
  for feat_name, is_cat, input_dim, output_dim, vocab_dim in embedding_specs:
    feat_input = [transform_window_to_input(window, feat_name, is_list=input_dim > 1) for window in X]
    inputs.append(np.array(feat_input))
  return inputs

def transform_y_to_labels(y):
  labels_pos_x = filter_var(y, 'x')
  labels_pos_y = filter_var(y, 'y')
  return [np.array(labels_pos_x), np.array(labels_pos_y)]

train_inputs = transform_X_to_inputs(train_X)
train_labels = transform_y_to_labels(train_y)

test_inputs = transform_X_to_inputs(test_X)
test_labels = transform_y_to_labels(test_y)

for inp in train_inputs:
  print(inp.shape)

In [None]:
# https://stackoverflow.com/questions/52627739/how-to-merge-numerical-and-embedding-sequential-models-to-treat-categories-in-rn/52629902#comment136040845_52629902

In [None]:
numerical_inputs = []
num_embedded = []
cat_inputs = []
cat_embedded = []
for i, (feat_name, is_cat, input_length, out_cat_embed_dim, vocab_dim) in enumerate(embedding_specs):
  print(i, feat_name, is_cat, input_length, out_cat_embed_dim)
  if is_cat:
    # We have multiple inputs for the same category
    input = Input(shape=(length, input_length), name=feat_name)
    cat_inputs.append(input)
    embed = Embedding(input_dim=vocab_dim, output_dim=out_cat_embed_dim, mask_zero=True)(input)
    # Reshape embedding layer to be flattened:
    # we have `input_dim` values for this category, which should each be embedded in its own column
    # the third dimension is used as embedding dimension, which is shared with the float values dimension
    feat_embed_dim = input_length * out_cat_embed_dim
    embed = keras.layers.Reshape((length, feat_embed_dim))(embed)
    # TODO: play around with multiple DENSE and DROPOUT layers to reduce values of each category, which can be extremely high!! But also to raise performance.
    if feat_embed_dim > out_nested_embed_dim:
      embed = Dense(out_nested_embed_dim)(embed)
    cat_embedded.append(embed)
  elif input_length > 1:
    input = Input(shape=(length, input_length), name=feat_name)
    if input_length > out_nested_embed_dim:
      embed = Dense(out_nested_embed_dim)(input)
    else:
      embed = input
    numerical_inputs.append(input)
    num_embedded.append(embed)
  else:
    input = Input(shape=(length, input_length), name=feat_name)
    numerical_inputs.append(input)
    num_embedded.append(input)

cat_merged = []
num_merged = []
if len(cat_embedded) > 0:
  cat_merged = keras.layers.concatenate(cat_embedded)
  merged = cat_embedded
if len(num_embedded) > 0:
  num_merged = keras.layers.concatenate(num_embedded)
  merged = num_merged
if (len(cat_embedded) > 0) and (len(num_embedded) > 0):
  merged = keras.layers.concatenate([num_merged, cat_merged])
elif (len(cat_embedded) == 0) and (len(num_embedded) == 0):
  merged = keras.layers.concatenate(numerical_inputs)

# Consider adding `stateful=True` and `model.reset_states()` to keep interpreting windows as continous batch e.g. for one app.
# See: https://stackoverflow.com/a/50235563/5164462
out = LSTM(128, return_sequences=False)(merged)
# TODO: play with Dense layers and dropout layers
out = Dense(32)(out)
out = Dropout(dropout_fraction)(out)
# Can remove the dense layer, if want to predict the whole screen
out = Dense(2)(out)

model = Model(numerical_inputs + cat_inputs, out)
model.summary()

In [None]:
model.compile(optimizer='adam', loss='mse')

early_stop = EarlyStopping(monitor='val_loss',patience=4)

In [None]:
model.fit(
  train_inputs,
  train_labels,
  epochs=100,
  validation_data=(test_inputs,test_labels),
  callbacks=[early_stop],
  batch_size=batch_size
)

In [None]:
# For clicks only:
# loss: 0.0793
# val_loss: 0.0872
# 7 epochs, until early stop

# With features, but clicks not shifted:
# loss: 0.0824
# val_loss: 0.0948
# 9 epochs until early stop

In [None]:
loss = pd.DataFrame(model.history.history)
loss. index += 1
loss.plot()

In [None]:
## Evaluate on Test Data

In [None]:
scaled_test_traces_lengths = list(map(len, scaled_test_traces))
# Get the longest trace
longest_scaled_test_trace = max(scaled_test_traces_lengths)
print(f'Length of longest test trace: {longest_scaled_test_trace}')
index_longest_scaled_test_trace = scaled_test_traces_lengths.index(max(scaled_test_traces_lengths))
print(f'Index of longest test trace: {index_longest_scaled_test_trace}')

scaled_test_traces[105]

In [None]:
my_prediction_trace = index_longest_scaled_test_trace
scaled_test = scaled_test_traces[my_prediction_trace]

In [None]:
test_predictions = []

first_eval_batch = scaled_test[:length] # Use first batch from a test set and predict the next value
first_eval_batch = [first_eval_batch] # Must be wrapped in an array to represent shape of X (here: only one entry)
first_eval_batch = transform_X_to_inputs(first_eval_batch)
current_batch = first_eval_batch

print(current_batch)

# Of course cannot predict more than one step, as we only predict gesture_pos_x and gesture_pos_y and not the whole tree
# for i in range(len(scaled_test)):
# 
#   # get prediction 1 time stamp ahead ([0] is for grabbing just the number instead of [array])
#   current_pred = model.predict(current_batch)[0]
# 
#   # store prediction
#   test_predictions.append(current_pred)
#   
#   # drop first value
#   current_batch = [feat_input[:,1:,:] for feat_input in current_batch]
# 
#   # update batch to now include prediction
#   current_batch = np.append(current_batch,[[current_pred]],axis=1)

In [None]:
# Current prediction only
current_pred = model.predict(current_batch)
print(current_pred)
test_predictions.append(current_pred[0]) # Model only predicts one gesture per batch, this can be "easily" changed by handing over a batch of labels during training and setting `return_sequences` to true
test_predictions

In [None]:
test_predictions = np.array(test_predictions)
test_predictions

In [None]:
next_values = scaled_test[length:length + 1] # Remove the first batch and just select the next one, as we don't predict more right now
next_labels = transform_y_to_labels(next_values)
next_labels = np.transpose(next_labels, (1,0))
next_labels

In [None]:
if isScale:
  # TODO: validate
  true_predictions_labels = scaler_invert(test_predictions)
  true_next_labels = scaler_invert(next_labels)
else:
  true_predictions_labels = test_predictions
  true_next_labels = next_labels

comparison = np.concatenate([true_next_labels, true_predictions_labels], axis=1)
comparison_df = pd.DataFrame(comparison, columns=['x','y','PredictionsX', 'PredictionsY'])
comparison_df

In [None]:
comparison_df.plot(color = ['#00FF00', '#FF0000', '#00FF88', '#FF0088'])

In [None]:
from sklearn.metrics import mean_squared_error
print(np.sqrt(mean_squared_error(comparison_df['x'],comparison_df['PredictionsX'])))
print(np.sqrt(mean_squared_error(comparison_df['y'],comparison_df['PredictionsY'])))

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

plt.ylim(0, 1)
plt.xlim(0, 1)

comparison_2d = np.concatenate([true_next_labels, true_predictions_labels], axis=0)
comparison_2d = pd.DataFrame(comparison_2d, columns=['x', 'y'])
sns.scatterplot(x='x', y='y', data=comparison_2d)
# comparison_2d.plot.scatter(x='x', y='y', c='DarkBlue')