In [12]:
%reload_ext autoreload
%autoreload 2
import json
import sys
sys.path.append("../..")
from pathlib import Path
from data_processing.online_coordinate_data.create_dataset import *
import matplotlib.pyplot as plt
from hwr_utils import utils

import numpy as np
from hwr_utils.stroke_plotting import draw_from_gt

def distance_metric(x,y):
    """ Euclidean distance metric between x and x-1; first item in stroke has distance of epsilon
    Args:
        x: array-like
        y: array-like

    Returns:

    """

    output = np.zeros(x.size)
    output[1:] = ((x[:-1] - x[1:]) ** 2 + (y[:-1] - y[1:]) ** 2) ** (1 / 2)
    #output[0] = 1e-8
    return output

def reparameterize_as_func_of_distance(x,y,start_strokes):
    """ Instead of time, re-parameterize entire sequence as distance travelled

    Args:
        x: List of x's
        y: List of y's
        start_strokes: List of start stroke identifiers [1,0,0,1...

    Returns:
        distance travelled for each complete stroke
    """
    if isinstance(x, list):
        x=np.array(x)
    if isinstance(y, list):
        y=np.array(y)

    distances = distance_metric(x,y)
    distances[start_strokes==1] = 0
    cum_sum = np.cumsum(distances) # distance is 0 at first point; keeps length the same
    return cum_sum


x = np.array(range(0,10))
y = x.copy()
start_strokes = np.zeros(10)
start_strokes[0] =1
start_strokes[5] =1
start_strokes[8] =1

distances = distance_metric(x,y)
print(distances)
print(distances[start_strokes==1])
distances[start_strokes==1] = 0
print(distances)
    
reparameterize_as_func_of_distance(x,y,start_strokes)


[0.         1.41421356 1.41421356 1.41421356 1.41421356 1.41421356
 1.41421356 1.41421356 1.41421356 1.41421356]
[0.         1.41421356 1.41421356]
[0.         1.41421356 1.41421356 1.41421356 1.41421356 0.
 1.41421356 1.41421356 0.         1.41421356]


array([0.        , 1.41421356, 2.82842712, 4.24264069, 5.65685425,
       5.65685425, 7.07106781, 8.48528137, 8.48528137, 9.89949494])

In [None]:
start_strokes==1

In [None]:
def get_all_substrokes(stroke_dict, desired_num_of_strokes=3):
    """

    Args:
        stroke_dict: ['x', 'y', 't', 'start_times', 'x_to_y', 'start_strokes', 'raw', 'tmin', 'tmax', 'trange']
        desired_num_of_strokes:

    Returns:

    """
    if desired_num_of_strokes is None:
        yield stroke_dict
        return

    start_args = np.where(stroke_dict.start_strokes==1)[0] # returns an "array" of the list, just take first index
    start_args = np.append(start_args, None) # last start arg should be the end of the sequence

    # If fewer strokes, just return the whole thing
    if start_args.shape[0] <= desired_num_of_strokes:
        return stroke_dict

    for stroke_number in range(start_args.shape[0] - desired_num_of_strokes): # remember, last start_stroke is really the end stroke
        start_idx = start_args[stroke_number]
        end_idx = start_args[stroke_number + desired_num_of_strokes]

        t = stroke_dict.t[start_idx:end_idx].copy()
        x = stroke_dict.x[start_idx:end_idx].copy()
        y = stroke_dict.y[start_idx:end_idx].copy()
        raw = stroke_dict.raw[stroke_number:stroke_number + desired_num_of_strokes]
        start_strokes = stroke_dict.start_strokes[start_idx:end_idx]
        start_times = stroke_dict.start_times[stroke_number:stroke_number + desired_num_of_strokes + 1].copy()

        y, scale_param = normalize(y)
        x, scale_param = normalize(x, scale_param)
        x_to_y = np.max(x) / np.max(y)

        start_time = t[0]
        t -= start_time
        start_times -= start_time
        output = edict({"x": x,
                        "y": y,
                        "t": t,
                        "start_times": start_times,
                        "start_strokes": start_strokes,
                        "x_to_y":x_to_y,
                        "raw":raw})
        
        assert start_times[0]==t[0]
        yield output




# "../../data/online_coordinate_data/3_stroke_16_v2/train_online_coords.json"
# FROM ORIGINAL XMLs
json_path="../../data/prepare_online_data/online_augmentation.json"
original_img_folder = "prepare_online_data/lineImages"
xml_root = "../../data/prepare_online_data/line-level-xml/lineStrokes"

# # FROM GENERATED XMLs
# json_path="../../data/online_coordinate_data/3_stroke_16_v2/train_online_coords.json"
# original_img_folder = "online_coordinate_data/3_stroke_16_v2/images"
# xml_root = "../../data/prepare_online_data/line-level-xml/lineStrokes"


with open(json_path) as f:
    output_dict = json.load(f)
print(output_dict[0].keys())

for i in output_dict:
    if "a01-001w-08" in i["image_path"]:
        instance = i
        print(i["image_path"])

rel_path = Path(instance["image_path"]).relative_to(original_img_folder).with_suffix(".xml")
xml_path = xml_root / rel_path
print(xml_path)


stroke_list, _ = read_stroke_xml(xml_path)
print(stroke_list[1])
stroke_dict = prep_stroke_dict(stroke_list, time_interval=0, scale_time_distance=True) # list of dictionaries, 1 per file


xx = list(get_all_substrokes(stroke_dict))
xx = [stroke_dict]
for x in xx:
    plt.scatter(x["x"], x["y"])
    plt.plot(x["x"], x["y"])
    plt.show()
print(x)

In [None]:
json_path = "../../data/online_coordinate_data/3_stroke_16_v2/train_online_coords.json"

with open(json_path) as f:
    output_dict = json.load(f)

for x in output_dict:
    print(x.keys())
    plt.scatter(x["gt"][0], x["gt"][1])
    plt.plot(x["gt"][0], x["gt"][1])
    plt.show()
    stop


In [None]:
json_path = "../../data/online_coordinate_data/3_stroke_64_v2/train_online_coords.json"

with open(json_path) as f:
    output_dict = json.load(f)

for x in output_dict:
    if "a01-001w-08" in x["image_path"]:
        print(x.keys())
        plt.scatter(x["x"], x["y"])
        plt.plot(x["x"], x["y"])
        plt.show()


In [None]:
json_path = "../../data/online_coordinate_data/8_stroke_vSmall_16/train_online_coords.json"

with open(json_path) as f:
    output_dict = json.load(f)

    
for x in output_dict:
    #print(x["full_img_path"])
    if "a01-001w-08" in x["full_img_path"]:
        x = prep_stroke_dict(x["raw"], time_interval=0, scale_time_distance=True) # list of dictionaries, 1 per file
        plt.scatter(x["x"], x["y"])
        plt.plot(x["x"], x["y"])
        plt.axis('off')
        plt.axis('square')
        plt.show()
        print(x.keys())
        
# dict_keys(['full_img_path', 'xml_path', 'image_path', 'dataset', 'x', 'y', 't', 'start_times', 'start_strokes', 'x_to_y', 'raw', 'shape']) 

In [64]:
json_path = "../../data/online_coordinate_data/8_stroke_vSmall_16/train_online_coords.json"

parameter = "t"
with open(json_path) as f:
    output_dict = json.load(f)
  
i = 0
for x in output_dict:
    #print(x["full_img_path"])
    if "a01-001w-08" in x["full_img_path"]:
        i += 1
        if i ==20:
            break

# Original Processing
print(x.keys())
gt = np.array([x["x"],x["y"],x["start_strokes"]]).transpose(1,0)
draw_from_gt(gt, show=True, use_stroke_number=False)

output = prep_stroke_dict(x["raw"], time_interval=0, scale_time_distance=True) # list of dictionaries, 1 per file
print(output.keys())

# After prepping
gt = np.array([output["x"],output["y"],output["start_strokes"]]).transpose(1,0)
draw_from_gt(gt, show=True, use_stroke_number=False)

# After sampling
x_func, y_func = stroke_recovery.create_functions_from_strokes(output, parameter=parameter) # can be d if the function should be a function of distance
starts = output.start_times if parameter=="t" else output.start_distances
x, y, is_start_stroke = stroke_recovery.sample(x_func, y_func, starts, 1000, noise=None)

# plt.scatter(x,y)
# plt.plot(x,y)
gt = np.array([x,y,is_start_stroke]).transpose([1,0])
draw_from_gt(gt, show=True, use_stroke_number=False)

dict_keys(['full_img_path', 'xml_path', 'image_path', 'dataset', 'x', 'y', 't', 'start_times', 'start_strokes', 'x_to_y', 'raw', 'shape'])
dict_keys(['x', 'y', 't', 'd', 'start_times', 'start_distances', 'x_to_y', 'start_strokes', 'raw', 'tmin', 'tmax', 'trange', 'drange'])


array([[255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       [255, 255, 255, ..., 255, 255, 255],
       ...,
       [  0,   0,   0, ..., 255, 255, 255],
       [  0,   0,   0, ..., 255, 255, 255],
       [255,   0,   0, ..., 255, 255, 255]], dtype=uint8)

In [49]:
from hwr_utils.stroke_recovery import *

background = tuple([255])

img = Image.new("L", (50, 50), background)
draw = ImageDraw.Draw(img)
color = 0
linewidth = 2
pil_format = [np.array([[10,20,30],[10,20,30]]), np.array([[15],[15]])]

for line in pil_format:
    if line.size > 2:
        line = [tuple(x) for x in line.flatten().reshape(-1,2).tolist()]
        draw.line(line, fill=color, width=linewidth, joint='curve')
        draw.ellipse((15, 15, 16, 16), fill = 'blue', outline ='blue')

img.show()

In [63]:
from hwr_utils.stroke_recovery import *

background = tuple([255])

img = Image.new("L", (50, 50), background)
draw = ImageDraw.Draw(img)
color = 0
linewidth = 3
pil_format = [np.array([[10,20,30],[10,20,30]]), np.array([[15],[15]])]

for line in pil_format:
    if line.size > 2:
        line = [tuple(x) for x in line.flatten().reshape(-1,2).tolist()]
        draw.line(line, fill=color, width=linewidth, joint='curve')
    elif line.size == 2:
        line1 = line - linewidth/2
        line2 = line + linewidth/2
        line = np.r_[line1, line2].flatten().tolist()
        draw.ellipse(line, fill = 'black', outline ='black')

# line = (20,5,6,19)
# draw.ellipse(line, fill = 'blue', outline ='blue')
# draw.ellipse((15, 15, 16, 16), fill = 'blue', outline ='blue')


img.show()

In [11]:
json_path = "../../data/online_coordinate_data/8_stroke_vSmall_16/train_online_coords.json"

parameter = "d"
with open(json_path) as f:
    output_dict = json.load(f)
  
i = 0
for x in output_dict:
    #print(x["full_img_path"])
    if "a01-001w-08" in x["full_img_path"]:
        i += 1
        if i ==20:
            break
         
output = prep_stroke_dict(x["raw"], time_interval=0, scale_time_distance=True) # list of dictionaries, 1 per file

# After sampling
x_func, y_func = stroke_recovery.create_functions_from_strokes(output, parameter=parameter) # can be d if the function should be a function of distance
starts = output.start_times if parameter=="t" else output.start_distances
x, y, is_start_stroke = stroke_recovery.sample(x_func, y_func, starts, 1000, noise=None)
gt = np.array([x,y,is_start_stroke]).transpose([1,0])
img = draw_from_gt(gt, show=True, use_stroke_number=False, plot_points=True)

In [126]:
from hwr_utils.stroke_recovery import *
max_dist = .2
path = "/media/data/GitHub/simple_hwr/RESULTS/TEST_20200220_163954-baseline/imgs/1/train/example_data.pickle"
path = r"/home/taylor/shares/brodie/github/simple_hwr/RESULTS/ver2/20200220_164052-normal/imgs/9/train/example_data.pickle"
d = utils.unpickle_it(path)

preds = [x.detach().numpy() for x in d["preds"]]
item = d["item"]
gt = d["item"]["gt_list"][0].numpy()
pred = preds[0].transpose(1,0)
gt[:,2] = relativefy_numpy(gt[:,2])

gt[175][0:2] = np.array([2.5,.9])
gt[175][0:3] = np.array([2.5,.9,0])


distances = distance_metric(gt[:, 0], gt[:, 1])

# Where are the distances big
idx = np.argwhere(distances > max_dist).reshape(-1)
not_first_stroke = np.argwhere(gt[:, 2]==0).flatten()
bad_points = idx[np.where(np.diff(idx) == 1)]
bad_points = np.intersect1d(not_first_stroke, bad_points)

# Delete them
gt = np.delete(gt, bad_points, axis=0)

# Add new start point
gt[bad_points, 2] = 1
print(bad_points)
stop
draw_from_gt(gt, show=True, use_stroke_number=True, plot_points=True)
#draw_from_gt(pred, show=True, use_stroke_number=False, plot_points=True)


[175]


NameError: name 'stop' is not defined

In [78]:
np.diff(idx) 

array([28, 26,  4, 22, 27, 21,  1,  4, 11, 63, 32, 19, 45, 27, 40, 16, 12,
       22,  3, 67, 28])

In [98]:
path = "/media/data/GitHub/simple_hwr/RESULTS/TEST_20200220_163954-baseline/imgs/1/train/example_data.pickle"
path = r"/home/taylor/shares/brodie/github/simple_hwr/RESULTS/ver2/20200220_164052-normal/imgs/21/train/example_data.pickle"
d = utils.unpickle_it(path)

preds = [x.detach().numpy() for x in d["preds"]]
item = d["item"]
gt = d["item"]["gt_list"][0].numpy()
pred = preds[0].transpose(1,0)

draw_from_gt(gt, show=True, use_stroke_number=True, plot_points=True)
draw_from_gt(pred, show=True, use_stroke_number=False, plot_points=True)
pass