In [1]:
import svgwrite
import numpy as np
import time
import random
import pickle
import codecs
import collections
import os
import math
import json
import tensorflow as tf
from six.moves import xrange
import pandas as pd

In [2]:
data = np.load("cloud.npz", encoding="latin1", allow_pickle=True)
train = data["train"]

In [3]:
# helper function for draw_strokes
def get_bounds(data, factor):
    min_x = 0
    max_x = 0
    min_y = 0
    max_y = 0

    abs_x = 0
    abs_y = 0
    for i in xrange(len(data)):
        x = float(data[i,0])/factor
        y = float(data[i,1])/factor
        abs_x += x
        abs_y += y
        min_x = min(min_x, abs_x)
        min_y = min(min_y, abs_y)
        max_x = max(max_x, abs_x)
        max_y = max(max_y, abs_y)
    
    return (min_x, max_x, min_y, max_y)

# little function that displays vector images and saves them to .svg
def draw_strokes(data, factor=2, svg_filename = 'sample.svg'):
    min_x, max_x, min_y, max_y = get_bounds(data, factor)
    dims = (50 + max_x - min_x, 50 + max_y - min_y)
    dwg = svgwrite.Drawing(svg_filename, size=dims)
    dwg.add(dwg.rect(insert=(0, 0), size=dims,fill='white'))
    lift_pen = 1
    abs_x = 25 - min_x 
    abs_y = 25 - min_y
    p = "M%s,%s " % (abs_x, abs_y)
    command = "m"
    for i in xrange(0,20):
        if (lift_pen == 1):
            command = "m"
        elif (command != "l"):
            command = "l"
        else:
            command = ""
        x = float(data[i,0])/factor
        y = float(data[i,1])/factor
        lift_pen = data[i, 2]
        p += command+str(x)+","+str(y)+" "
    the_color = "red"
    stroke_width = 2
    dwg.add(dwg.path(p).stroke(the_color,stroke_width).fill("none"))
    for i in xrange(20,len(data)):
        if (lift_pen == 1):
            command = "m"
        elif (command != "l"):
            command = "l"
        else:
            command = ""
        x = float(data[i,0])/factor
        y = float(data[i,1])/factor
        lift_pen = data[i, 2]
        p += command+str(x)+","+str(y)+" "
    the_color = "black"
    stroke_width = 1
    dwg.add(dwg.path(p).stroke(the_color,stroke_width).fill("none"))
    dwg.save()
    display(SVG(dwg.tostring()))

# generate a 2D grid of many vector drawings
def make_grid_svg(s_list, grid_space=10.0, grid_space_x=15.0):
    
    def get_start_and_end(x):
        x = np.array(x)
        x = x[:, 0:2]
        x_start = x[0]
        x_end = x.sum(axis=0)
        x = x.cumsum(axis=0)
        x_max = x.max(axis=0)
        x_min = x.min(axis=0)
        center_loc = (x_max+x_min)*0.5
        return x_start-center_loc, x_end
  
    x_pos = 0.0
    y_pos = 0.0
    result = [[x_pos, y_pos, 1]]
    
    for sample in s_list:
        s = sample[0]
        grid_loc = sample[1]
        grid_y = grid_loc[0]*grid_space+grid_space*0.5
        grid_x = grid_loc[1]*grid_space_x+grid_space_x*0.5
        start_loc, delta_pos = get_start_and_end(s)

        loc_x = start_loc[0]
        loc_y = start_loc[1]
        new_x_pos = grid_x+loc_x
        new_y_pos = grid_y+loc_y
        result.append([new_x_pos-x_pos, new_y_pos-y_pos, 0])

        result += s.tolist()
        result[-1][2] = 1
        x_pos = new_x_pos+delta_pos[0]
        y_pos = new_y_pos+delta_pos[1]
    return np.array(result)

In [4]:
from typing import List, Tuple
import numpy as np
from IPython.display import clear_output
from sklearn.preprocessing import MinMaxScaler

def create_window(input_data: np.array, window_size: int) -> np.array:
    data_len = len(input_data)
    result = np.zeros((data_len-window_size+1, window_size, *input_data.shape[1:]))
    for i in range(data_len):
        if i+window_size <= data_len:
            result[i] = input_data[i:i+window_size]
    return result


def create_window_on_multiple_samples(input_data: np.array, window_size: int) -> np.array:
    """
    Similar to create_window, but now can take multiple samples, will output in one
    giant windowed np.array.
    """
    windowed_data = []
    for i, sample in enumerate(input_data):
        windowed_data.append(create_window(sample[:,:2], window_size))
        if i % 10000 == 0:
            print(f"Now at {i}")
            clear_output(wait=True)
    result = np.concatenate(windowed_data)
    print(f"Done processing {i} samples, total of {result.shape[0]} windows and {result.shape[0] * result.shape[1]} datapoints")
    return np.concatenate(windowed_data)


def split_train_test(input_data: List) -> Tuple[np.array, np.array]:
#     return input_data[:,0:-1], input_data[:,-1]
    return input_data[:,:-1], input_data[:,-1]

def normalise_windows(window_data):
    # A support function to normalize a dataset
    scaler = MinMaxScaler()
    normalised_data = []
    for i, window in enumerate(window_data):
        scaled = scaler.fit_transform(window)
        normalised_data.append(scaled)
        if i % 10000 == 0:
            print(f"Now at {i}")
    return np.concatenate(normalised_data)

In [27]:
x = create_window_on_multiple_samples(train[:100], 20)

Done processing 99 samples, total of 3903 windows and 78060 datapoints


In [6]:
# X_n = normalise_windows(x[:5000])
# normalise_windows(x)
# print(f" x = {len(x)} \t train = {train.shape}")

In [29]:
X, Y = split_train_test(x)

In [30]:
X_shape, nx, ny = X.shape

X.shape

(3903, 19, 2)

In [31]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from matplotlib import pyplot

# Initialize LSTM
model = Sequential()
# print (train_set[3].shape)
model.add(LSTM(units=10, return_sequences=True,
     input_shape=(X.shape[0], 2)))

# Adding a second LSTM layer and Dropout regularisation
# model.add(LSTM(units = 50, return_sequences = True))
# model.add(Dropout(0.2))

# Adding a third LSTM layer and Dropout regularisation
# model.add(LSTM(units = 10, return_sequences = True))
# model.add(Dropout(0.2))

# # Adding a fourth LSTM layer and Dropout regularisation
# model.add(LSTM(units = 10))
# model.add(Dropout(0.2))

# Adding the output layer
model.add(Dense(units = 2))
print(model.summary())

Model: "sequential_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_18 (LSTM)               (None, 3903, 10)          520       
_________________________________________________________________
dense_8 (Dense)              (None, 3903, 2)           22        
Total params: 542
Trainable params: 542
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# Compiling the RNN
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

# history = model.fit(X, Y, epochs=10, validation_data=(test_set, valid_set), shuffle=False)
# model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
history = model.fit(X, Y, epochs=100, batch_size=1, verbose=2)
# history = model.fit(X, Y, epochs = 100)

# plot train and validation loss
pyplot.plot(history.history['loss'])
# pyplot.plot(history.history['val_loss'])
pyplot.title('model train vs validation loss')
pyplot.ylabel('loss')
pyplot.xlabel('epoch')
pyplot.legend(['train', 'validation'], loc='upper right')
pyplot.show()

Epoch 1/100
3903/3903 - 13s - loss: 979.5454
Epoch 2/100
3903/3903 - 12s - loss: 969.5258
Epoch 3/100
3903/3903 - 12s - loss: 960.3857
Epoch 4/100
3903/3903 - 12s - loss: 951.1368
Epoch 5/100
3903/3903 - 13s - loss: 944.0785
Epoch 6/100
3903/3903 - 12s - loss: 937.5091
Epoch 7/100
3903/3903 - 12s - loss: 930.4607
Epoch 8/100
3903/3903 - 11s - loss: 928.5616
Epoch 9/100
3903/3903 - 11s - loss: 927.1063
Epoch 10/100
3903/3903 - 11s - loss: 919.8511
Epoch 11/100
3903/3903 - 11s - loss: 917.8271
Epoch 12/100
3903/3903 - 12s - loss: 917.9390
Epoch 13/100
3903/3903 - 12s - loss: 914.6411
Epoch 14/100
3903/3903 - 12s - loss: 914.3954
Epoch 15/100
3903/3903 - 12s - loss: 910.4819
Epoch 16/100
3903/3903 - 12s - loss: 912.2193
Epoch 17/100
3903/3903 - 12s - loss: 909.6640
Epoch 18/100
3903/3903 - 12s - loss: 907.0003
Epoch 19/100


In [77]:
ts = model.predict(X[:20])
ts.shape
draw_strokes(ts)

IndexError: index 2 is out of bounds for axis 1 with size 2