In [1]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences




In [27]:
# Load SketchRNN data from .npz files
def load_data(npz_file):
    data = np.load(npz_file, allow_pickle=True, encoding='latin1')
    num_row = int(len(data['train']) * 0.1)
    return data['train'][:num_row]

In [28]:
# Load datasets
dog_data = load_data('sketchrnn_dog.npz')
cat_data = load_data('sketchrnn_cat.npz')
butterfly_data = load_data('sketchrnn_butterfly.npz')

In [29]:

# Combine datasets
combined_data = np.concatenate([dog_data, cat_data, butterfly_data])

In [46]:
print(combined_data.shape)
print(combined_data[9].shape)

(21000,)
(69, 3)


In [30]:
# Define maximum sequence length
max_seq_len = max(len(sketch) for sketch in combined_data)

In [48]:
max_seq_len

133

In [31]:
# Pad sequences to the same length and extract features
def preprocess_data(data, max_seq_len):
    sequences = []
    for sketch in data:
        sequence = []
        for i in range(len(sketch)):
            dx, dy, state = sketch[i]
            sequence.append([dx, dy, state])
        sequences.append(sequence)
    padded_sequences = pad_sequences(sequences, maxlen=max_seq_len, padding='post', dtype='float32')
    return padded_sequences

In [32]:
train_sequences = preprocess_data(combined_data, max_seq_len)

In [50]:
print(train_sequences.shape)
print(train_sequences[9].shape)
print(train_sequences[9])

(21000, 133, 3, 1)
(133, 3, 1)
[[[   0.]
  [  27.]
  [   0.]]

 [[   8.]
  [  38.]
  [   0.]]

 [[   8.]
  [  24.]
  [   0.]]

 [[  19.]
  [  24.]
  [   0.]]

 [[  20.]
  [  10.]
  [   0.]]

 [[  17.]
  [  -7.]
  [   0.]]

 [[  20.]
  [ -29.]
  [   0.]]

 [[  11.]
  [ -54.]
  [   0.]]

 [[  10.]
  [ -27.]
  [   0.]]

 [[   3.]
  [ -26.]
  [   1.]]

 [[ -90.]
  [ 117.]
  [   0.]]

 [[   2.]
  [  -6.]
  [   0.]]

 [[  18.]
  [ -20.]
  [   0.]]

 [[  15.]
  [   0.]
  [   0.]]

 [[  14.]
  [  17.]
  [   0.]]

 [[   0.]
  [  22.]
  [   1.]]

 [[ -13.]
  [ -26.]
  [   0.]]

 [[  -8.]
  [   6.]
  [   0.]]

 [[  -4.]
  [  19.]
  [   0.]]

 [[   5.]
  [  -6.]
  [   0.]]

 [[   2.]
  [  -8.]
  [   0.]]

 [[   0.]
  [ -14.]
  [   0.]]

 [[  -3.]
  [  -4.]
  [   0.]]

 [[  -8.]
  [  11.]
  [   0.]]

 [[   3.]
  [  16.]
  [   0.]]

 [[   7.]
  [   1.]
  [   0.]]

 [[   8.]
  [  -6.]
  [   0.]]

 [[   1.]
  [  -8.]
  [   0.]]

 [[ -10.]
  [  -2.]
  [   0.]]

 [[ -18.]
  [   6.]
  [   0.]]

 [[  -2.]

## Augment

In [34]:
import tensorflow as tf

def augment_data(sequences):
    # Example of augmenting sequences by adding Gaussian noise
    noise_factor = 0.05
    augmented_sequences = sequences + noise_factor * np.random.randn(*sequences.shape)
    return augmented_sequences

In [35]:
# Apply data augmentation
augmented_sequences = augment_data(train_sequences)

In [53]:
print(augmented_sequences.shape)
print(augmented_sequences[9].shape)
print(augmented_sequences[9])

(21000, 133, 3, 1)
(133, 3, 1)
[[[ 4.24426737e-02]
  [ 2.70093231e+01]
  [ 8.98629045e-02]]

 [[ 8.03831187e+00]
  [ 3.81375116e+01]
  [ 5.41318043e-03]]

 [[ 7.95885425e+00]
  [ 2.39556281e+01]
  [ 2.96324171e-02]]

 [[ 1.89233778e+01]
  [ 2.40645455e+01]
  [ 5.11166529e-02]]

 [[ 1.99565862e+01]
  [ 1.00313048e+01]
  [-3.65714916e-02]]

 [[ 1.69726717e+01]
  [-7.04302795e+00]
  [ 1.50090536e-02]]

 [[ 1.99999389e+01]
  [-2.89215804e+01]
  [-4.09527991e-02]]

 [[ 1.09962459e+01]
  [-5.39634817e+01]
  [-1.03168918e-01]]

 [[ 9.98049465e+00]
  [-2.71167876e+01]
  [-2.98724091e-03]]

 [[ 2.84624338e+00]
  [-2.60613194e+01]
  [ 8.72964005e-01]]

 [[-9.00609483e+01]
  [ 1.16888004e+02]
  [-9.95898698e-02]]

 [[ 2.04410104e+00]
  [-5.96948152e+00]
  [-4.39730625e-03]]

 [[ 1.79095136e+01]
  [-2.00511859e+01]
  [-6.17701314e-02]]

 [[ 1.50298129e+01]
  [ 3.66253034e-02]
  [ 3.43938398e-02]]

 [[ 1.39531685e+01]
  [ 1.69422791e+01]
  [-6.11227002e-02]]

 [[-6.70174858e-02]
  [ 2.20155370e+01]

## Modelling

In [36]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, LSTM, Dense, TimeDistributed

In [41]:
# Define the model architecture
model = Sequential([
    TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(max_seq_len, 3, 1)),
    TimeDistributed(MaxPooling1D(pool_size=1)),
    TimeDistributed(Flatten()),
    LSTM(256, return_sequences=True),
    LSTM(256),
    Dense(3, activation='softmax')  # 3 classes: dog, cat, butterfly
])

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

## Train

In [42]:
# Reshape data to add a channel dimension for CNN
train_sequences = np.expand_dims(train_sequences, axis=-1)
augmented_sequences = np.expand_dims(augmented_sequences, axis=-1)

In [43]:
# Assuming you have labels for the combined data
combined_labels = np.concatenate([np.zeros(len(dog_data)), np.ones(len(cat_data)), np.full(len(butterfly_data), 2)])

In [44]:
# Train the model
model.fit(augmented_sequences, combined_labels, epochs=10, batch_size=64, validation_split=0.2)

Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x14b15cd5610>