# Real-Valued Coordinate Prediction Using Multi-Label Multi-Class Models

This notebook aims to predict coordinates using multi-label multi-class models.

In [1]:
%matplotlib inline

# system libraries
import os
from glob import glob
import logging

# numerical,image and plotting stuff
import pandas as pd
import numpy as np
from PIL import Image
from skimage import io
import skimage.transform as tf
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid", {'axes.grid' : False})


We have a length-w\*w vector which we want to get the x and y coordinates of the entry with the highest signal.

x1, x2,              ... xw,

xw+1, xw+2,          ... x2w,

...

x(w-1)w+1, x(w-1)w+2,... xw\*w

y1 would be a w-vector indicating the x coordinate with the signal, y2 would be the corresponding w-vector for the y-coordinate.

we will train the inputs on 2 fc layers and then learn the coordinates. Each output y would have its own softmax stacked on top of the final fc layer

In [2]:
rng = np.random.RandomState(0)
n = 1000000
w = 10
X = np.round(rng.uniform(low=-1,high=1,size=(n,w*w)),decimals=2)

This is how our input data will look like:

In [3]:
X[0:5,:].reshape((5,w,w))

array([[[ 0.1 ,  0.43,  0.21,  0.09, -0.15,  0.29, -0.12,  0.78,  0.93,
         -0.23],
        [ 0.58,  0.06,  0.14,  0.85, -0.86, -0.83, -0.96,  0.67,  0.56,
          0.74],
        [ 0.96,  0.6 , -0.08,  0.56, -0.76,  0.28, -0.71,  0.89,  0.04,
         -0.17],
        [-0.47,  0.55, -0.09,  0.14, -0.96,  0.24,  0.22,  0.23,  0.89,
          0.36],
        [-0.28, -0.13,  0.4 , -0.88,  0.33,  0.34, -0.58, -0.74, -0.37,
         -0.27],
        [ 0.14, -0.12,  0.98, -0.8 , -0.58, -0.68,  0.31, -0.49, -0.07,
         -0.51],
        [-0.68, -0.78,  0.31, -0.72, -0.61, -0.26,  0.64, -0.81,  0.68,
         -0.81],
        [ 0.95, -0.06,  0.95,  0.21,  0.48, -0.92, -0.43, -0.76, -0.41,
         -0.76],
        [-0.36, -0.17, -0.87,  0.38,  0.13, -0.47,  0.05, -0.81,  0.15,
          0.86],
        [-0.36,  0.33, -0.74,  0.43, -0.42, -0.63,  0.17, -0.96,  0.66,
         -0.99]],

       [[ 0.36, -0.46,  0.47,  0.92, -0.5 ,  0.15,  0.18,  0.14, -0.55,
          0.91],
        [-0.11,  0.

In [4]:
# prepare Y outputs for regression
y1 = np.int16(X.argmax(axis=1)/w).reshape((n,1))
y2 = np.int16(X.argmax(axis=1)%w).reshape((n,1))
print(y1[0:5])
print(y2[0:5])
Y_reg = np.hstack((y1,y2))

[[5]
 [4]
 [1]
 [4]
 [7]]
[[2]
 [9]
 [0]
 [9]
 [8]]


In [5]:
from sklearn.preprocessing import OneHotEncoder
y1 = np.int16(X.argmax(axis=1)/w).reshape((n,1))
y2 = np.int16(X.argmax(axis=1)%w).reshape((n,1))
enc = OneHotEncoder(sparse=False)
enc.fit(np.arange(w).reshape(w,1))
y1 = enc.transform(y1)
y2 = enc.transform(y2)
print(y1[0:5,:])
print(y2[0:5,:])
Y_sm = np.hstack([y1,y2])
Y_sm_half = np.hstack([y1,y2])
print(Y_sm[0:5,:])

[[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]]
[[ 0.  0.  1.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  1.  0.]]
[[ 0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.
   0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  1.]
 [ 0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.
   0.  0.]
 [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   0.  1.]
 [ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
   1.  0.]]


We begin by testing a model with a softmax on the top, combining over both coordinate outputs

In [6]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers.pooling import MaxPooling1D
from keras.optimizers import SGD, RMSprop

m_sm = Sequential([
    Dense(16, input_dim=w*w),
    Activation('tanh'),
    Dropout(0.2),
    Dense(16, input_dim=8),
    Activation('tanh'),
    Dropout(0.2),
    Dense(2*w),
    Activation('softmax'),
])

m_reg = Sequential([
    Dense(16, input_dim=w*w),
    Activation('tanh'),
    Dropout(0.2),
    Dense(16, input_dim=8),
    Activation('tanh'),
    Dropout(0.2),
    Dense(2),
    Activation('linear'),
])


Using TensorFlow backend.


In [8]:
# train regression model
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
m_reg.compile(optimizer=sgd,loss='mean_squared_error',metrics=['mae'])
m_reg.fit(x=X,y=Y_reg,batch_size=64,nb_epoch=2,validation_split=0.2)
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
m_reg.layers[2] = Dropout(0.)
m_reg.layers[5] = Dropout(0.)
m_reg.compile(optimizer=sgd,loss='mean_squared_error',metrics=['mae'])
m_reg.fit(x=X,y=Y_reg,batch_size=64,nb_epoch=1,validation_split=0.2)
sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
m_reg.compile(optimizer=sgd,loss='mean_squared_error',metrics=['mae'])
m_reg.fit(x=X,y=Y_reg,batch_size=64,nb_epoch=1,validation_split=0.2)

Train on 800000 samples, validate on 200000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 800000 samples, validate on 200000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
Train on 800000 samples, validate on 200000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x11b286748>

In [12]:
# train softmax model
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
m_sm.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['accuracy'])
m_sm.fit(x=X,y=Y_sm_half,batch_size=64,nb_epoch=2,validation_split=0.2)
sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True)
m_sm.layers[2] = Dropout(0.)
m_sm.layers[5] = Dropout(0.)
m_sm.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['accuracy'])
m_sm.fit(x=X,y=Y_sm_half,batch_size=64,nb_epoch=1,validation_split=0.2)
sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True)
m_sm.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['accuracy'])
m_sm.fit(x=X,y=Y_sm_half,batch_size=64,nb_epoch=1,validation_split=0.2)

Train on 800000 samples, validate on 200000 samples
Epoch 1/2
Epoch 2/2
Train on 800000 samples, validate on 200000 samples
Epoch 1/1
Train on 800000 samples, validate on 200000 samples
Epoch 1/1


<keras.callbacks.History at 0x140ce3a20>

In [9]:
k = 100
y_pred = m_reg.predict(X[0:k,:])
y1_pred = y_pred[:,0]
y2_pred = y_pred[:,1]
y1_true = Y_reg[:k,0]
y2_true = Y_reg[:k,1]
print(y1_pred[0:5])
print(y1_true[0:5])
print(y2_pred[0:5])
print(y2_true[0:5])

mae = (np.abs(y1_pred-y1_true) + np.abs(y2_pred-y2_true)).sum()/2/k/(w-1)
print('mean proportion error %.2f' % mae)

[ 3.17538571  3.24850011  4.63944006  4.9945178   4.85439539]
[5 4 1 4 7]
[ 4.29260015  4.30029583  5.08920574  4.69491196  4.74303722]
[2 9 0 9 8]
mean proportion error 0.13


In [13]:
k = 100
y_pred = m_sm.predict_proba(X[0:k,:])
y1_pred = y_pred[:,0:w].argmax(axis=1)
y2_pred = y_pred[:,w:(2*w)].argmax(axis=1)
y1_true = Y_sm[0:k].argmax(axis=1)
y2_true = Y_sm[k:(2*k)].argmax(axis=1)
print(y1_pred)
print(y1_true)
print(y2_pred)
print(y2_true)
acc = ((y1_pred==y1_true).sum()+(y2_pred==y2_true).sum())/2/k
print('accuracy %.3f' % acc)
mae = (np.abs(y1_pred-y1_true) + np.abs(y2_pred-y2_true)).sum()/2/k/(w-1)
print('mean proportion error %.2f' % mae)

 5 2 1 1 1 2 4 6 4 3 2 0 4 1 0 3 1 4 3 0 5 0 2 1 2 4 0 2 3 6 5 1 0 0 3 0 0
 1 3 2 5 3 1 3 2 2 2 0 0 1 1 1 2 0 4 2 1 2 3 2 0 1 2]
[5 4 1 4 7 9 1 6 6 7 2 2 2 1 2 5 0 8 1 5 3 2 5 6 6 3 2 4 3 3 9 0 5 6 0 1 9
 5 0 6 8 2 0 1 5 6 4 2 8 4 8 5 3 9 0 6 6 3 7 2 6 9 2 9 2 4 6 9 3 8 2 5 8 7
 2 1 9 4 1 6 2 6 7 6 5 8 1 4 6 3 1 7 3 7 7 7 6 5 8 8]
[8 4 9 9 1 9 2 9 5 4 8 9 2 5 3 1 6 0 0 1 8 4 9 6 5 3 1 2 8 9 2 7 8 6 1 0 9
 3 9 1 7 4 0 3 7 8 9 2 1 0 5 4 9 0 6 7 7 0 7 7 7 4 4 0 7 7 7 0 5 0 9 6 6 5
 0 3 2 6 3 6 9 9 6 1 1 5 8 8 3 8 2 5 5 7 8 8 0 4 3 8]
[0 7 9 9 6 9 9 7 4 1 4 2 6 3 0 6 8 4 3 3 4 3 5 0 6 2 8 8 7 0 3 3 4 4 0 0 2
 1 0 5 1 1 2 0 2 3 7 9 4 8 1 3 2 5 5 3 7 0 5 8 2 3 2 2 2 4 3 4 8 8 2 4 0 8
 5 8 7 1 2 2 3 2 7 0 9 5 4 3 4 3 1 2 8 0 3 6 4 4 4 7]
accuracy 0.130
mean proportion error 0.18


As seen in the above 2 comparisons, the regression and single softmax doesn't seem to work very well in detecting the 'max pixel' from a w\*w grid. We now try augmenting the final output with 2 different softmaxes, 1 for each dimension. This will require the use of the functional api, since we're forking the outputs from the fc layer into 2 softmaxes.

In [35]:
rng = np.random.RandomState(0)
n = 1000000
h = 6
w = 10
k = 500000
X = np.round(rng.uniform(low=-1,high=1,size=(n,h*w)),decimals=2)
X_test = np.round(rng.uniform(low=-1,high=1,size=(k,h*w)),decimals=2)

# prepare Y outputs for regression
y1_reg = np.int16(X.argmax(axis=1)/w).reshape((n,1)) # h, y
y2_reg = np.int16(X.argmax(axis=1)%w).reshape((n,1)) # w, x
y1_reg_test = np.int16(X_test.argmax(axis=1)/w).reshape((k,1)) # h, y
y2_reg_test = np.int16(X_test.argmax(axis=1)%w).reshape((k,1)) # w, x

# prepare Y outputs for softmax
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)
enc.fit(np.arange(h).reshape(h,1))
y1_sm = enc.transform(y1_reg)
y1_sm_test = enc.transform(y1_reg_test)
enc.fit(np.arange(w).reshape(w,1))
y2_sm = enc.transform(y2_reg)
y2_sm_test = enc.transform(y2_reg_test)

In [36]:
print(X[0:5,:].reshape((5,h,w)))
print(y1_reg[0:5,:])
print(y2_reg[0:5,:])
print(y1_sm[0:5,:])
print(y2_sm[0:5,:])

[[[ 0.1   0.43  0.21  0.09 -0.15  0.29 -0.12  0.78  0.93 -0.23]
  [ 0.58  0.06  0.14  0.85 -0.86 -0.83 -0.96  0.67  0.56  0.74]
  [ 0.96  0.6  -0.08  0.56 -0.76  0.28 -0.71  0.89  0.04 -0.17]
  [-0.47  0.55 -0.09  0.14 -0.96  0.24  0.22  0.23  0.89  0.36]
  [-0.28 -0.13  0.4  -0.88  0.33  0.34 -0.58 -0.74 -0.37 -0.27]
  [ 0.14 -0.12  0.98 -0.8  -0.58 -0.68  0.31 -0.49 -0.07 -0.51]]

 [[-0.68 -0.78  0.31 -0.72 -0.61 -0.26  0.64 -0.81  0.68 -0.81]
  [ 0.95 -0.06  0.95  0.21  0.48 -0.92 -0.43 -0.76 -0.41 -0.76]
  [-0.36 -0.17 -0.87  0.38  0.13 -0.47  0.05 -0.81  0.15  0.86]
  [-0.36  0.33 -0.74  0.43 -0.42 -0.63  0.17 -0.96  0.66 -0.99]
  [ 0.36 -0.46  0.47  0.92 -0.5   0.15  0.18  0.14 -0.55  0.91]
  [-0.11  0.69  0.4  -0.41  0.63 -0.21  0.76  0.16  0.76  0.39]]

 [[ 0.45  0.    0.91  0.29 -0.15  0.21 -0.96 -0.4   0.32 -0.42]
  [ 0.24 -0.14 -0.73 -0.4   0.14  0.18  0.15  0.31  0.3  -0.14]
  [ 0.79 -0.26 -0.13  0.78  0.61  0.41 -0.8   0.84  0.43  1.  ]
  [-0.7   0.74 -0.68  0.23 -0.75  0.

In [37]:
print(X_test[0:5,:].reshape((5,h,w)))
print(y1_reg_test[0:5,:])
print(y2_reg_test[0:5,:])
print(y1_sm_test[0:5,:])
print(y2_sm_test[0:5,:])

[[[-0.63 -0.15 -0.51  0.19 -0.66  0.51  0.74  0.02  0.36 -0.61]
  [-0.17  0.06 -0.22  0.39  0.51 -0.52  0.16 -0.76  0.82 -0.82]
  [-0.45  0.55  0.22 -0.36 -0.07  0.68  0.03 -0.36  0.02  0.13]
  [-0.99 -0.21  0.02  0.12 -0.03 -0.78  0.35  0.3  -0.16 -0.4 ]
  [ 0.09  0.67 -0.57  0.09 -0.98  0.15  0.04 -0.27  0.09  0.67]
  [-0.52 -0.58 -0.04  0.41 -0.81 -0.79  0.78  0.19 -0.02  0.46]]

 [[-0.09 -0.62  0.46  0.73 -0.    0.8   0.36  0.28  0.93 -0.27]
  [-0.   -0.    0.54  0.62  0.53 -0.2  -0.09 -0.62 -0.93 -0.55]
  [ 0.43  0.6  -0.73  0.18 -0.39 -0.12  0.82  0.1  -0.46  0.1 ]
  [ 0.12 -0.96 -0.38 -0.68 -0.87 -0.54 -0.33  0.5  -0.22  0.1 ]
  [-0.96  0.31 -0.63 -0.79 -0.19  0.11  0.89  0.11  0.1  -0.09]
  [ 0.83  0.11  0.28 -0.88 -0.33  0.81  0.25  0.65  0.89 -0.98]]

 [[-0.2  -0.83  0.08  0.66 -0.29 -0.59 -0.68 -0.46 -0.58 -0.39]
  [ 0.85 -0.45  0.49  0.5  -0.16  0.57  0.4   0.97  0.75  0.93]
  [-0.22  0.5   0.64  0.7   0.21 -0.45  0.59  0.52 -0.66 -0.93]
  [-0.58  0.98 -0.94 -0.79  0.05 -0.

In [53]:
from keras.layers import Input, Convolution2D, MaxPooling2D, ZeroPadding2D, Layer, \
    Activation, Dropout, Flatten, MaxoutDense, Dense, merge
from keras.models import Model
from keras import backend as K

# custom layer for getting max, mean for over each vertical strip
class VerticalAgg(Layer):
    def get_output_shape_for(self, input_shape):
        shape = list(input_shape)
        assert len(shape) == 4  # only valid for 4D tensors (batch_size,h,w,ch)
        return (input_shape[0],input_shape[2]*2,input_shape[3]) # (batch_size,2w,ch)

    def call(self, x, mask=None):
        return merge([K.max(x, axis=1, keepdims=False), 
                     K.mean(x, axis=1, keepdims=False)], 
                     mode='concat', concat_axis=1)


    
# custom layer for getting max for over each horizontal strip
class HorizontalMax(Layer):
    def get_output_shape_for(self, input_shape):
        shape = list(input_shape)
        assert len(shape) == 4  # only valid for 4D tensors (batch_size,h,w,ch)
        return (input_shape[0],input_shape[1],input_shape[3]) # (batch_size,h,ch)

    def call(self, x, mask=None):
        return K.max(x, axis=2, keepdims=False)

X_conv = X.reshape((n,h,w,1))
X_conv_test = X_test.reshape((k,h,w,1))

inputs = Input(shape=(h,w,1))

x_pool = VerticalAgg()(inputs)
x_flat = Flatten()(x_pool)
x_dense = Dense(16, activation='relu')(x_flat)
x_out = Dense(w, activation='softmax')(x_dense)

y_pool = HorizontalMax()(inputs)
y_flat = Flatten()(y_pool)
y_dense = Dense(16, activation='relu')(y_flat)
y_out = Dense(h, activation='softmax')(y_dense)

m2_sm = Model(input=[inputs], output=[y_out,x_out])
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
m2_sm.compile(optimizer=sgd, loss='categorical_crossentropy',
              loss_weights=[0.5, 0.5], metrics=['accuracy'])

In [54]:
m2_sm.fit(x=[X_conv],y=[y1_sm,y2_sm],batch_size=64,nb_epoch=10,validation_split=0.2)

Train on 800000 samples, validate on 200000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
 94784/800000 [==>...........................] - ETA: 54s - loss: 0.1872 - dense_26_loss: 0.1375 - dense_24_loss: 0.2368 - dense_26_acc: 0.9754 - dense_24_acc: 0.9212

KeyboardInterrupt: 

In [48]:
m2_sm.fit(x=[X_conv],y=[y1_sm,y2_sm],batch_size=64,nb_epoch=10,validation_split=0.2)

Train on 800000 samples, validate on 200000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x118ce3e48>

In [50]:
# predict on X_test
y_pred = m2_sm.predict(X_conv_test)
y1_pred = y_pred[0].argmax(axis=1)
y2_pred = y_pred[1].argmax(axis=1)
y1_true = y1_sm_test.argmax(axis=1)
y2_true = y2_sm_test.argmax(axis=1)
print(y1_pred[:20])
print(y1_true[:20])
print(y2_pred[:20])
print(y2_true[:20])
acc = (np.equal(y1_pred,y1_true).mean()+np.equal(y2_pred,y2_true).mean())/2
print('accuracy %.3f' % acc)
mae_x = (np.abs(y1_pred-y1_true)).mean()/(w-1)
mae_y = (np.abs(y2_pred-y2_true)).mean()/(h-1)
print('mean proportion error for x %.2f' % mae_x)
print('mean proportion error for y %.2f' % mae_y)

[1 0 5 0 1 1 0 2 1 5 0 3 4 1 0 4 4 2 5 4]
[1 0 5 0 1 1 0 2 1 5 0 3 4 1 0 4 4 2 5 4]
[8 8 9 2 2 3 3 5 7 0 7 2 0 4 0 2 4 6 4 6]
[8 8 9 2 2 3 3 5 7 0 7 2 0 4 0 2 4 6 4 6]
accuracy 0.958
mean proportion error for x 0.00
mean proportion error for y 0.05


That's quite a bit of an improvement! Nevertheless, let's try a regression model:

In [45]:
inputs = Input(shape=(h,w,1))

x_pool = VerticalMax()(inputs)
x_flat = Flatten()(x_pool)
x_dense = Dense(16, activation='relu')(x_flat)
x_out = Dense(1, activation='relu')(x_dense)

y_pool = HorizontalMax()(inputs)
y_flat = Flatten()(y_pool)
y_dense = Dense(16, activation='relu')(y_flat)
y_out = Dense(1, activation='relu')(y_dense)

m2_reg = Model(input=[inputs], output=[y_out,x_out])
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
m2_reg.compile(optimizer=sgd, loss='mean_squared_error',
              loss_weights=[0.5, 0.5], metrics=['mae'])

In [49]:
m2_reg.fit(x=[X_conv],y=[y1_reg,y2_reg],batch_size=64,nb_epoch=10,validation_split=0.2)

Train on 800000 samples, validate on 200000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11af66908>

In [55]:
y_pred = m2_reg.predict(X_conv[0:k,...])
y_pred[0].shape,y_pred[1].shape

((100, 1), (100, 1))

In [51]:
y1_pred, y2_pred = m2_reg.predict(X_conv_test)
y1_true = y1_reg_test
y2_true = y2_reg_test
print(y1_pred[:5])
print(y1_true[:5])
print(y2_pred[:5])
print(y2_true[:5])
acc = ((np.round(y1_pred)==y1_true).sum()+(np.round(y2_pred)==y2_true).sum())/2/k
print('accuracy %.3f' % acc)
mae_x = (np.abs(y1_pred-y1_true)).sum()/k/(w-1)
mae_y = (np.abs(y2_pred-y2_true)).sum()/k/(h-1)
print('mean proportion error for x %.2f' % mae_x)
print('mean proportion error for y %.2f' % mae_y)

[[ 2.74435997]
 [ 1.80402732]
 [ 3.20696402]
 [ 0.33051264]
 [ 0.24506938]]
[[1]
 [0]
 [5]
 [0]
 [1]]
[[ 6.33391571]
 [ 5.3895359 ]
 [ 7.62091827]
 [ 6.93370247]
 [ 4.49849844]]
[[8]
 [8]
 [9]
 [2]
 [2]]
accuracy 0.332
mean proportion error for x 0.08
mean proportion error for y 0.30


Now that we know that this architecture works for such inputs, what happens if we extend the size of the image and the outputs?


In [None]:
# TODO