In [1]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.datasets import load_digits
import tensorflow as tf

from raytracerthing import RayTracerThing, Activations

# Seed NumPy's global random number generator so repeated runs of the
# notebook draw the same random values.
np.random.seed(42)

In [2]:
# Load scikit-learn's bundled handwritten-digits dataset.
digits = load_digits()

# Restrict the problem to the two classes 0 and 1. The mask is built once
# from the unfiltered labels so images and labels are guaranteed to be
# selected with the same rows.
is_binary_digit = digits['target'] < 2

X = digits['images'][is_binary_digit]
X = X / X.max()  # scale by the largest pixel value among the kept images
N = X.shape[0]
image_shape = X.shape[1:]

print(N, image_shape)

y = digits['target'][is_binary_digit]

print(y[:5])

360 (8, 8)
[0 1 0 1 0]


In [3]:
# Every hidden layer uses the same shape as an input image.
layer_shape = image_shape

clf = RayTracerThing(
    input_shape=image_shape,
    hidden_layer_shape=layer_shape,
    n_layers=3,
    activation_func=Activations.sigmoid,
)

# Replace each layer's pixel values with a TensorFlow tensor so they can be
# watched by a tf.GradientTape later in the notebook.
for layer in clf.hidden_layers:
    layer.pixel_values = tf.convert_to_tensor(layer.pixel_values)

In [4]:
# Show the first hidden layer's pixel values after conversion to a tensor.
clf.hidden_layers[0].pixel_values

<tf.Tensor: id=1, shape=(8, 8), dtype=float64, numpy=
array([[0.95071431, 0.73199394, 0.59865848, 0.15601864, 0.15599452,
        0.05808361, 0.86617615, 0.60111501],
       [0.70807258, 0.02058449, 0.96990985, 0.83244264, 0.21233911,
        0.18182497, 0.18340451, 0.30424224],
       [0.52475643, 0.43194502, 0.29122914, 0.61185289, 0.13949386,
        0.29214465, 0.36636184, 0.45606998],
       [0.78517596, 0.19967378, 0.51423444, 0.59241457, 0.04645041,
        0.60754485, 0.17052412, 0.06505159],
       [0.94888554, 0.96563203, 0.80839735, 0.30461377, 0.09767211,
        0.68423303, 0.44015249, 0.12203823],
       [0.49517691, 0.03438852, 0.9093204 , 0.25877998, 0.66252228,
        0.31171108, 0.52006802, 0.54671028],
       [0.18485446, 0.96958463, 0.77513282, 0.93949894, 0.89482735,
        0.59789998, 0.92187424, 0.0884925 ],
       [0.19598286, 0.04522729, 0.32533033, 0.38867729, 0.27134903,
        0.82873751, 0.35675333, 0.28093451]])>

In [5]:
def log_loss(true_label, predicted_prob):
    """Return the binary log loss (negative log-likelihood) for one sample.

    Parameters
    ----------
    true_label
        The ground-truth class. A value equal to 1 selects the positive
        branch; any other value is treated as the negative class.
    predicted_prob
        The predicted probability of the positive class, passed to
        TensorFlow's logarithm.

    Returns
    -------
    ``-log(predicted_prob)`` when ``true_label == 1``, otherwise
    ``-log(1 - predicted_prob)``.
    """
    # tf.math.log replaces the top-level tf.log alias, which no longer
    # exists in TensorFlow 2.x.
    if true_label == 1:
        return -tf.math.log(predicted_prob)
    else:
        return -tf.math.log(1 - predicted_prob)

def grad(x, true_label=None):
    """Compute the log-loss gradient w.r.t. each hidden layer's pixel values.

    Runs ``clf.forward(x)`` under a ``tf.GradientTape``, prints the network
    output and the resulting log loss, and returns the gradients.

    Parameters
    ----------
    x
        Input passed to ``clf.forward``; it is also watched by the tape.
    true_label
        Ground-truth label for ``x``. Defaults to ``y[0]``, the label this
        function originally hard-coded.

    Returns
    -------
    list
        One gradient per entry of ``clf.hidden_layers``, in layer order.
    """
    if true_label is None:
        true_label = y[0]

    # Look the weights up at call time instead of relying on module-level
    # aliases: those aliases go stale as soon as a layer's pixel_values
    # attribute is rebound (e.g. after an update step), which would make the
    # tape return None for them.
    weights = [layer.pixel_values for layer in clf.hidden_layers]

    with tf.GradientTape() as tape:
        # Plain tensors are not tracked automatically, so watch each one.
        tape.watch(x)
        for layer_weights in weights:
            tape.watch(layer_weights)

        out = clf.forward(x)
        loss = log_loss(true_label, out)

    print(out)
    print('Log Loss: %.4f' % loss)

    return tape.gradient(loss, weights)

# Use the first image as the single example to differentiate against.
x = tf.convert_to_tensor(X[0])

# Keep handles on the current pixel-value tensors of the three hidden layers.
w_0, w_1, w_2 = (clf.hidden_layers[k].pixel_values for k in range(3))

dw_0, dw_1, dw_2 = grad(x)

tf.Tensor(0.6940541919688394, shape=(), dtype=float64)
Log Loss: 1.1843


In [6]:
# Show the first hidden layer's pixel values again, now that grad() has run.
clf.hidden_layers[0].pixel_values

<tf.Tensor: id=1, shape=(8, 8), dtype=float64, numpy=
array([[0.95071431, 0.73199394, 0.59865848, 0.15601864, 0.15599452,
        0.05808361, 0.86617615, 0.60111501],
       [0.70807258, 0.02058449, 0.96990985, 0.83244264, 0.21233911,
        0.18182497, 0.18340451, 0.30424224],
       [0.52475643, 0.43194502, 0.29122914, 0.61185289, 0.13949386,
        0.29214465, 0.36636184, 0.45606998],
       [0.78517596, 0.19967378, 0.51423444, 0.59241457, 0.04645041,
        0.60754485, 0.17052412, 0.06505159],
       [0.94888554, 0.96563203, 0.80839735, 0.30461377, 0.09767211,
        0.68423303, 0.44015249, 0.12203823],
       [0.49517691, 0.03438852, 0.9093204 , 0.25877998, 0.66252228,
        0.31171108, 0.52006802, 0.54671028],
       [0.18485446, 0.96958463, 0.77513282, 0.93949894, 0.89482735,
        0.59789998, 0.92187424, 0.0884925 ],
       [0.19598286, 0.04522729, 0.32533033, 0.38867729, 0.27134903,
        0.82873751, 0.35675333, 0.28093451]])>

In [7]:
# Print the gradient of each hidden layer, one after another.
print(dw_0, dw_1, dw_2, sep='\n')

tf.Tensor(
[[0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.75760918 1.01031374 0.
  0.         0.        ]
 [0.         0.         0.         0.09993985 0.43346778 0.
  0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.        ]], shape=(8, 8), dtype=float64)
tf.Tensor(
[[0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00
  0.00000000e+00 0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 

In [8]:
eta = 0.1

# Preview what one gradient-descent step would produce for each layer;
# nothing on the model is modified here.
print(
    *(w - eta * dw for w, dw in zip((w_0, w_1, w_2), (dw_0, dw_1, dw_2))),
    sep='\n'
)

tf.Tensor(
[[ 0.95071431  0.73199394  0.59865848  0.15601864  0.15599452  0.05808361
   0.86617615  0.60111501]
 [ 0.70807258  0.02058449  0.96990985  0.83244264  0.21233911  0.18182497
   0.18340451  0.30424224]
 [ 0.52475643  0.43194502  0.29122914  0.61185289  0.13949386  0.29214465
   0.36636184  0.45606998]
 [ 0.78517596  0.19967378  0.51423444  0.51665365 -0.05458096  0.60754485
   0.17052412  0.06505159]
 [ 0.94888554  0.96563203  0.80839735  0.29461978  0.05432534  0.68423303
   0.44015249  0.12203823]
 [ 0.49517691  0.03438852  0.9093204   0.25877998  0.66252228  0.31171108
   0.52006802  0.54671028]
 [ 0.18485446  0.96958463  0.77513282  0.93949894  0.89482735  0.59789998
   0.92187424  0.0884925 ]
 [ 0.19598286  0.04522729  0.32533033  0.38867729  0.27134903  0.82873751
   0.35675333  0.28093451]], shape=(8, 8), dtype=float64)
tf.Tensor(
[[0.54269608 0.14092422 0.80219698 0.07455064 0.98688694 0.77224477
  0.19871568 0.00552212]
 [0.81546143 0.70685734 0.72900717 0.77127035 

In [9]:
# Apply one gradient-descent step to each of the three hidden layers.
for idx, step in enumerate((dw_0, dw_1, dw_2)):
    clf.hidden_layers[idx].pixel_values -= eta * step

In [None]:
# Run every image through the network, collecting one output per image.
outputs = []
progress_template = '\rImage %d of %d'

for i, image in enumerate(X):
    # The carriage return rewrites the same console line as a progress counter.
    print(progress_template % (i + 1, N), end='')
    outputs.append(clf.forward(image))

# Finish the progress line.
print()

In [None]:
# One panel per sample on a 5x2 grid, flattened for simple iteration.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(9, 15))
axes = axes.ravel()

for ax, image, expected, actual in zip(axes, X[:10], y[:10], outputs[:10]):
    sns.heatmap(image, vmin=0.0, vmax=1.0, cmap='gray', ax=ax)
    ax.set_axis_off()

    # Threshold the network output at 0.5 to get a hard 0/1 prediction.
    actual = int(not (actual < 0.5))

    # Green title for a correct prediction, red for a wrong one.
    color = 'red' if expected != actual else 'green'
    ax.set_title('Predicted %d' % actual, color=color)

plt.show()