# Training and Improving Neural Networks

In [1]:
import pandas as pd

import tensorflow as tf
from tensorflow.keras.datasets.fashion_mnist import load_data

from tensorflow.keras.utils import plot_model

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Layer, Input, Flatten, Dense, Concatenate

import torch
import torch.nn as nn

In [2]:
# load_data()[0][0]

With TensorFlow (Keras):

In [3]:
class MyLinear(Layer):
    """Keras layer computing ``inputs @ w + b`` with no activation.

    Args:
        units: Number of output features (second dimension of ``w``).
        input_dim: Number of input features (first dimension of ``w``).
    """

    def __init__(self, units=32, input_dim=32):
        super().__init__()
        kernel_shape = (input_dim, units)
        bias_shape = (units,)
        # Weight matrix uses the "random_normal" initializer; bias starts at zero.
        self.w = self.add_weight(shape=kernel_shape, initializer="random_normal")
        self.b = self.add_weight(shape=bias_shape, initializer="zeros")

    def call(self, inputs):
        """Return the matrix product of ``inputs`` and ``w``, shifted by ``b``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b

With PyTorch:

In [4]:
class MyLinear(nn.Module):
    """Fully connected layer computing ``X @ weight + bias``.

    Args:
        in_units: Size of the last dimension of the input.
        units: Number of output features.
    """

    def __init__(self, in_units, units):
        super().__init__()
        # Both tensors are sampled with torch.randn and wrapped in
        # nn.Parameter so the module registers them as parameters.
        self.weight = nn.Parameter(
            torch.randn(in_units, units))
        self.bias = nn.Parameter(
            torch.randn(units,))

    def forward(self, X):
        """Apply the affine map to ``X``.

        Args:
            X: Tensor whose last dimension equals ``in_units``.

        Returns:
            ``torch.matmul(X, weight) + bias``.
        """
        # Use the parameters themselves, not ``.data``: ``.data`` bypasses
        # autograd, so no gradient would ever reach weight or bias.
        return torch.matmul(X, self.weight) + self.bias

In [5]:
# Load Fashion-MNIST and unpack it into train/test pairs of (attributes, labels).
((attributes_train, labels_train), (attributes_test, labels_test)) = load_data()

In [6]:
# Dimensions of the training attribute array.
attributes_train.shape

(60000, 28, 28)

In [7]:
# Number of distinct label values in the training set.
len(set(labels_train))

10

In [8]:
# Number of training examples per label value (class-balance check).
pd.DataFrame(labels_train).value_counts()

0    6000
1    6000
2    6000
3    6000
4    6000
5    6000
6    6000
7    6000
8    6000
9    6000
dtype: int64

In [9]:
# Number of test examples per label value (class-balance check).
pd.DataFrame(labels_test).value_counts()

0    1000
1    1000
2    1000
3    1000
4    1000
5    1000
6    1000
7    1000
8    1000
9    1000
dtype: int64

In [10]:
# attributes_train.reshape(60_000, -1).shape

In [11]:
# Functional-API graph: 28x28 input -> flatten -> three ReLU hidden layers
# (64, 128, 64 units) -> 10-way softmax output.
input_layer = Input(shape=(28, 28))
flatten_layer = Flatten()(input_layer)
layer1 = Dense(units=64, activation="relu")(flatten_layer)
layer2 = Dense(units=128, activation="relu")(layer1)
layer3 = Dense(units=64, activation="relu")(layer2)
output_layer = Dense(units=10, activation="softmax")(layer3)

In [12]:
# Display the symbolic tensor produced by the final softmax Dense layer.
output_layer

<KerasTensor shape=(None, 10), dtype=float32, sparse=False, name=keras_tensor_5>

In [13]:
# Wrap the graph running from input_layer to output_layer in a Model.
model = Model(inputs = input_layer, outputs = output_layer)

In [14]:
# Print the summary of the full model.
model.summary()

In [15]:
# Summarize a truncated model that starts at the same input_layer but
# stops at layer2 instead of output_layer.
Model(inputs = input_layer, outputs = layer2).summary()

In [16]:
# Call the model directly on the first 20 training examples and convert the
# result to a NumPy array.
# NOTE(review): attributes_train is passed exactly as loaded; no rescaling is
# visible in this notebook — confirm whether inputs should be normalized first.
model(attributes_train[:20]).numpy()

array([[0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.04257116e-25,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00,
        0.00000000e+00, 6.88683948e-28],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 9.99997973e-01,
        0.00000000e+00, 1.42687990e-37, 2.59148714e-12, 1.03659059e-21,
        0.00000000e+00, 2.04307753e-06],
       [0.00000000e+00, 0.00000000e+00, 6.96930921e-24, 1.32016764e-09,
        0.00000000e+00, 3.09939362e-38, 9.99999881e-01, 5.60170602e-24,
        1.33497187e-37, 7.84196956e-08],
       [0.00000000e+00, 0.00000000e+00, 5.46494514e-28, 8.29636585e-16,
        0.00000000e+00, 9.55925524e-33, 1.00000000e+00, 1.52888055e-34,
        1.67895847e-37, 1.57074735e-11],
       [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.05420994e-13,
        0.00000000e+00, 0.00000000e+00, 3.48204480e-07, 1.06895868e-29,
        0.00000000e+00, 9.99999642e-01],
       [0.00000000e+00, 0.00000000e+00, 1.14650349e-34, 1.12810892e-26,
   

In [17]:
# Shared stem feeding three parallel branches (no activation specified).
block_beginning = Dense(units=20)(flatten_layer)

# Branch A: three stacked ReLU layers (30 -> 11 -> 11 units).
block_a1 = Dense(units=30, activation="relu")(block_beginning)
block_a2 = Dense(units=11, activation="relu")(block_a1)
block_a3 = Dense(units=11, activation="relu")(block_a2)

# Branches B and C: a single ReLU layer each, applied directly to the stem.
block_b = Dense(units=20, activation="relu")(block_beginning)
block_c = Dense(units=50, activation="relu")(block_beginning)

# Join the outputs of the three branches into one tensor.
block_end = Concatenate()([block_a3, block_b, block_c])

In [18]:
# Build a Model over the branching block, from block_beginning to block_end.
# NOTE(review): block_beginning is the output of a Dense layer rather than an
# Input tensor — confirm that using an intermediate tensor as input is intended.
block = Model(inputs = block_beginning, outputs = block_end)

In [19]:
# Print the summary of the branching block model.
block.summary()

In [22]:
# plot_model(model)  # disabled: not working on Windows

In [23]:
# Stack a 20-unit ReLU Dense layer on top of the block's output tensor.
final_layer = Dense(20, activation = "relu")(block.output)

In [25]:
# Summarize a model running from the block's input through final_layer.
Model(inputs = block.input, outputs = final_layer).summary()