- Coding a neural network using only matrix multiplication
- Writting a dataloader to be used in training
- Creating a NN with high level classes and sequential api
- Using an optimizer and pre-defined loss
- Using tf and keras

Data: 
- [mnist_train](https://drive.google.com/file/d/1bCVtZBPQcEz3AqkvrI1M69D-mcY6f5TK/view?usp=sharing)
- [mnist_test](https://drive.google.com/file/d/1DrN5-afU-U6x5hMrUgpUaOA7wYZzXkmz/view?usp=sharing)


In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd

In [2]:
local_path_mnist_train = "/Users/gunnvantsaini/OneDrive/project_codes/content/dl_basics/vision/sony/data/mnist_train.csv"
local_path_mnist_test = "/Users/gunnvantsaini/OneDrive/project_codes/content/dl_basics/vision/sony/data/mnist_test.csv"

### Coding a neural network using only matrix multiplication

In [3]:
mnist_train = pd.read_csv(local_path_mnist_train)
mnist_test = pd.read_csv(local_path_mnist_test)

In [4]:
mnist_train.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
X=mnist_train.drop('label',axis=1).values/255.0
y=mnist_train['label'].values
X=torch.tensor(X,dtype=torch.float)
y=torch.tensor(y,dtype=torch.float)

In [6]:
w1=torch.randn((784,3),dtype=torch.float)
b1=torch.randn((3,),dtype=torch.float)
w2=torch.randn((3,10),dtype=torch.float)
b2=torch.randn((10,),dtype=torch.float)

In [7]:
def network(X,w1,b1,w2,b2):
    z1=torch.matmul(X.float(),w1)+b1
    res1=torch.sigmoid(z1)
    z2=torch.matmul(res1,w2)+b2
    probs=torch.softmax(z2,axis=1)
    return probs
def CE(probs,y):
    return -torch.log(probs[range(y.shape[0]),y.long()]).mean()

In [8]:
## Forward pass
p=network(X,w1,b1,w2,b2)

In [9]:
## Loss
CE(p,y)

tensor(3.2862)

In [10]:
## training loop
w1=torch.randn((784,3),dtype=torch.float,requires_grad=True)
b1=torch.randn((3,),dtype=torch.float,requires_grad=True)
w2=torch.randn((3,10),dtype=torch.float,requires_grad=True)
b2=torch.randn((10,),dtype=torch.float,requires_grad=True)
lr=0.1
Loss=[]
for i in range(10):
    p=network(X,w1,b1,w2,b2)
    #print(p)
    loss=CE(p,y)
    loss.backward()
    Loss.append(loss.item())
    acc=(p.argmax(axis=1)==y).float().mean().item()
    print(f"Iter {i+1}, loss {loss.item()}, acc {acc}")
    with torch.no_grad():
        w1-=lr*w1.grad
        b1-=lr*b1.grad
        w2-=lr*w2.grad
        b2-=lr*b2.grad
        w1.grad.zero_()
        b1.grad.zero_()
        w2.grad.zero_()
        b2.grad.zero_()   

Iter 1, loss 3.1359598636627197, acc 0.09988094866275787
Iter 2, loss 3.091695547103882, acc 0.100095234811306
Iter 3, loss 3.0513336658477783, acc 0.10035714507102966
Iter 4, loss 3.0144829750061035, acc 0.10061904788017273
Iter 5, loss 2.9807841777801514, acc 0.10104762017726898
Iter 6, loss 2.9499104022979736, acc 0.1014999970793724
Iter 7, loss 2.921565532684326, acc 0.10180952399969101
Iter 8, loss 2.8954837322235107, acc 0.1022142842411995
Iter 9, loss 2.8714263439178467, acc 0.10264285653829575
Iter 10, loss 2.8491806983947754, acc 0.103071428835392


In [11]:
class MnistData(Dataset):
    def __init__(self,X,y):
        self.X=X
        self.y=y
    def __len__(self):
        return X.shape[0]
    def __getitem__(self,idx):
        X=self.X[idx,]
        y=self.y[idx]
        sample={'X':X,'y':y}
        return sample

In [12]:
X=mnist_train.drop('label',axis=1).values/255.0
y=mnist_train['label'].values
mnist=MnistData(X,y)

In [13]:
mnist_batched=DataLoader(mnist,100)

In [14]:
next(iter(mnist_batched))

{'X': tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]], dtype=torch.float64),
 'y': tensor([1, 0, 1, 4, 0, 0, 7, 3, 5, 3, 8, 9, 1, 3, 3, 1, 2, 0, 7, 5, 8, 6, 2, 0,
         2, 3, 6, 9, 9, 7, 8, 9, 4, 9, 2, 1, 3, 1, 1, 4, 9, 1, 4, 4, 2, 6, 3, 7,
         7, 4, 7, 5, 1, 9, 0, 2, 2, 3, 9, 1, 1, 1, 5, 0, 6, 3, 4, 8, 1, 0, 3, 9,
         6, 2, 6, 4, 7, 1, 4, 1, 5, 4, 8, 9, 2, 9, 9, 8, 9, 6, 3, 6, 4, 6, 2, 9,
         1, 2, 0, 5])}

In [15]:
### Class Excercise use the regression.csv and create a Dataloader using that data

In [16]:
class RegData(Dataset):
    def __init__(self,path,y_name):
        self.path = path
        self.y_name = y_name
        self.data = pd.read_csv(path)
        self.X = self.data.drop(self.y_name,axis=1).values
        self.y = self.data[self.y_name].values
        
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self,idx):
        x = self.X[idx]
        y = self.y[idx]
        return {'X':x,'y':y}    

In [19]:
d = RegData(path="../data/regression.csv",y_name='mpg')

In [20]:
d_train = DataLoader(d,batch_size=32)

In [21]:
next(iter(d_train))

{'X': tensor([[8.0000e+00, 3.0700e+02, 1.3000e+02, 3.5040e+03, 1.2000e+01, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 3.5000e+02, 1.6500e+02, 3.6930e+03, 1.1500e+01, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 3.1800e+02, 1.5000e+02, 3.4360e+03, 1.1000e+01, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 3.0400e+02, 1.5000e+02, 3.4330e+03, 1.2000e+01, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 3.0200e+02, 1.4000e+02, 3.4490e+03, 1.0500e+01, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 4.2900e+02, 1.9800e+02, 4.3410e+03, 1.0000e+01, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 4.5400e+02, 2.2000e+02, 4.3540e+03, 9.0000e+00, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 4.4000e+02, 2.1500e+02, 4.3120e+03, 8.5000e+00, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 4.5500e+02, 2.2500e+02, 4.4250e+03, 1.0000e+01, 7.0000e+01,
          1.0000e+00],
         [8.0000e+00, 3.9000e+02, 1.9000e+02, 3.85

### Training using a dataloader

In [22]:
w1=torch.randn((784,3),dtype=torch.float,requires_grad=True)
b1=torch.randn((3,),dtype=torch.float,requires_grad=True)
w2=torch.randn((3,10),dtype=torch.float,requires_grad=True)
b2=torch.randn((10,),dtype=torch.float,requires_grad=True)
lr=0.01
Loss=[]
num_epoch=10
for i in range(num_epoch):
    for j,batch in enumerate(mnist_batched):
        x=batch['X']
        Y=batch['y']
        p=network(x,w1,b1,w2,b2)
        loss=CE(p,Y)
        loss.backward()
        Loss.append(loss.item())
        acc=(p.argmax(axis=1)==Y).float().mean().item()
        if j%100 == 0:
            print(f"Epoch {i+1}, iter {j+1}, loss {loss.item()}, acc {acc}")
        with torch.no_grad():
            w1-=lr*w1.grad
            b1-=lr*b1.grad
            w2-=lr*w2.grad
            b2-=lr*b2.grad
            w1.grad.zero_()
            b1.grad.zero_()
            w2.grad.zero_()
            b2.grad.zero_()   

Epoch 1, iter 1, loss 3.6475841999053955, acc 0.14000000059604645
Epoch 1, iter 101, loss 2.959592342376709, acc 0.12999999523162842
Epoch 1, iter 201, loss 2.8314015865325928, acc 0.11999999731779099
Epoch 1, iter 301, loss 2.713552236557007, acc 0.10999999940395355
Epoch 1, iter 401, loss 2.654496669769287, acc 0.09000000357627869
Epoch 2, iter 1, loss 2.6646318435668945, acc 0.12999999523162842
Epoch 2, iter 101, loss 2.4830801486968994, acc 0.11999999731779099
Epoch 2, iter 201, loss 2.4732818603515625, acc 0.10000000149011612
Epoch 2, iter 301, loss 2.473522901535034, acc 0.10999999940395355
Epoch 2, iter 401, loss 2.4026601314544678, acc 0.10999999940395355
Epoch 3, iter 1, loss 2.434114933013916, acc 0.10000000149011612
Epoch 3, iter 101, loss 2.3586008548736572, acc 0.10999999940395355
Epoch 3, iter 201, loss 2.3317301273345947, acc 0.09000000357627869
Epoch 3, iter 301, loss 2.375793933868408, acc 0.10999999940395355
Epoch 3, iter 401, loss 2.2883334159851074, acc 0.1299999952

### Writting a network with a high level api

In [23]:
from torch import nn

In [24]:
#### Create a model with nn class ####
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.w1=nn.Parameter(torch.randn((784,3),dtype=torch.float))
        self.b1=nn.Parameter(torch.randn((3,),dtype=torch.float))
        self.w2=nn.Parameter(torch.randn((3,10),dtype=torch.float))
        self.b2=nn.Parameter(torch.randn((10,),dtype=torch.float))
    def forward(self,X):
        z1=torch.matmul(X.float(),self.w1)+self.b1
        res1=torch.sigmoid(z1)
        z2=torch.matmul(res1,self.w2)+self.b2
        probs=torch.softmax(z2,axis=1)
        return probs

In [25]:
mod=MLP()

In [26]:
lr=0.01
Loss=[]
num_epoch=10
for i in range(num_epoch):
    for j,batch in enumerate(mnist_batched):
        x=batch['X']
        Y=batch['y']
        p=mod(x)
        loss=CE(p,Y)
        loss.backward()
        Loss.append(loss.item())
        acc=(p.argmax(axis=1)==Y).float().mean().item()
        if j%100 == 0:
            print(f"Epoch {i+1}, iter {j+1}, loss {loss.item()}, acc {acc}")
        with torch.no_grad():
            for p in mod.parameters():
                p-=lr*p.grad
            mod.zero_grad()

Epoch 1, iter 1, loss 3.5367236137390137, acc 0.10999999940395355
Epoch 1, iter 101, loss 3.0288314819335938, acc 0.07000000029802322
Epoch 1, iter 201, loss 2.8319709300994873, acc 0.05000000074505806
Epoch 1, iter 301, loss 2.6407525539398193, acc 0.10999999940395355
Epoch 1, iter 401, loss 2.3520538806915283, acc 0.10999999940395355
Epoch 2, iter 1, loss 2.3813798427581787, acc 0.15000000596046448
Epoch 2, iter 101, loss 2.4469709396362305, acc 0.07000000029802322
Epoch 2, iter 201, loss 2.4275898933410645, acc 0.11999999731779099
Epoch 2, iter 301, loss 2.3922646045684814, acc 0.14000000059604645
Epoch 2, iter 401, loss 2.2707107067108154, acc 0.18000000715255737
Epoch 3, iter 1, loss 2.259216070175171, acc 0.15000000596046448
Epoch 3, iter 101, loss 2.3237292766571045, acc 0.07999999821186066
Epoch 3, iter 201, loss 2.2869784832000732, acc 0.09000000357627869
Epoch 3, iter 301, loss 2.3006675243377686, acc 0.11999999731779099
Epoch 3, iter 401, loss 2.251497983932495, acc 0.189999

In [27]:
### Can we improve this further, should we be declaring parameters? Shouldn't there be abstractions for layers?
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin1=nn.Linear(784,3)
        self.sig=nn.Sigmoid()
        self.lin2=nn.Linear(3,10)
        self.softmax=nn.Softmax()
    def forward(self,X):
        x=self.lin1(X)
        x=self.sig(x)
        x=self.lin2(x)
        x=self.softmax(x)
        return x           

In [28]:
mod=MLP()

In [29]:
import torch.optim as optim

In [30]:
opt=optim.SGD(mod.parameters(),lr=0.1)

In [31]:
Loss=[]
num_epoch=10
for i in range(num_epoch):
    for j,batch in enumerate(mnist_batched):
        x=batch['X']
        Y=batch['y']
        p=mod(x.float())
        loss=CE(p,Y)
        loss.backward()
        Loss.append(loss.item())
        acc=(p.argmax(axis=1)==Y).float().mean().item()
        if j%100 == 0:
            print(f"Epoch {i+1}, iter {j+1}, loss {loss.item()}, acc {acc}")
        opt.step()
        opt.zero_grad()

  x=self.softmax(x)


Epoch 1, iter 1, loss 2.372739791870117, acc 0.05999999865889549
Epoch 1, iter 101, loss 2.1189982891082764, acc 0.3400000035762787
Epoch 1, iter 201, loss 1.9030648469924927, acc 0.550000011920929
Epoch 1, iter 301, loss 1.7667666673660278, acc 0.5799999833106995
Epoch 1, iter 401, loss 1.71932053565979, acc 0.4699999988079071
Epoch 2, iter 1, loss 1.6427087783813477, acc 0.5799999833106995
Epoch 2, iter 101, loss 1.5365495681762695, acc 0.5600000023841858
Epoch 2, iter 201, loss 1.3791559934616089, acc 0.6600000262260437
Epoch 2, iter 301, loss 1.3651549816131592, acc 0.5600000023841858
Epoch 2, iter 401, loss 1.4177724123001099, acc 0.5699999928474426
Epoch 3, iter 1, loss 1.3656424283981323, acc 0.5899999737739563
Epoch 3, iter 101, loss 1.2652097940444946, acc 0.6200000047683716
Epoch 3, iter 201, loss 1.149062991142273, acc 0.699999988079071
Epoch 3, iter 301, loss 1.165700078010559, acc 0.6299999952316284
Epoch 3, iter 401, loss 1.249300241470337, acc 0.6299999952316284
Epoch 4,

In [30]:
## We can make one last change, instead of defining a loss function ourselves we will use a predifined one

In [32]:
criterion=torch.nn.CrossEntropyLoss()

In [33]:
mod=MLP()
opt=optim.SGD(mod.parameters(),lr=0.1)
Loss=[]
num_epoch=10
for i in range(num_epoch):
    for j,batch in enumerate(mnist_batched):
        x=batch['X']
        Y=batch['y']
        p=mod(x.float())
        loss=criterion(p,Y.long())
        loss.backward()
        Loss.append(loss.item())
        acc=(p.argmax(axis=1)==Y).float().mean().item()
        if j%100 == 0:
            print(f"Epoch {i+1}, iter {j+1}, loss {loss.item()}, acc {acc}")
        opt.step()
        opt.zero_grad()

  x=self.softmax(x)


Epoch 1, iter 1, loss 2.2954514026641846, acc 0.1599999964237213
Epoch 1, iter 101, loss 2.2960641384124756, acc 0.15000000596046448
Epoch 1, iter 201, loss 2.291412115097046, acc 0.11999999731779099
Epoch 1, iter 301, loss 2.287562608718872, acc 0.11999999731779099
Epoch 1, iter 401, loss 2.2863752841949463, acc 0.09000000357627869
Epoch 2, iter 1, loss 2.2610960006713867, acc 0.1599999964237213
Epoch 2, iter 101, loss 2.2560718059539795, acc 0.15000000596046448
Epoch 2, iter 201, loss 2.252784013748169, acc 0.11999999731779099
Epoch 2, iter 301, loss 2.2487235069274902, acc 0.11999999731779099
Epoch 2, iter 401, loss 2.2575955390930176, acc 0.09000000357627869
Epoch 3, iter 1, loss 2.212414503097534, acc 0.1599999964237213
Epoch 3, iter 101, loss 2.219043254852295, acc 0.17000000178813934
Epoch 3, iter 201, loss 2.227004051208496, acc 0.1599999964237213
Epoch 3, iter 301, loss 2.229422092437744, acc 0.1899999976158142
Epoch 3, iter 401, loss 2.2433786392211914, acc 0.1700000017881393

### Using keras and tensorflow to build the neural networks

- Using tensors
- Using Keras low level api
- Using Keras Functional api
- Using Keras Sequential api

In [34]:
import tensorflow as tf

### Using tensors

In [35]:
X=mnist_train.drop('label',axis=1).values/255.0
y=mnist_train['label'].values
X = tf.constant(X,dtype='float32')
y = tf.constant(y,dtype='float32')

Metal device set to: Apple M1


2022-07-03 17:43:54.384450: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-03 17:43:54.385017: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [36]:
def make_bias(k, initializer):
    return tf.Variable(initializer(shape=[k,], dtype=tf.float32))
def make_weights(n,k,initializer):
    return tf.Variable(initializer(shape=[n,k], dtype=tf.float32))          

In [37]:
w1 = make_weights(784,3,tf.random_normal_initializer())
b1 = make_bias(3,tf.random_normal_initializer())
w2 = make_weights(3,10,tf.random_normal_initializer())
b2 = make_bias(10,tf.random_normal_initializer())

In [38]:
def network(X,w1,b1,w2,b2):
    z1=tf.matmul(X,w1)+b1
    res1=tf.math.sigmoid(z1)
    z2=tf.matmul(res1,w2)+b2
    probs=tf.nn.softmax(z2,axis=1)
    return probs
CE = tf.keras.losses.SparseCategoricalCrossentropy()

In [39]:
## Forward pass
p=network(X,w1,b1,w2,b2)

In [40]:
CE(y,p)

<tf.Tensor: shape=(), dtype=float32, numpy=2.3038237>

In [41]:
## training loop
X=mnist_train.drop('label',axis=1).values/255.0
y=mnist_train['label'].values
X = tf.constant(X,dtype='float32')
y = tf.constant(y,dtype='float32')

w1 = make_weights(784,3,tf.zeros_initializer())
b1 = make_bias(3,tf.zeros_initializer())
w2 = make_weights(3,10,tf.zeros_initializer())
b2 = make_bias(10,tf.zeros_initializer())

lr=0.1
Loss=[]
for i in range(10):
    with tf.GradientTape() as tape:
        p=network(X,w1,b1,w2,b2)
        loss = CE(y,p)
    pred = tf.cast(tf.argmax(p,axis=1),tf.int32)
    mask = tf.equal(pred,tf.cast(y,dtype = tf.int32))
    mask = tf.cast(mask,dtype=tf.int32)
    acc = tf.reduce_mean(mask)
    print(f"Iter {i+1}, loss {loss.numpy()}, acc {acc.numpy()}")
    ## update the weights
    gw1,gb1,gw2,gb2 = tape.gradient(loss,[w1,b1,w2,b2])
    w1.assign_sub(lr*gw1)
    b1.assign_sub(lr*gb1)
    w2.assign_sub(lr*gw2)
    b2.assign_sub(lr*b2)

Iter 1, loss 2.3025851249694824, acc 0
Iter 2, loss 2.3025636672973633, acc 0
Iter 3, loss 2.3025424480438232, acc 0
Iter 4, loss 2.3025214672088623, acc 0
Iter 5, loss 2.3025012016296387, acc 0
Iter 6, loss 2.302480697631836, acc 0
Iter 7, loss 2.3024609088897705, acc 0
Iter 8, loss 2.302441120147705, acc 0
Iter 9, loss 2.302421808242798, acc 0
Iter 10, loss 2.3024024963378906, acc 0


### Using keras-low-level api

In [42]:
class Linear(tf.keras.layers.Layer):
    """y = w.x + b"""

    def __init__(self, units=32, input_dim=32):
        super(Linear, self).__init__()
        w_init = tf.random_normal_initializer()
        self.w = tf.Variable(
            initial_value=w_init(shape=(input_dim, units), dtype="float32"),
            trainable=True,
        )
        b_init = tf.zeros_initializer()
        self.b = tf.Variable(
            initial_value=b_init(shape=(units,), dtype="float32"), trainable=True
        )

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

In [43]:
class Model(tf.keras.layers.Layer):
    def __init__(self,layer_1_shape,layer_1_num_units, layer_2_num_units,layer_2_shape):
        super(Model, self).__init__()
        self.layer1 = Linear(units = layer_1_num_units,input_dim=layer_1_shape)
        self.layer2 = Linear(units=layer_2_num_units,input_dim=layer_2_shape)
    
    def call(self,inputs):
        x = self.layer1(inputs)
        x = tf.math.sigmoid(x)
        x = self.layer2(x)
        return tf.nn.softmax(x,axis=1)

In [44]:
model = Model(784,3,10,3)

In [45]:
X=mnist_train.drop('label',axis=1).values/255.0
y=mnist_train['label'].values
X = tf.constant(X,dtype='float32')
y = tf.constant(y,dtype='float32')
mnist_data = tf.data.Dataset.from_tensor_slices((X,y))

In [46]:
mnist_data = mnist_data.shuffle(buffer_size=1024).batch(64)

In [47]:
# Loss and optimizer.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)

In [48]:
for step, (x, y_1) in enumerate(mnist_data):
    with tf.GradientTape() as tape:

        # Forward pass.
        probs = model(x)
        pred = tf.cast(tf.argmax(probs,axis=1),tf.int32)
        mask = tf.equal(pred,tf.cast(y_1,dtype=tf.int32))
        mask = tf.cast(mask,dtype=tf.int32)
        acc = tf.reduce_mean(mask)


        # External loss value for this batch.
        loss = loss_fn(y_1, probs)

        # Add the losses created during the forward pass.
        loss += sum(model.losses)

        # Get gradients of the loss wrt the weights.
        gradients = tape.gradient(loss, model.trainable_weights)

    # Update the weights of our linear layer.
    optimizer.apply_gradients(zip(gradients, model.trainable_weights))

    # Logging.
    if step % 100 == 0:
        print("Step:", step, "Loss:", float(loss),'Accuracy:',float(acc))

Step: 0 Loss: 2.3036980628967285 Accuracy: 0.0
Step: 100 Loss: 2.2934751510620117 Accuracy: 0.0
Step: 200 Loss: 2.2922370433807373 Accuracy: 0.0
Step: 300 Loss: 2.294084310531616 Accuracy: 0.0
Step: 400 Loss: 2.3088443279266357 Accuracy: 0.0
Step: 500 Loss: 2.2955589294433594 Accuracy: 0.0
Step: 600 Loss: 2.292743682861328 Accuracy: 0.0


### Using keras functional api

In [49]:
inputs = tf.keras.Input(shape=(784,))
x=tf.keras.layers.Dense(units=3,activation="sigmoid")(inputs)
output = tf.keras.layers.Dense(units=10,activation="softmax")(x)
model = tf.keras.Model(inputs,output)

In [50]:
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 784)]             0         
                                                                 
 dense (Dense)               (None, 3)                 2355      
                                                                 
 dense_1 (Dense)             (None, 10)                40        
                                                                 
Total params: 2,395
Trainable params: 2,395
Non-trainable params: 0
_________________________________________________________________


In [51]:
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [52]:
model.fit(mnist_data,epochs=10)

Epoch 1/10


2022-07-03 17:44:29.965764: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-07-03 17:44:29.970628: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2b7db4490>

### Keras Sequential api

In [53]:
model = tf.keras.Sequential(
    [
    tf.keras.layers.Dense(units=3,activation='sigmoid',input_shape=(784,)),
    tf.keras.layers.Dense(units=10,activation='softmax')
    ]
)

In [54]:
model.compile(optimizer='sgd', loss='sparse_categorical_crossentropy',metrics=['accuracy'])

In [55]:
model.fit(mnist_data,epochs=10)

Epoch 1/10
  8/657 [..............................] - ETA: 4s - loss: 2.3391 - accuracy: 0.1250  

2022-07-03 17:45:14.414254: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2b7bf50d0>