In [1]:
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor

In [10]:
train_data = datasets.FashionMNIST(
      train = True,
      download = True,
      root = 'data',
      transform = ToTensor(),
      # target_transform = ToTensor()
    )

test_data = datasets.FashionMNIST(
        root = 'data',
        download = False,
        train = False,
        transform = ToTensor(),
        #
        target_transform = ToTensor()
    )

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 15005538.92it/s]


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 271252.18it/s]


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 4971482.07it/s]


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 4723753.44it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw






In [11]:
train_loader = torch.utils.data.DataLoader(
          batch_size =  32,
          dataset = train_data,
          shuffle = True
      )

test_loader = torch.utils.data.DataLoader(
          dataset = test_data,
          batch_size = 32,
      )

In [12]:
train_features_batch, train_labels_batch = next(iter(train_loader))
train_features_batch.shape

torch.Size([32, 1, 28, 28])

In [2]:
import torch

In [3]:
class ConvolutionalClassifier(torch.nn.Module):

  def __init__(self,input_channel, output_features):
    super().__init__()
    self.kernel_size = (3,3)
    self.stride = (2,2)
    self.convmodel = torch.nn.Sequential(
      torch.nn.Conv2d(in_channels = input_channel, out_channels = 12, kernel_size = self.kernel_size , stride = self.stride),
      torch.nn.ReLU(),
      torch.nn.Conv2d(in_channels = 12, out_channels=3, kernel_size = self.kernel_size, stride =self.stride),
      torch.nn.MaxPool2d(kernel_size = self.kernel_size,stride = self.stride),
      torch.nn.Flatten()
    )

    self.linearmodel = torch.nn.Sequential(
        torch.nn.Flatten(start_dim=1,end_dim=-1),
        torch.nn.Linear(in_features = 12, out_features = 10),
        torch.nn.Softmax(),
    )

  def forward(self, x):
    x = self.convmodel(x)
    x = self.linearmodel(x)
    return x

In [4]:
seed = 34

In [None]:
torch.manual_seed(seed)
model = ConvolutionalClassifier(input_channel = 1, output_features = 1)

In [13]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [None]:
import torchinfo
torchinfo.summary(model,input_size=(1,1,28,28))

  return self._call_impl(*args, **kwargs)


Layer (type:depth-idx)                   Output Shape              Param #
ConvolutionalClassifier                  [1, 10]                   --
├─Sequential: 1-1                        [1, 12]                   --
│    └─Conv2d: 2-1                       [1, 12, 13, 13]           120
│    └─ReLU: 2-2                         [1, 12, 13, 13]           --
│    └─Conv2d: 2-3                       [1, 3, 6, 6]              327
│    └─MaxPool2d: 2-4                    [1, 3, 2, 2]              --
│    └─Flatten: 2-5                      [1, 12]                   --
├─Sequential: 1-2                        [1, 10]                   --
│    └─Flatten: 2-6                      [1, 12]                   --
│    └─Linear: 2-7                       [1, 10]                   130
│    └─Softmax: 2-8                      [1, 10]                   --
Total params: 577
Trainable params: 577
Non-trainable params: 0
Total mult-adds (M): 0.03
Input size (MB): 0.00
Forward/backward pass size (MB): 0.02
Pa

In [None]:
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adadelta(params = model.parameters(),lr = 0.01)

In [None]:
epochs = 100
torch.manual_seed(seed)
for epoch in range(epochs):
  model.train()
  train_l = 0
  for x,y in train_loader:
    train_pred = model(x)
    loss = loss_fn(train_pred,y)
    train_l += loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
  if epoch%10==0:
    test_l = 0
    train_l/= len(train_loader)
    for t_x,t_y in test_loader:
      test_y = model(t_x)
      test_l+= loss_fn(test_y,t_y)
    test_l/=len(test_loader)
    print(f"{epoch} \t train loss : {train_l} \t test loss : {test_l}")


  return self._call_impl(*args, **kwargs)


0 	 train loss : 2.30222225189209 	 test loss : 2.3015670776367188
10 	 train loss : 1.941670298576355 	 test loss : 1.9328166246414185
20 	 train loss : 1.839295744895935 	 test loss : 1.8378793001174927
30 	 train loss : 1.7987815141677856 	 test loss : 1.8003718852996826
40 	 train loss : 1.7784082889556885 	 test loss : 1.7814797163009644
50 	 train loss : 1.765880823135376 	 test loss : 1.7701894044876099
60 	 train loss : 1.7576712369918823 	 test loss : 1.7631553411483765
70 	 train loss : 1.7517908811569214 	 test loss : 1.7578370571136475
80 	 train loss : 1.7469817399978638 	 test loss : 1.7531683444976807
90 	 train loss : 1.7427291870117188 	 test loss : 1.7489087581634521


In [None]:
test_l = 0
with torch.inference_mode():
  for t_x,t_y in test_loader:
    test_y = model(t_x)
    test_l+= loss_fn(test_y,t_y)
  test_l/=len(test_loader)
  print(f"loss : {test_l}")


loss : 1.7455533742904663


    It took around 26minutes to  run on cpu

***Now change the runtime to GPU***

In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [6]:
device

'cuda'

In [7]:
# model on GPU
torch.manual_seed(seed)
gpu_model = ConvolutionalClassifier(input_channel=1, output_features = 1)
gpu_model.to(device)

ConvolutionalClassifier(
  (convmodel): Sequential(
    (0): Conv2d(1, 12, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU()
    (2): Conv2d(12, 3, kernel_size=(3, 3), stride=(2, 2))
    (3): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
    (4): Flatten(start_dim=1, end_dim=-1)
  )
  (linearmodel): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=12, out_features=10, bias=True)
    (2): Softmax(dim=None)
  )
)

In [8]:
# loss and optimizers are similar
gpu_loss_fn = torch.nn.CrossEntropyLoss()
gpu_optimizer = torch.optim.Adadelta(params = gpu_model.parameters(),lr = 0.01)

In [14]:
# gpu model training
torch.manual_seed(seed)
epochs = 100
for epoch in range(epochs):
  gpu_model.train()
  for x,y in train_loader:
    x = x.to(device)
    y = y.to(device)
    y_pred = gpu_model(x)
    loss = gpu_loss_fn(y_pred,y)
    gpu_optimizer.zero_grad()
    loss.backward()
    gpu_optimizer.step()


  return self._call_impl(*args, **kwargs)


In [22]:
torch.save(gpu_model.state_dict(),'savedmodel.h5')

In [23]:
new_model = ConvolutionalClassifier(input_channel=1, output_features=1)
new_model.load_state_dict(torch.load('savedmodel.h5'))

<All keys matched successfully>

In [24]:
new_model.parameters()

<generator object Module.parameters at 0x7a7f76132420>

In [25]:
gpu_model.parameters()

<generator object Module.parameters at 0x7a7f76133300>