install data

https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz

In [1]:
! pip install wget
import wget
import os

if os.path.isfile('MedNIST.tar.gz'):
  pass
else:
  wget.download("https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz")

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
!  tar -xf "MedNIST.tar.gz"

In [3]:
! rm "./MedNIST/README.md"

In [4]:
MedNIST_DATA_DIR = f"./MedNIST"

Import packages

In [5]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy
from PIL import Image
import random

Preprocess parameters and constants

In [6]:
TEST_RATIO = 0.2

Preprocess Code

In [7]:
if os.path.isfile("./X_train.npy") and os.path.isfile("./Y_train") and os.path.isfile("./X_test.npy") and os.path.isfile("./Y_test.npy"):
        train_x = numpy.load('./X_train.npy')
        train_y = numpy.load('./Y_train.npy')
        valid_x = numpy.load('./X_test.npy')
        valid_y = numpy.load('./Y_test.npy')

        train_x = torch.FloatTensor(train_x)
        valid_x = torch.FloatTensor(valid_x)
        train_y = torch.LongTensor(train_y)
        valid_y = torch.LongTensor(valid_y)

else:
  data_folder_list = os.listdir(MedNIST_DATA_DIR)
  dataset_dict = dict()
  for category,folder in enumerate(data_folder_list):
      dataset_dict[f'{folder}'] = list()
      for img in os.listdir(f'{MedNIST_DATA_DIR}/{folder}'):
          img_array = numpy.asarray(Image.open(f'{MedNIST_DATA_DIR}/{folder}/{img}'))
          dataset_dict[f'{folder}'].append([img_array,category])

  tmp_test = list()
  tmp_train = list()
  
  for key in dataset_dict.keys():
      tmp_test.append(dataset_dict[key][:int(TEST_RATIO*len(dataset_dict[key]))])
      tmp_train.append(dataset_dict[key][int(TEST_RATIO*len(dataset_dict[key])):])

  train_x = list()
  train_y = list()
  valid_x = list()
  valid_y = list()

  for key in range(6):
      for train_data_x, train_data_y in tmp_train[key]:
          train_x.append([train_data_x])
          train_y.append(train_data_y)

      for test_data_x, test_data_y in tmp_test[key]:
          valid_x.append([test_data_x])
          valid_y.append(test_data_y)

  train_x = numpy.asarray(train_x)
  train_y = numpy.asarray(train_y)
  valid_x = numpy.asarray(valid_x)
  valid_y = numpy.asarray(valid_y)

  train_x = train_x/255.0
  valid_x = valid_x/255.0

  train_y = numpy.ravel(train_y)
  valid_y = numpy.ravel(valid_y)
  
  train_index = numpy.arange(len(train_x))
  random.Random(6).shuffle(train_index)

  train_x = train_x[train_index]
  train_y = train_y[train_index]

  test_index = numpy.arange(len(valid_x))
  random.Random(7).shuffle(test_index)

  valid_x = valid_x[test_index]
  valid_y = valid_y[test_index]

  train_x = torch.FloatTensor(train_x)
  valid_x = torch.FloatTensor(valid_x)
  train_y = torch.LongTensor(train_y)
  valid_y = torch.LongTensor(valid_y)

  numpy.save('./X_train.npy',train_x.cpu().numpy())
  numpy.save('./Y_train.npy',train_y.cpu().numpy())
  numpy.save('./X_test.npy',valid_x.cpu().numpy())
  numpy.save('./Y_test.npy',valid_y.cpu().numpy())

Dataloader parameter

In [8]:
BATCH_SIZE = 1024

Set Dataloader

In [9]:
train_set = torch.utils.data.TensorDataset(train_x, train_y)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=False)

valid_set = torch.utils.data.TensorDataset(valid_x, valid_y)
valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=BATCH_SIZE, shuffle=False)

Prepare Device

In [10]:
USE_CUDA = torch.cuda.is_available()

if USE_CUDA:
    device = torch.device(f'cuda:0')
    torch.cuda.set_device(device)
else:
    device = torch.device('cpu')

Config Model

In [11]:
class Conv2d_Model(nn.Module):
    def __init__(self):
        super(Conv2d_Model, self).__init__()
        self.convolution_layer1 = nn.Conv2d(1,6,3)
        self.convolution_layer2 = nn.Conv2d(6,12,3)
        self.pool = nn.MaxPool2d(2, 2)
        self.dense_layer1 = nn.Linear(14*14*12, 256)
        self.dense_layer2 = nn.Linear(256, 32)
        self.dense_layer3 = nn.Linear(32, 6)

    def forward(self,x):
        x = F.relu(self.convolution_layer1(x))
        x = self.pool(x)
        x = F.relu(self.convolution_layer2(x))
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = F.relu(self.dense_layer1(x))
        x = F.relu(self.dense_layer2(x))
        x = self.dense_layer3(x)
        return x

Create Model object

In [12]:
model = Conv2d_Model()
if USE_CUDA: model.cuda()

Define Loss Function and Optimizer 

In [13]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

Parameters for training session

In [14]:
EPOCH = 100
PATIENCE_LIMIT = 5
CURRENT_PAIENCE = 0
STANDARD_VAL_LOSS = 10**9

Train

In [18]:
for epoch in range(EPOCH):
  print(f"---------------Epoch : {epoch+1}/{EPOCH}--------------------")
  train_loss = 0.0

  for train_idx, data in enumerate(train_loader, 0):
    optimizer.zero_grad()
    print('\r',f"training {train_idx+1}/{len(train_loader)}, train_loss: {train_loss:0.4f}",end=" ")
    inputs, labels = data

    outputs = model(inputs.to(device))
    loss = criterion(outputs, labels.to(device))

    loss.backward()
    optimizer.step()

    train_loss += loss.item()

  print('')
  print('\n')

  total = 0
  correct = 0
  tot_val_loss = 0.0
  acc = 0.0
    
  for val_idx, val_data in enumerate(valid_loader, 0):
    print('\r',f"validing {val_idx+1}/{len(valid_loader)}, val_loss:{tot_val_loss:0.4f}, val_acc: {acc:0.4}%", end=" ")
      
    val_inputs, val_label = val_data
    val_output = model(val_inputs.to(device))
    val_loss = criterion(val_output, val_label.to(device))
      
    prediction = torch.argmax(val_output,1)
    tot_val_loss += val_loss.item() 

    total += val_label.size(0)
    correct += (prediction == val_label.to(device)).sum().item()
    acc = 100.0*correct/total
  print('')
  print('\n')
  
  if PATIENCE_LIMIT > CURRENT_PAIENCE:

    if val_loss < STANDARD_VAL_LOSS:
      
      STANDARD_VAL_LOSS = val_loss
      best_epoch = epoch+1
      best_model = model
      CURRENT_PAIENCE = 0

    else:
      CURRENT_PAIENCE += 1
  else:break

print('')
print('\n')

if (epoch+1) != EPOCH:
  print("Early Stopping ...")

if os.path.isfile('./2D_CNN_model_parameter'):
  os.remove('./2D_CNN_model_parameter')

torch.save(best_model.state_dict(), './2D_CNN_model_parameter')
print("================================================")
print(f"model parameters from epoch {best_epoch} saved!")
print("================================================")
print("\n")
print(".. model train finished")

---------------Epoch : 1/100--------------------
 training 47/47, train_loss: 0.0088 


 validing 12/12, val_loss:0.0225, val_acc: 99.94% 





Early Stopping ...
model parameters from epoch 6 saved!


.. model train finished
