#### Import Libraries 

In [1]:
import h5py
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# PyTorch libraries and modules
import torch
from torch.nn import Linear, ReLU, LeakyReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout, MSELoss
from torch.optim import Adam, SGD

  from .autonotebook import tqdm as notebook_tqdm


#### Split the data

In [2]:
# H5_TRAIN = "RBC-data-2021-train.hdf5"
# H5_VAL = "RBC-data-2021-val.hdf5"
# H5_TEST = "RBC-data-2021-test.hdf5"

H5_TRAIN = "RBC-data-2021-train-sample.hdf5"
H5_VAL = "RBC-data-2021-val-sample.hdf5"
H5_TEST = "RBC-data-2021-test-sample.hdf5"

train_file = h5py.File(H5_TRAIN, 'r')
val_file = h5py.File(H5_VAL, 'r')
test_file = h5py.File(H5_TEST, 'r')


def create_hdf5_generator(file_path, batch_size):
    file = h5py.File(file_path)
    data_size = file['data'].shape[0]

    while True: # loop through the dataset indefinitely
        for i in np.arange(0, data_size, batch_size):
            data = file['data'][i:i+batch_size]
            labels = file['labels'][i:i+batch_size]
            # converting data into torch format
            data  = torch.from_numpy(data)
            # converting the lables into torch format
            labels = torch.from_numpy(labels)
            yield data, labels



f <HDF5 dataset "data": shape (675, 20, 8, 8), type "<f4">
f <HDF5 dataset "labels": shape (675,), type "<f4">
f <HDF5 dataset "data": shape (75, 20, 8, 8), type "<f4">
f <HDF5 dataset "labels": shape (75,), type "<f4">
f <HDF5 dataset "data": shape (188, 20, 8, 8), type "<f4">
f <HDF5 dataset "labels": shape (188,), type "<f4">


#### Creating CNN model

In [11]:
class Net(Module):   
    def __init__(self):
        super(Net, self).__init__()

        self.cnn_layers = Sequential(
            # Defining a 2D convolution layer
            Conv2d(20, 32, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(32),
            LeakyReLU(inplace=True),
            # Defining another 2D convolution layer
            Conv2d(32, 32, kernel_size=3, stride=1, padding=1),
            BatchNorm2d(32),
            LeakyReLU(inplace=True)
        )

        self.linear_layers = Sequential(
            Linear(32 * 8 * 8, 37)
        )

    # Defining the forward pass    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x

#### Define optimizer and loss function 

In [12]:
# defining the model
model = Net()
# defining the optimizer
optimizer = Adam(model.parameters(), lr=0.07)
# defining the loss function
criterion = CrossEntropyLoss()
# checking if GPU is available
if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()
    
print(model)

Net(
  (cnn_layers): Sequential(
    (0): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.01, inplace=True)
    (3): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.01, inplace=True)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=2048, out_features=37, bias=True)
  )
)


#### Training 

In [18]:
import numpy as np
import time

batch_size = 32

train_hdf5_generator = create_hdf5_generator(H5_TRAIN, batch_size)
val_hdf5_generator = create_hdf5_generator(H5_VAL, batch_size)

epochs = 5
min_valid_loss = np.inf

train_losses = []
val_losses = []

for e in range(epochs):
    train_loss = 0.0
    model.train()
    with tqdm(train_hdf5_generator, unit='batch') as tepoch:
        for data, labels  in tepoch:
            if labels.shape[0] < batch_size: 
                break
            tepoch.set_description(f"Epoch {e}")
            # labels = labels.to(torch.float32)
            labels = labels.to(torch.long)

            # converting the data into GPU format
            if torch.cuda.is_available():
                data, labels = data.cuda(), labels.cuda()

            # clearing the Gradients of the model parameters
            optimizer.zero_grad()
            #prediction
            target = model(data)
            loss = criterion(target, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            train_losses.append(loss.item())

            tepoch.set_postfix(loss=loss.item())
            time.sleep(0.1)

    valid_loss = 0.0
    model.eval()
    with tqdm(val_hdf5_generator, unit='batch') as vepoch:
        for data, labels in vepoch:
            if labels.shape[0] < batch_size: 
                break
            # labels = labels.to(torch.float32)
            labels = labels.to(torch.long)
            # converting the data into GPU format
            if torch.cuda.is_available():
                    data, labels = data.cuda(), labels.cuda()
            target = model(data)
            loss = criterion(target, labels)
            valid_loss = loss.item() * data.size(0)
            val_losses.append(loss.item())
            tepoch.set_postfix(loss=loss.item())
            time.sleep(0.1)
    
    print(f'Epoch {e+1} \t\t Training Loss: {train_loss / batch_size} \t\t Validation Loss: {valid_loss / batch_size}')
    if min_valid_loss > valid_loss:
        print(f'Validation Loss Decreased({min_valid_loss:.6f}--->{valid_loss:.6f}) \t Saving The Model')
        min_valid_loss = valid_loss
        # Saving State Dict
        torch.save(model.state_dict(), 'saved_model.pth')



Epoch 0: : 21batch [00:02,  7.93batch/s, loss=3.02]
2batch [00:00,  8.79batch/s]


Epoch 1 		 Training Loss: 2.1115522980690002 		 Validation Loss: 3.6648764610290527
Validation Loss Decreased(inf--->117.276047) 	 Saving The Model


Epoch 1: : 21batch [00:02,  8.27batch/s, loss=2.98]
2batch [00:00,  8.78batch/s]


Epoch 2 		 Training Loss: 2.1188453137874603 		 Validation Loss: 3.6870222091674805


Epoch 2: : 21batch [00:02,  8.30batch/s, loss=2.98]
2batch [00:00,  8.72batch/s]


Epoch 3 		 Training Loss: 2.11085394769907 		 Validation Loss: 3.7171387672424316


Epoch 3: : 21batch [00:02,  8.26batch/s, loss=2.98]
2batch [00:00,  8.69batch/s]


Epoch 4 		 Training Loss: 2.1109675616025925 		 Validation Loss: 3.712414264678955


Epoch 4: : 21batch [00:02,  8.12batch/s, loss=2.99]
2batch [00:00,  9.38batch/s]

Epoch 5 		 Training Loss: 2.119533531367779 		 Validation Loss: 3.7221405506134033





In [None]:
# plotting the training and validation loss
# plt.plot(train_losses, label='Training loss')
# plt.plot(val_losses, label='Validation loss')
# plt.legend()
# plt.show()