## Imports

In [1]:
import os
import argparse
import yaml

import load_data, models, train

import torch
from torchsummary import summary

## Environment variables

In [2]:
# Optional YAML-based configuration, currently disabled: the settings are
# hard-coded in the cells of this notebook instead.
# yaml_data = './configs/config.yaml'

# with open(yaml_data) as file:
#     config = yaml.safe_load(file)

# Request GPU training. The device-selection cell still falls back to the CPU
# when CUDA is not available.
use_gpu = True

In [3]:
# Training device: the first CUDA GPU is used only when it is both requested
# (`use_gpu`) and actually available; in every other case training runs on the CPU.
device = torch.device("cuda:0" if use_gpu and torch.cuda.is_available() else "cpu")

## Data path

In [4]:
# Dataset locations: each split lives in its own sub-folder of `data_root`.
# Leaving `val_dir` (or `test_dir`) as None disables loading of that split.
data_root = './data'

train_dir, test_dir = (os.path.join(data_root, split) for split in ('train', 'test'))
val_dir = None

In [5]:
# Sanity check: report how many entries the training folder contains.
print(f'Num classes: {len(os.listdir(train_dir))}')

Num classes: 10


## Project Parameters

In [6]:
# Absolute path of the directory the notebook is running in; all output
# folders are created underneath it.
root_dir = os.path.abspath(os.curdir)

In [7]:
# `train_id` names this training run. Pick something unique and descriptive:
# it is used as the folder name for this run's checkpoints and final model.
train_id = "Default_10_Notebook_Train_1"

In [8]:
# The model's final checkpoint will be saved in {model_dir}
model_dir = f'{root_dir}/model/{train_id}'
# All other checkpoints are saved in {checkpoints_dir}
checkpoints_dir = f'{root_dir}/Checkpoint/{train_id}'

# Full path of the final weights file written by the last cell of the notebook.
save_model_path = f"{model_dir}/final.pth"

# exist_ok=True keeps this cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(model_dir, exist_ok=True)
os.makedirs(checkpoints_dir, exist_ok=True)

## Training Parameters

In [9]:
# Network parameters
# Number of epochs to run; progress is reported as "Epoch 0/300".
num_epochs = 301
# One class per sub-directory of the training folder. Only directories are
# counted so that stray files (e.g. `.DS_Store`, a README) do not inflate
# the size of the output layer.
# NOTE(review): assumes load_data.load_images also derives classes from the
# sub-directories of train_dir - confirm.
num_classes = sum(
    os.path.isdir(os.path.join(train_dir, entry)) for entry in os.listdir(train_dir)
)
# Spatial size of one input sample (without the channel dimension).
input_shape = (160, 50)

In [10]:
# Batch sizes, in the order: train, validation, test.
train_batch_size, val_batch_size, test_batch_size = 64, 1, 32

#### Start the training from scratch

In [11]:
# Fresh run: begin at epoch 0 and do not load any checkpoint.
start_epoch, load_model_path = 0, None

#### To continue training from a saved checkpoint instead

In [12]:
# Uncomment both lines to resume a run: the weights are then read from the
# checkpoint file `5.pth` and training restarts at epoch 6.
# NOTE(review): presumably checkpoint N is written after epoch N, so
# start_epoch should be N + 1 - confirm against train.train_model.
# start_epoch = 6
# load_model_path = f'{checkpoints_dir}/5.pth'

## Data Loaders

In [13]:
# The training split is mandatory and also yields the class proportions
# that are later handed to the training routine.
train_data, train_classes, train_proportions = load_data.load_images(train_dir, train_batch_size, 'train')

# Validation and test splits are optional: a falsy directory yields None placeholders.
if val_dir:
    val_data, val_classes, _ = load_data.load_images(val_dir, val_batch_size, 'val')
else:
    val_data, val_classes, _ = None, None, None

if test_dir:
    test_data, test_classes, _ = load_data.load_images(test_dir, test_batch_size, 'test')
else:
    test_data, test_classes, _ = None, None, None

Loading 3009 images from ./data/train is over.
Loading 571 images from ./data/test is over.


## Training Setup

In [14]:
# Build the network for the configured class count and input size, then move
# its parameters to the selected training device.
net = models.Model(num_classes=num_classes, input_shape=input_shape)
net = net.to(device)

### Layers

In [15]:
# Print the module tree: every layer name together with its constructor settings.
print(net)

Model(
  (classifier): Classifier(
    (act1): LeakyReLU(negative_slope=0.05)
    (act2): Softmax(dim=1)
    (conv11): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (conv12): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (conv13): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (mp1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (drop1): Dropout(p=0.2, inplace=False)
    (conv21): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (conv22): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (conv23): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (mp2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (drop2): Dropout(p=0.2, inp

### Output Summary

In [16]:
# Per-layer output shapes and parameter counts for one single-channel input.
# The spatial size is taken from `input_shape` so this summary always matches
# the size the network was built with; the leading 1 is the channel count
# expected by the first convolution.
summary(net, [1, *input_shape], verbose=0)

Layer (type:depth-idx)                   Output Shape              Param #
├─Classifier: 1-1                        [-1, 10]                  --
|    └─Conv2d: 2-1                       [-1, 32, 160, 50]         320
|    └─LeakyReLU: 2-2                    [-1, 32, 160, 50]         --
|    └─Conv2d: 2-3                       [-1, 32, 160, 50]         9,248
|    └─LeakyReLU: 2-4                    [-1, 32, 160, 50]         --
|    └─Conv2d: 2-5                       [-1, 32, 160, 50]         9,248
|    └─LeakyReLU: 2-6                    [-1, 32, 160, 50]         --
|    └─MaxPool2d: 2-7                    [-1, 32, 80, 25]          --
|    └─BatchNorm2d: 2-8                  [-1, 32, 80, 25]          64
|    └─Dropout: 2-9                      [-1, 32, 80, 25]          --
|    └─Conv2d: 2-10                      [-1, 64, 80, 25]          18,496
|    └─LeakyReLU: 2-11                   [-1, 64, 80, 25]          --
|    └─Conv2d: 2-12                      [-1, 64, 80, 25]          36,928


In [17]:
# Optionally warm-start from a checkpoint; skipped when load_model_path is None.
if load_model_path:
    # map_location remaps tensors that were saved on another device (e.g. a GPU
    # checkpoint opened on a CPU-only machine) onto the device used in this run.
    # NOTE: torch.load unpickles the file, so only open checkpoints from a trusted source.
    net.load_state_dict(torch.load(load_model_path, map_location=device))

In [18]:
print('\nTraining started:')

# Run the training routine and keep the network it returns.
net = train.train_model(
    net,
    # Data splits (`val` is None when no validation folder is configured).
    train=train_data, val=val_data, test=test_data,
    classes=test_classes,
    train_proportions=train_proportions,
    # Schedule.
    start_epoch=start_epoch, epochs=num_epochs,
    # Run bookkeeping: run name and the folder passed for checkpoints.
    train_id=train_id, model_folder=checkpoints_dir,
    device=device,
)


Training started:
Epoch 0/300


KeyboardInterrupt: 

### Saving Final Checkpoint

In [None]:
# Persist only the learned parameters (the state dict) to the final model path.
torch.save(obj=net.state_dict(), f=save_model_path)