In [1]:
import torch
import torchvision.datasets as datasets
from torchvision import transforms
from pytorch_trainer.dataloaders import create_dataloaders
from pytorch_trainer.engine import train
from pathlib import Path
from torch.optim.lr_scheduler import ReduceLROnPlateau
import numpy as np
import os
import torch.nn as nn

2024-03-08 16:22:41.137613: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Pick the compute device: use the GPU when PyTorch can see one, else the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [3]:
# Preprocessing pipeline: convert the image to a tensor, then normalise with
# mean 0.5 and std 0.5.
# NOTE(review): the dataset cells visible in this notebook pass
# transforms.ToTensor() directly, so this pipeline appears to be unused —
# confirm whether normalisation was intended.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [4]:
# Fixed seed so the train/validation partition is identical on every
# Restart & Run All; without it random_split draws from the global RNG and the
# validation set changes from run to run.
SPLIT_SEED = 42
# Fraction of the official CIFAR-10 training set kept for training; the
# remainder becomes the validation set.
TRAIN_FRACTION = 0.8

train_val_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=transforms.ToTensor())
train_val_data_count = len(train_val_data)
train_count = int(train_val_data_count * TRAIN_FRACTION)
# Derive the validation size by subtraction so the two counts always sum to the total.
val_count = train_val_data_count - train_count
train_data, val_data = torch.utils.data.random_split(
    train_val_data,
    [train_count, val_count],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Attach the class names to the training split so later cells can read
# train_data.classes.
train_data.classes = train_val_data.classes

Files already downloaded and verified


In [5]:
# Held-out CIFAR-10 test split (train=False), converted to tensors with ToTensor.
test_data = datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)


Files already downloaded and verified


In [6]:
# Label names for the ten classes, read from the attribute attached to the
# training split; the bare expression below displays them as the cell output.
class_names = train_data.classes
class_names

['airplane',
 'automobile',
 'bird',
 'cat',
 'deer',
 'dog',
 'frog',
 'horse',
 'ship',
 'truck']

In [7]:
# Number of samples per mini-batch, shared by the train/val/test DataLoaders.
BATCH_SIZE = 16
# Epoch count handed to train(); that call also passes an early-stopping
# patience of 10, so this presumably acts as an upper bound — TODO confirm
# against pytorch_trainer.engine.train.
EPOCHS = 1000

In [8]:
from torch.utils.data import DataLoader


# Wrap each split in a DataLoader that yields mini-batches of BATCH_SIZE samples.
train_dataloader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,  # reshuffle the training samples every epoch
)

# Evaluation loaders iterate in a fixed order: shuffling is not needed for
# evaluation, and a deterministic order keeps validation runs comparable and
# consistent with the test loader.
val_dataloader = DataLoader(
    val_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

test_dataloader = DataLoader(
    test_data,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

In [9]:
from torch import nn

class SE_Block(nn.Module):
    """Squeeze-and-Excitation style channel gate.

    Each channel of the input is globally average-pooled to a single value,
    the resulting per-channel vector is passed through a two-layer bottleneck
    MLP ending in a sigmoid, and the input is multiplied channel-wise by the
    resulting gate values.

    Args:
        channel: Number of channels in the input feature map.
        reduction: Divisor applied to ``channel`` to obtain the bottleneck
            width. The width is clamped to at least 1 so that small channel
            counts (``channel < reduction``) still get a trainable gate.
    """

    def __init__(self, channel, reduction=16):
        super(SE_Block, self).__init__()
        # Without the clamp, channel // reduction is 0 whenever
        # channel < reduction (e.g. channel=5, reduction=16). That creates
        # zero-width Linear layers whose output is always 0, so the gate is a
        # constant sigmoid(0) = 0.5 and nothing is learned.
        hidden = max(1, channel // reduction)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Scale ``x`` (a 4-D tensor, unpacked as b, c, _, _) by its per-channel gate.

        Returns a tensor with the same shape as ``x``.
        """
        b, c, _, _ = x.size()
        # Squeeze: one pooled value per (sample, channel).
        y = self.avg_pool(x).view(b, c)
        # Excite: per-channel gate in (0, 1), reshaped so it broadcasts over H and W.
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

In [16]:
class SMALL_CNN(nn.Module):
    """Small two-stage CNN with an SE block between the stages and an MLP head.

    The head's first Linear layer is hard-wired to 11 * 6 * 6 input features,
    which is what the body produces for 3x32x32 inputs (the shapes in the
    comments below are for that input size).

    Args:
        dropout: Drop probability used after the second conv stage and after
            each hidden Linear layer of the head.
    """

    def __init__(self, dropout=0.0):
        super().__init__()

        # Convolutional feature extractor.
        feature_layers = [
            # Stage 1: 3x32x32 -> conv 5x30x30 -> pool 5x15x15
            nn.Conv2d(3, 5, kernel_size=3),
            nn.BatchNorm2d(5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),

            # Channel gating on the 5 stage-1 feature maps.
            SE_Block(channel=5, reduction=16),

            # Stage 2: 5x15x15 -> conv 11x13x13 -> pool 11x6x6
            nn.Conv2d(5, 11, kernel_size=3),
            nn.BatchNorm2d(11),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            nn.Dropout(dropout),
        ]
        self._body = nn.Sequential(*feature_layers)

        # Classifier: flatten, two hidden layers, 10 output logits.
        classifier_layers = [
            nn.Flatten(),
            nn.Linear(11 * 6 * 6, 100),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),

            nn.Linear(100, 50),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),

            nn.Linear(50, 10),
        ]
        self._head = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Run the feature extractor and then the classifier; returns the head's output."""
        return self._head(self._body(x))

In [17]:
# Model under training: the small CNN with a dropout probability of 0.2.
model_1 = SMALL_CNN(dropout=0.2)

# Multi-class classification objective.
loss_fn = torch.nn.CrossEntropyLoss()

# NOTE(review): weight_decay=0.1 looks unusually large for Adam — confirm it
# is intentional.
optimizer = torch.optim.Adam(
    model_1.parameters(),
    lr=1e-3,
    betas=(0.9, 0.999),
    weight_decay=0.1,
)

# Scheduler in 'min' mode, i.e. it watches a quantity that should decrease.
scheduler = ReduceLROnPlateau(optimizer, mode='min')

In [18]:
# Launch training via pytorch_trainer.engine.train, passing the model, the
# three dataloaders, the optimizer, the loss and the epoch count positionally,
# plus an early-stopping patience of 10 and the LR scheduler as keywords.
# NOTE(review): "paitence" is spelled this way in the keyword, and the logged
# hyperparameters also print "es_paitence", so it presumably matches the
# library's parameter name — do not "correct" it here without checking.
train(
    model_1,
    train_dataloader,
    val_dataloader,
    test_dataloader,
    optimizer,
    loss_fn,
    EPOCHS,
    early_stopper_paitence=10,
    scheduler=scheduler
    )

[34m[1mTensorBoard: [0mStart with 'tensorboard --logdir /opt/anaconda3/envs/pytorch_cuda_11.8/lib/python3.11/site-packages/ipykernel_launcher/20240308-162336', view at http://localhost:6006/
[34m[1mHyperparameters: [0mepochs=1000, image_size=(3, 32, 32), batch_size=16, optimizer=Adam, lr=0.001, betas=(0.9, 0.999), weight_decay=0.1, loss=CrossEntropyLoss, scheduler=ReduceLROnPlateau, es_paitence=10, es_min_delta=0, device=cuda
[34m[1mModel Structure: [0m
Layer (type (var_name))                       Input Shape          Output Shape         Param #              Trainable
SMALL_CNN (SMALL_CNN)                         [1, 3, 32, 32]       [1, 10]              --                   True
├─Sequential (_body)                          [1, 3, 32, 32]       [1, 11, 6, 6]        --                   True
│    └─Conv2d (0)                             [1, 3, 32, 32]       [1, 5, 30, 30]       140                  True
│    └─BatchNorm2d (1)                        [1, 5, 30, 30]       [1, 5