In [1]:
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch import nn
import torch.nn.functional as F

# check if CUDA is available
train_on_gpu = torch.cuda.is_available()

if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

CUDA is available!  Training on GPU ...


Prepare Train / test data sets

In [2]:
num_workers = 0
batch_size = 32
valid_size = 0.2

train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

train_data = datasets.CIFAR10('data', train=True,
                              download=True, transform=train_transform)
test_data = datasets.CIFAR10('data', train=False,
                             download=True, transform=test_transform)

num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, 
    sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
    num_workers=num_workers)

# specify the image classes
classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']

Files already downloaded and verified
Files already downloaded and verified


## Simple CNN with two convolutions

7 (weighted) layer 이상을 쌓으려면... pool 한번에 절반씩 줄어들기 때문에 pool을 계속 하는 것도 이상함. pool 네 번이면 2x2로 작아짐. 

pooling 없이 conv만 하면? -- pooling이나 conv나 계산량이 비슷하므로 conv를 자꾸 하는 것도 나쁘지 않을 듯. 

In [3]:
from ResNet20 import ResNet, BasicBlock

In [4]:
model = ResNet(BasicBlock,[1,1,1])

model.to('cuda')
train_on_gpu=True

#print(model)

In [5]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping

/home/hoseung/anaconda3/envs/fhe/lib/python3.10/site-packages/lightning_utilities/core/imports.py:132: Unbuilt egg for bbsQt [unknown version] (/home/hoseung/Work/FHE/Kinect_BBS_demo)
/home/hoseung/anaconda3/envs/fhe/lib/python3.10/site-packages/lightning_utilities/core/imports.py:132: Unbuilt egg for bbsQt [unknown version] (/home/hoseung/Work/FHE/Kinect_BBS_demo)


In [6]:
class LitModel(pl.LightningModule):
    def __init__(self, model, criterion):
        super(LitModel, self).__init__()
        self.model = model
        self.criterion = criterion

    def forward(self, x):
        return self.model(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = self.criterion(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = self.criterion(y_hat, y)
        self.log('val_loss', loss)
        return loss

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.model(x)
        loss = self.criterion(y_hat, y)
        acc = (y_hat.argmax(dim=1) == y).float().mean()
        self.log('test_loss', loss)
        self.log('test_acc', acc)
        return {'test_loss': loss, 'test_acc': acc}

    def configure_optimizers(self):
        optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        return optimizer

In [7]:
# Data preparation
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Instantiate the model and Lightning module
criterion = nn.CrossEntropyLoss()
lit_model = LitModel(model, criterion)

# Define Trainer and fit the model
checkpoint_callback = ModelCheckpoint(monitor='val_loss')
early_stop_callback = EarlyStopping(monitor='val_loss', patience=3)

trainer = Trainer(max_epochs=50, callbacks=[checkpoint_callback, early_stop_callback])
trainer.fit(lit_model, train_dataloaders=train_loader, val_dataloaders=val_loader)

# Test the model
trainer.test(dataloaders=test_loader)

/home/hoseung/anaconda3/envs/fhe/lib/python3.10/site-packages/lightning_utilities/core/imports.py:132: Unbuilt egg for bbsQt [unknown version] (/home/hoseung/Work/FHE/Kinect_BBS_demo)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
/home/hoseung/anaconda3/envs/fhe/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
You are using a CUDA device ('NVIDIA GeForce RTX 4060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matm

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/hoseung/anaconda3/envs/fhe/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.
/home/hoseung/anaconda3/envs/fhe/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Restoring states from the checkpoint path at /home/hoseung/Work/FHE/MuxConv/scripts/lightning_logs/version_5/checkpoints/epoch=15-step=25008.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Loaded model weights from the checkpoint at /home/hoseung/Work/FHE/MuxConv/scripts/lightning_logs/version_5/checkpoints/epoch=15-step=25008.ckpt
/home/hoseung/anaconda3/envs/fhe/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=31` in the `DataLoader` to improve performance.


Testing: |          | 0/? [00:00<?, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_acc             0.775600016117096
        test_loss           0.6556284427642822
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.6556284427642822, 'test_acc': 0.775600016117096}]

In [8]:
trainer.save_checkpoint("final.pth")

In [6]:
import matplotlib.pyplot as plt 

## Validation accuracy

## 성능 

(Conv - BN - Avg Pool)
conv 5x5, same padding = 67
conv 3x3, same padding = 68
conv 3x3, valid padding = 67



--------------------------

relu + maxpool: ~62%

relu + avgpool: ~58% -- OK, maxpool -> avgpool은 큰 문제 없음. 

approx. relu + avgpool: 52% !! 

approx. relu + avgpool + BN (2Conv + 3FC, 20 epoch): 58% 정도? 

approx. relu + avgpool + BN (2Conv + 2FC, 50 epoch: 59% 


## Number of trainable parameters

In [19]:
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])
print(f"We have to train {params} parameters")

We have to train 77402 parameters
