In [1]:
from torchvision.transforms.v2 import ToTensor

# --- Experiment configuration -------------------------------------------
# Identity of this run (these strings also appear in the checkpoint file
# name used further down in the notebook).
MODEL_NAME = "ResNet50_transfer"
VERSION = "001"

# Network backbone.
BACKBONE = "resnet50"

# Training-loop / data-loading settings.
BATCHSIZE = 32
MAX_EPOCHS = 50
NUM_WORKERS = 31  # NOTE(review): presumably tuned to this machine's core count — confirm

In [2]:
import os
import shutil
import subprocess

import pytorch_lightning as pl
import torch
import torch.nn as nn
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from torchvision import transforms

from code.custom_checkpoint import CustomModelCheckpoint
from code.dataloader import PneumoniaDataset
from code.project_globals import TEST_DIR, TRAIN_DIR, VAL_DIR


In [3]:
# Process-wide runtime knobs; both must be set before any training starts.
#
# 1) Turn on CUDA launch blocking.
#    NOTE(review): this is normally a debugging aid — confirm it is still
#    wanted for full training runs.
os.environ.update(CUDA_LAUNCH_BLOCKING='1')

# 2) Allow reduced-precision float32 matmuls ('medium') so Tensor Cores can
#    be utilised.
torch.set_float32_matmul_precision('medium')

In [4]:
# Report the GPU(s) visible to the NVIDIA driver.
#
# The tool is located with `shutil.which` and run through `subprocess`
# instead of the `!nvidia-smi` shell magic, so that a missing binary or a
# non-zero exit status is reported as "no GPU" rather than being printed as
# if it were a GPU report (a "command not found" message does not contain
# the word 'failed').
#
# Resulting names:
#   gpu_info -- the tool's combined output as one '\n'-joined string
#               ('' when nvidia-smi is not installed)
#   gpu_ok   -- True only when nvidia-smi ran successfully
nvidia_smi = shutil.which('nvidia-smi')
if nvidia_smi is None:
    gpu_info, gpu_ok = '', False
else:
    completed = subprocess.run(
        [nvidia_smi], capture_output=True, text=True, check=False
    )
    # Same shape as before: individual output lines joined with '\n'.
    gpu_info = '\n'.join((completed.stdout + completed.stderr).splitlines())
    gpu_ok = completed.returncode == 0 and 'failed' not in gpu_info

if gpu_ok:
    print(gpu_info)
else:
    print('Not connected to a GPU')

Tue Nov 19 19:26:28 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 565.90                 Driver Version: 565.90         CUDA Version: 12.7     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4070 Ti   WDDM  |   00000000:02:00.0  On |                  N/A |
|  0%   44C    P8             14W /  285W |    8486MiB /  12282MiB |     13%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
import torch
from torchvision import transforms
from code.classifier import PneumoniaClassifier, Config

# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise per channel.
# NOTE(review): the mean/std values are presumably the ImageNet statistics
# expected by the pretrained backbone — confirm.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Build the run configuration from the constants declared in the first cell
# (BACKBONE, BATCHSIZE, MAX_EPOCHS, NUM_WORKERS, MODEL_NAME, VERSION) so the
# values are defined in exactly one place.  The remaining hyper-parameters
# have no notebook-level constant and stay inline.
config = Config(
    backbone_name=BACKBONE,
    transfer_learning=True,
    learning_rate=1e-3,
    batch_size=BATCHSIZE,
    max_epochs=MAX_EPOCHS,
    weight_decay=1e-4,
    dropout=0.5,
    num_workers=NUM_WORKERS,
    model_name=MODEL_NAME,
    version=VERSION,
)

# Instantiate the classifier and start training.
model = PneumoniaClassifier(config, transform)
model.train_model()


Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type            | Params | Mode 
--------------------------------------------------------------
0 | accuracy          | BinaryAccuracy  | 0      | train
1 | precision         | BinaryPrecision | 0      | train
2 | recall            | BinaryRecall    | 0      | train
3 | f1                | BinaryF1Score   | 0      | train
4 | feature_extractor | Sequential      | 23.5 M | train
5 | dropout           | Dropout         | 0      | train
6 | classifier        | Linear          | 4.1 K  | train
--------------------------------------------------------------
4.1 K     Trainable params
23.5 M    Non-trainable params
23.5 M    Total params
94.049    Total estimated model params size (MB)
156       Modules in train mode
0         Modules in eval mode


Epoch 0: 100%|██████████| 163/163 [00:26<00:00,  6.22it/s, v_num=2, train_acc_step=0.879, train_precision_step=0.888, train_recall_step=0.958, train_f1_step=0.922]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.77it/s][A
Epoch 0: 100%|██████████| 163/163 [00:26<00:00,  6.07it/s, v_num=2, train_acc_step=0.879, train_precision_step=0.888, train_recall_step=0.958, train_f1_step=0.922, val_acc_step=0.879, val_precision_step=0.888, val_recall_step=0.958, val_f1_step=0.921, val_acc_epoch=0.879, val_precision_epoch=0.888, val_recall_epoch=0.958, val_f1_epoch=0.921]

Metric val_loss improved. New best score: 0.367


Epoch 1: 100%|██████████| 163/163 [00:26<00:00,  6.26it/s, v_num=2, train_acc_step=0.938, train_precision_step=0.954, train_recall_step=0.962, train_f1_step=0.958, val_acc_step=0.879, val_precision_step=0.888, val_recall_step=0.958, val_f1_step=0.921, val_acc_epoch=0.879, val_precision_epoch=0.888, val_recall_epoch=0.958, val_f1_epoch=0.921, train_acc_epoch=0.000, train_precision_epoch=0.000, train_recall_epoch=0.000, train_f1_epoch=0.000]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 14.29it/s][A
Epoch 1: 100%|██████████| 163/163 [00:26<00:00,  6.10it/s, v_num=2, train_acc_step=0.938, train_precision_step=0.954, train_recall_step=0.962, train_f1_step=0.958, val_acc_step=0.937, val_precision_step=0.954, val_recall_step=0.962, val_f1_step=0.958, val_acc_epoch=0.937, val_precision_epoch=0.954, val_recall_epo

Metric val_loss improved by 0.030 >= min_delta = 0.0. New best score: 0.337


Epoch 2: 100%|██████████| 163/163 [00:26<00:00,  6.27it/s, v_num=2, train_acc_step=0.945, train_precision_step=0.957, train_recall_step=0.969, train_f1_step=0.963, val_acc_step=0.937, val_precision_step=0.954, val_recall_step=0.962, val_f1_step=0.958, val_acc_epoch=0.937, val_precision_epoch=0.954, val_recall_epoch=0.962, val_f1_epoch=0.958, train_acc_epoch=0.000, train_precision_epoch=0.000, train_recall_epoch=0.000, train_f1_epoch=0.000]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  8.84it/s][A
Epoch 2: 100%|██████████| 163/163 [00:26<00:00,  6.10it/s, v_num=2, train_acc_step=0.945, train_precision_step=0.957, train_recall_step=0.969, train_f1_step=0.963, val_acc_step=0.945, val_precision_step=0.957, val_recall_step=0.969, val_f1_step=0.963, val_acc_epoch=0.945, val_precision_epoch=0.957, val_recall_epo

Metric val_loss improved by 0.053 >= min_delta = 0.0. New best score: 0.285


Epoch 3: 100%|██████████| 163/163 [00:28<00:00,  5.80it/s, v_num=2, train_acc_step=0.951, train_precision_step=0.964, train_recall_step=0.970, train_f1_step=0.967, val_acc_step=0.945, val_precision_step=0.957, val_recall_step=0.969, val_f1_step=0.963, val_acc_epoch=0.945, val_precision_epoch=0.957, val_recall_epoch=0.969, val_f1_epoch=0.963, train_acc_epoch=0.000, train_precision_epoch=0.000, train_recall_epoch=0.000, train_f1_epoch=0.000]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.36it/s][A
Epoch 3: 100%|██████████| 163/163 [00:28<00:00,  5.63it/s, v_num=2, train_acc_step=0.951, train_precision_step=0.964, train_recall_step=0.970, train_f1_step=0.967, val_acc_step=0.950, val_precision_step=0.963, val_recall_step=0.970, val_f1_step=0.967, val_acc_epoch=0.950, val_precision_epoch=0.963, val_recall_epo

Metric val_loss improved by 0.064 >= min_delta = 0.0. New best score: 0.220


Epoch 4: 100%|██████████| 163/163 [00:27<00:00,  5.90it/s, v_num=2, train_acc_step=0.952, train_precision_step=0.966, train_recall_step=0.970, train_f1_step=0.968, val_acc_step=0.950, val_precision_step=0.963, val_recall_step=0.970, val_f1_step=0.967, val_acc_epoch=0.950, val_precision_epoch=0.963, val_recall_epoch=0.970, val_f1_epoch=0.967, train_acc_epoch=0.000, train_precision_epoch=0.000, train_recall_epoch=0.000, train_f1_epoch=0.000]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.63it/s][A
Epoch 5: 100%|██████████| 163/163 [00:28<00:00,  5.72it/s, v_num=2, train_acc_step=0.954, train_precision_step=0.968, train_recall_step=0.970, train_f1_step=0.969, val_acc_step=0.952, val_precision_step=0.966, val_recall_step=0.970, val_f1_step=0.968, val_acc_epoch=0.952, val_precision_epoch=0.966, val_recall_epo

Metric val_loss improved by 0.029 >= min_delta = 0.0. New best score: 0.191


Epoch 7: 100%|██████████| 163/163 [00:25<00:00,  6.30it/s, v_num=2, train_acc_step=0.960, train_precision_step=0.971, train_recall_step=0.975, train_f1_step=0.973, val_acc_step=0.955, val_precision_step=0.970, val_recall_step=0.970, val_f1_step=0.970, val_acc_epoch=0.955, val_precision_epoch=0.970, val_recall_epoch=0.970, val_f1_epoch=0.970, train_acc_epoch=0.000, train_precision_epoch=0.000, train_recall_epoch=0.000, train_f1_epoch=0.000]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 10.00it/s][A
Epoch 8: 100%|██████████| 163/163 [00:25<00:00,  6.34it/s, v_num=2, train_acc_step=0.960, train_precision_step=0.970, train_recall_step=0.975, train_f1_step=0.973, val_acc_step=0.959, val_precision_step=0.970, val_recall_step=0.975, val_f1_step=0.973, val_acc_epoch=0.959, val_precision_epoch=0.970, val_recall_epo

Metric val_loss improved by 0.037 >= min_delta = 0.0. New best score: 0.154


Epoch 9: 100%|██████████| 163/163 [00:25<00:00,  6.37it/s, v_num=2, train_acc_step=0.960, train_precision_step=0.974, train_recall_step=0.973, train_f1_step=0.973, val_acc_step=0.959, val_precision_step=0.970, val_recall_step=0.975, val_f1_step=0.973, val_acc_epoch=0.959, val_precision_epoch=0.970, val_recall_epoch=0.975, val_f1_epoch=0.973, train_acc_epoch=0.000, train_precision_epoch=0.000, train_recall_epoch=0.000, train_f1_epoch=0.000]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|          | 0/1 [00:00<?, ?it/s][A
Validation DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 12.82it/s][A
Epoch 10: 100%|██████████| 163/163 [00:25<00:00,  6.40it/s, v_num=2, train_acc_step=0.960, train_precision_step=0.972, train_recall_step=0.974, train_f1_step=0.973, val_acc_step=0.960, val_precision_step=0.973, val_recall_step=0.973, val_f1_step=0.973, val_acc_epoch=0.960, val_precision_epoch=0.973, val_recall_ep

Monitored metric val_loss did not improve in the last 3 records. Best score: 0.154. Signaling Trainer to stop.


Epoch 11: 100%|██████████| 163/163 [00:25<00:00,  6.35it/s, v_num=2, train_acc_step=0.960, train_precision_step=0.972, train_recall_step=0.975, train_f1_step=0.973, val_acc_step=0.960, val_precision_step=0.971, val_recall_step=0.975, val_f1_step=0.973, val_acc_epoch=0.960, val_precision_epoch=0.971, val_recall_epoch=0.975, val_f1_epoch=0.973, train_acc_epoch=0.000, train_precision_epoch=0.000, train_recall_epoch=0.000, train_f1_epoch=0.000]


In [17]:
from torchvision.transforms import Compose, Resize, InterpolationMode, ToTensor, Normalize
import torch

# Checkpoint produced by the training run above.
checkpoint_path = '../checkpoints/ResNet50_transfer-epoch=08-val_loss=0.15_v001.ckpt'

# Register every type that must be allow-listed before the checkpoint can be
# read with weights_only=True (transform classes, Config, and the built-in
# set).
torch.serialization.add_safe_globals([
    Compose,
    Resize,
    set,
    Config,
    InterpolationMode,
    ToTensor,
    Normalize,
])

# Restricted load: only allow-listed types are accepted.
checkpoint = torch.load(checkpoint_path, weights_only=True)

# Pull out the "metadata" entry; fall back to an empty dict when it is absent.
metadata = checkpoint.get("metadata", {})

In [20]:
# Checkpoint to evaluate (same file that was inspected in the previous cell).
checkpoint_path = '../checkpoints/ResNet50_transfer-epoch=08-val_loss=0.15_v001.ckpt'

# Build a fresh classifier from the training config and transform.
model = PneumoniaClassifier(config, transform)

# Run the test pass against the checkpoint and show whatever it returns.
# NOTE(review): in the recorded run this call returned None, so `metadata`
# (which held the checkpoint metadata from the previous cell) is overwritten
# with None.  The recorded test accuracy (~0.48) is also far below the
# validation accuracy (~0.96) — verify that test_model really restores the
# checkpoint weights before trusting these numbers.
metadata = model.test_model(checkpoint_path)
print(metadata)

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing DataLoader 0: 100%|██████████| 20/20 [00:18<00:00,  1.10it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_acc_epoch         0.48076921701431274
      test_acc_step         0.6751185059547424
      test_f1_epoch         0.38167938590049744
      test_f1_step          0.2445952147245407
        test_loss           0.7132286429405212
  test_precision_epoch       0.746268630027771
   test_precision_step      0.37489965558052063
    test_recall_epoch       0.25641027092933655
    test_recall_step        0.19538259506225586
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
None


In [19]:

# Load the TensorBoard extension
%load_ext tensorboard

# Start TensorBoard and point it to the log directory
%tensorboard --logdir tb_logs/ResNet50_transfer/version_2

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6007 (pid 39112), started 2 days, 4:06:44 ago. (Use '!kill 39112' to kill it.)

# Add Regularization