In [2]:
import os
import torch
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.loggers import WandbLogger

from Dataset import load_data
from CNN import CNN

In [3]:
def main(
    data_dir: str = "../../inaturalist_12K",
    batch_size: int = 32,
    image_size: tuple = (512, 512),
    val_fraction: float = 0.2,
    max_epochs: int = 2,
    learning_rate: float = 1e-3,
    num_workers: int = 4,
    seed: "int | None" = 42,
):
    """Train the CNN on the dataset under ``data_dir``, then evaluate it on the test split.

    Every argument has a default equal to the value that used to be hard-coded
    in the body, so a bare ``main()`` runs the same configuration as before
    (apart from the RNG seeding described under ``seed``).

    Args:
        data_dir: Dataset location, forwarded unchanged to ``load_data``.
        batch_size: Batch size, forwarded to ``load_data`` and used for all
            three DataLoaders.
        image_size: Image size forwarded to ``load_data``
            (presumably (height, width) -- confirm against ``Dataset.load_data``).
        val_fraction: Forwarded to ``load_data``
            (presumably the share of training data held out for validation -- confirm).
        max_epochs: Upper bound on training epochs given to the Trainer.
        learning_rate: Passed to ``CNN`` as ``lr``.
        num_workers: Worker process count for each DataLoader.
        seed: If not ``None``, ``pl.seed_everything(seed, workers=True)`` is
            called before any data is loaded so that repeated runs start from
            the same RNG state. Pass ``None`` to skip seeding.

    Returns:
        None. Progress and metrics are reported through the Lightning Trainer.
    """
    # Seed first: anything random that happens later (weight init, shuffling,
    # and possibly the split inside load_data) then starts from a fixed state.
    if seed is not None:
        pl.seed_everything(seed, workers=True)

    # Load datasets
    train_set, val_set, test_set, class_names = load_data(
        data_dir=data_dir,
        val_fraction=val_fraction,
        image_size=image_size,
        batch_size=batch_size,
    )

    # DataLoaders: identical settings everywhere; only the training loader shuffles.
    loader_kwargs = dict(batch_size=batch_size, num_workers=num_workers)
    train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
    val_loader = DataLoader(val_set, shuffle=False, **loader_kwargs)
    test_loader = DataLoader(test_set, shuffle=False, **loader_kwargs)

    # Initialize model; the output layer size follows the number of classes
    # reported by load_data.
    model = CNN(
        input_channels=3,
        conv_filters=[32, 64, 128, 256, 512],
        kernel_sizes=[3, 3, 3, 3, 3],
        activation='relu',
        dense_neurons=512,
        num_classes=len(class_names),
        lr=learning_rate,
    )

    # PyTorch Lightning trainer
    trainer = pl.Trainer(
        max_epochs=max_epochs,
        accelerator="auto",       # Lightning picks the accelerator (GPU if available)
        devices=1,
        log_every_n_steps=10,
    )

    # Train the model
    print("Training the model...")
    trainer.fit(model, train_loader, val_loader)

    # Test the model
    print("Testing the model on the test set...")
    trainer.test(model, test_loader)

In [4]:
# Run the training and test pipeline defined by main(); the log lines and
# progress bars printed below this cell are its output.
main()

You are using the plain ModelCheckpoint callback. Consider using LitModelCheckpoint which with seamless uploading to Model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Training the model...


2025-04-11 11:55:30.921314: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744352731.055044  214455 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744352731.090039  214455 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-04-11 11:55:31.419205: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

  | Name          | Type             | Params | Mode 
-----------------------------------------------------------
0 | activa

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.


Testing the model on the test set...


Testing: |          | 0/? [00:00<?, ?it/s]

In [5]:
import torch

# Environment diagnostic: report whether this PyTorch build can see a CUDA
# device, and describe the device when one is present.
print("CUDA Available:", torch.cuda.is_available())

if not torch.cuda.is_available():
    # CPU-only environment: nothing further to query.
    print("No GPU found.")
else:
    # Details for device index 0, the total device count, and the index of
    # the currently selected device.
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("GPU Count:", torch.cuda.device_count())
    print("Current Device:", torch.cuda.current_device())


CUDA Available: False
No GPU found.
