In [1]:
from google.colab import files

In [2]:
from google.colab import drive
# Attach Google Drive so files under /content/drive become readable from this runtime.
drive.mount(mountpoint='/content/drive')


Mounted at /content/drive


In [3]:
import zipfile
import os

# Extract the dataset archive from Drive into the local /nature folder.
# The context manager closes the archive even if extraction raises, so the
# file handle is never leaked (the manual open/close pair skipped close() on error).
with zipfile.ZipFile('/content/drive/MyDrive/nature_12K.zip', 'r') as zip_ref:
    zip_ref.extractall('/nature')

In [5]:
!pip install pytorch_lightning

Collecting pytorch_lightning
  Downloading pytorch_lightning-2.5.1-py3-none-any.whl.metadata (20 kB)
Collecting torchmetrics>=0.7.0 (from pytorch_lightning)
  Downloading torchmetrics-1.7.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch_lightning)
  Downloading lightning_utilities-0.14.3-py3-none-any.whl.metadata (5.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.1.0->pytorch_lightning)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.1.0->pytorch_lightning)
  Downl

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import pytorch_lightning as pl
from torch.utils.data import DataLoader, random_split
from pytorch_lightning.loggers import WandbLogger
import wandb
import matplotlib.pyplot as plt

In [7]:
# Fix the global random seed so repeated runs start from the same RNG state.
pl.seed_everything(seed=42)

# -------------------------
# Customizable CNN Module
# -------------------------
class ConfigurableCNN(pl.LightningModule):
    """CNN classifier built from five conv -> activation -> max-pool blocks.

    The convolutional stack is followed by one hidden dense layer and a linear
    output layer that produces one logit per class. Training and validation
    steps use cross-entropy loss and report accuracy.

    Args:
        input_channels: Number of channels in the input images.
        conv_filters: Output channels of each conv block; the first five
            entries are used.
        conv_kernel_sizes: Kernel size of each conv block; the first five
            entries are used.
        dense_neurons: Width of the hidden dense layer.
        activation: Callable returning an activation module; it is
            instantiated once per conv block.
        num_classes: Number of output logits.
        learning_rate: Learning rate passed to the Adam optimizer.
    """

    def __init__(self,
                 input_channels=3,
                 conv_filters=[32, 64, 128, 128, 256],  # number of filters per conv layer
                 conv_kernel_sizes=[3, 3, 3, 3, 3],       # kernel sizes per conv layer
                 dense_neurons=512,                      # number of neurons in the dense layer
                 activation=nn.ReLU,                     # activation function
                 num_classes=10,                         # final output classes
                 learning_rate=1e-3):
        super().__init__()
        self.save_hyperparameters()

        layers = []
        in_channels = input_channels

        # Create 5 convolution blocks (conv -> activation -> maxpool)
        for i in range(5):
            out_channels = conv_filters[i]
            kernel_size = conv_kernel_sizes[i]
            # padding = kernel_size // 2 keeps the spatial size for odd kernels at stride 1
            layers.append(nn.Conv2d(in_channels, out_channels,
                                    kernel_size=kernel_size, stride=1,
                                    padding=kernel_size // 2))
            # Configurable activation function
            layers.append(activation())
            # Each pool halves height and width
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = out_channels

        self.conv_layers = nn.Sequential(*layers)

        # Probe the conv stack with a dummy 128x128 input to find the flattened
        # feature size. no_grad avoids building an autograd graph for the probe.
        self.example_input_array = torch.zeros(1, input_channels, 128, 128)
        with torch.no_grad():
            conv_out = self.conv_layers(self.example_input_array)
        conv_out_flat_size = conv_out.view(1, -1).shape[1]

        # Fully-connected layers: one dense layer and final output layer
        self.fc1 = nn.Linear(conv_out_flat_size, dense_neurons)
        self.fc2 = nn.Linear(dense_neurons, num_classes)
        self.learning_rate = learning_rate

        # Per-step metrics collected during an epoch. Lightning 2.x no longer
        # passes step outputs to an epoch-end hook, so they are kept here and
        # cleared once the epoch has been aggregated.
        self._train_step_outputs = []
        self._val_step_outputs = []

    def forward(self, x):
        """Return class logits for a batch of images."""
        # Run through conv layers
        x = self.conv_layers(x)
        # Flatten everything except the batch dimension
        x = x.view(x.size(0), -1)
        # The dense layer always uses ReLU, independent of `activation`
        x = F.relu(self.fc1(x))
        # Output layer (raw logits; cross_entropy applies log-softmax itself)
        x = self.fc2(x)
        return x

    def training_step(self, batch, batch_idx):
        """Compute loss/accuracy for one training batch and record them."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()

        # Log batch-level metrics to Lightning; these will be aggregated later
        self.log('batch_train_loss', loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log('batch_train_acc', acc, on_step=True, on_epoch=True, prog_bar=True)

        # Store detached copies so the epoch buffer does not keep graphs alive
        self._train_step_outputs.append({'loss': loss.detach(), 'train_acc': acc.detach()})
        return {'loss': loss, 'train_acc': acc}

    def on_train_epoch_end(self):
        """Average the recorded training metrics and send them to wandb.

        Replaces `training_epoch_end`, which Lightning 2.x rejects when a
        module defines it.
        """
        outputs = self._train_step_outputs
        if not outputs:
            return
        # Unweighted mean over batches (a smaller final batch counts equally)
        avg_loss = torch.stack([o['loss'] for o in outputs]).mean()
        avg_acc = torch.stack([o['train_acc'] for o in outputs]).mean()
        outputs.clear()

        self._log_epoch_to_wandb({
            'train_loss': avg_loss.item(),
            'train_acc': avg_acc.item()
        })

    def validation_step(self, batch, batch_idx):
        """Compute loss/accuracy for one validation batch and record them."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = (preds == y).float().mean()

        # Log batch-level metrics to Lightning; these are aggregated at the end
        self.log('batch_val_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log('batch_val_acc', acc, on_step=False, on_epoch=True, prog_bar=True)

        self._val_step_outputs.append({'val_loss': loss.detach(), 'val_acc': acc.detach()})
        return {'val_loss': loss, 'val_acc': acc}

    def on_validation_epoch_end(self):
        """Average the recorded validation metrics and send them to wandb.

        Replaces `validation_epoch_end`, which Lightning 2.x rejects when a
        module defines it.
        """
        outputs = self._val_step_outputs
        if not outputs:
            return
        avg_loss = torch.stack([o['val_loss'] for o in outputs]).mean()
        avg_acc = torch.stack([o['val_acc'] for o in outputs]).mean()
        # Clearing here also stops one validation run leaking into the next
        outputs.clear()

        self._log_epoch_to_wandb({
            'val_loss': avg_loss.item(),
            'val_acc': avg_acc.item()
        })

    def _log_epoch_to_wandb(self, metrics):
        """Send epoch-level metrics to the active wandb run, if there is one."""
        # Without an active run wandb.log would raise; skip instead so the
        # module can also be trained with wandb disabled.
        if wandb.run is None:
            return
        # NOTE(review): if a WandbLogger is attached to the Trainer as well,
        # an explicit `step` may conflict with that logger's own step counter
        # -- confirm against the training cell.
        wandb.log(metrics, step=self.current_epoch)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

INFO:lightning_fabric.utilities.seed:Seed set to 42


In [8]:
from google.colab import drive
import os
import zipfile

# Attach Google Drive before touching any Drive path
drive.mount('/content/drive')

# Source archive and extraction destination (both live on Drive)
zip_path = '/content/drive/MyDrive/nature_12K.zip'
unzip_target_path = '/content/drive/MyDrive/nature_12K'

# Extraction is skipped whenever the destination path already exists
if os.path.exists(unzip_target_path):
    print("Already unzipped in Drive. Skipping unzip.")
else:
    print("Unzipping and storing in Google Drive...")
    with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
        zip_ref.extractall(path=unzip_target_path)
    print("Unzipping complete and saved in Drive!")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Unzipping and storing in Google Drive...
Unzipping complete and saved in Drive!


In [11]:
from google.colab import drive
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import os

# Attach Google Drive so the extracted dataset is reachable
drive.mount('/content/drive')

# Dataset root on Drive and its two split folders
base_path = '/content/drive/MyDrive/nature_12K/inaturalist_12K'
train_dir, val_dir = (os.path.join(base_path, split) for split in ('train', 'val'))

# Preprocessing shared by both splits: resize to 128x128, convert to a tensor,
# then normalize each channel with the given mean/std.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# ImageFolder derives the class labels from the sub-directory names
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
val_dataset = datasets.ImageFolder(val_dir, transform=transform)

# Both loaders share batch size and worker count; only training is shuffled
loader_options = dict(batch_size=32, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_options)

# Sanity output: class names and split sizes
print("Classes:", train_dataset.classes)
print(f"Train samples: {len(train_dataset)}, Val samples: {len(val_dataset)}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Classes: ['Amphibia', 'Animalia', 'Arachnida', 'Aves', 'Fungi', 'Insecta', 'Mammalia', 'Mollusca', 'Plantae', 'Reptilia']
Train samples: 9999, Val samples: 2000
