In [7]:
%matplotlib inline
import json

import mlflow
import mlflow.pytorch

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
from datasets import load_dataset

import torch.optim as optim
from torch.utils.data import DataLoader, IterableDataset
# from torchvision import transforms
# from torchvision import transforms
from sklearn.metrics import accuracy_score

from typing import Any, Tuple, List

from cv2 import Mat
from numpy import dtype, floating, integer, ndarray

from tqdm.autonotebook import tqdm

plt.rcParams["figure.figsize"] = (16, 10)  # (w, h)

Run the following command in a terminal to start the MLflow tracking server:

In [1]:
#!mlflow server --host 127.0.0.1 --port 8080

In [8]:
# Send all MLflow runs of this notebook to the locally running tracking server.
mlflow.set_tracking_uri("http://127.0.0.1:8080")

In [9]:
# Read the annotation file once and expose its three top-level tables as DataFrames.
with open("../data/iwildcam2020_train_annotations.json") as f:
    data = json.load(f)

annotations, images_metadata, categories = (
    pd.DataFrame.from_dict(data[table])
    for table in ("annotations", "images", "categories")
)

In [10]:
# convert datetime type and split into day/night time
def split_day_night_time(
    data: pd.DataFrame, day_start: str = "06:00:00", day_end: str = "18:00:00"
) -> pd.DataFrame:
    """Return a copy of ``data`` with a parsed ``datetime`` column and an ``is_day`` flag.

    Args:
        data: Frame with a ``datetime`` column that ``pd.to_datetime`` can parse.
        day_start: Inclusive start of the day window, as a time string.
        day_end: Exclusive end of the day window, as a time string.

    Returns:
        A new DataFrame (the input is not modified) where ``datetime`` holds
        timestamps and ``is_day`` is True when ``day_start <= time < day_end``.
    """
    data = data.copy()
    data["datetime"] = pd.to_datetime(data["datetime"])

    # Parse the window bounds once instead of once per row.
    start_time = pd.Timestamp(day_start).time()
    end_time = pd.Timestamp(day_end).time()

    data["is_day"] = data["datetime"].apply(
        lambda ts: start_time <= ts.time() < end_time
    )
    return data

In [11]:
def preprocess_dark_images(
    image: np.ndarray,
) -> Mat | ndarray[Any, dtype[integer[Any] | floating[Any]]]:
    """Equalise the histogram of the first LUV channel of an RGB image.

    The image is converted RGB -> LUV, channel 0 is replaced by its
    histogram-equalised version, and the result is converted back to RGB.
    The input array is not modified.
    """
    luv = cv2.cvtColor(image, cv2.COLOR_RGB2LUV)
    equalized = luv.copy()
    equalized[..., 0] = cv2.equalizeHist(luv[..., 0])
    return cv2.cvtColor(equalized, cv2.COLOR_LUV2RGB)

In [12]:
from pathlib import Path

class iWildCam2020Dataset(IterableDataset):
    def __init__(
        self,
        dataset: str,
        metadata: pd.DataFrame,
        batch_size: int = 16,
        resize_dim: Tuple[int, int] | None = None,
        num_samples: int = 1000,
        mean: np.ndarray | None = None,
        std: np.ndarray | None = None,
        save_dir: str | None = None,
        overwrite: bool = False,
        split: str = "train",
        val_ratio: float = 0.2,
    ):
        super().__init__()
        self.metadata = metadata

        self.split = split
        self.val_ratio = val_ratio
        self.train_size = int((1 - val_ratio) * num_samples)
        self.val_size = num_samples - self.train_size

        self.dataset = dataset
        self.batch_size = batch_size
        self.resize_dim = resize_dim

        self.num_samples = num_samples
        if self.split == "train":
            self.num_batches = (self.train_size + batch_size - 1) // batch_size
        else:
            self.num_batches= (self.val_size + batch_size - 1) // batch_size

        self.mean = torch.tensor(mean if mean is not None else [0.0, 0.0, 0.0]).view(
            3, 1, 1
        )
        self.std = torch.tensor(std if std is not None else [1.0, 1.0, 1.0]).view(
            3, 1, 1
        )

        self.save_dir = Path(save_dir) if save_dir else None
        if self.save_dir:
            self.save_dir.mkdir(parents=True, exist_ok=True)
        self.overwrite = overwrite

    def save_image(self, img_tensor: torch.Tensor, idx: int):
        if self.save_dir:
            save_path = self.save_dir / f"image_{idx}.pt"
            torch.save(img_tensor, save_path)
    
    def load_image(self, idx: int) -> torch.Tensor | None:
        if self.save_dir:
            save_path = self.save_dir / f"image_{idx}.pt"
            if save_path.exists():
                return torch.load(save_path, weights_only=True)
        return None
    
    def __len__(self):
        return self.num_batches

    def __iter__(self):
        if self.split == "train":
            start_idx, end_idx = 0, self.train_size
        else:
            start_idx, end_idx = self.train_size, self.num_samples
        
        for idx, image_batch in enumerate(self.dataset.iter(self.batch_size)):
            # to get consistent part of dataset + val / train split
            batch_start = idx * self.batch_size
            if batch_start >= end_idx:
                break
            if batch_start < start_idx:
                continue
            
            is_day = self.metadata[idx * self.batch_size : (idx + 1) * self.batch_size][
                "is_day"
            ].values
            image_batch = image_batch["image"]
            imgs_ = []

            dark_idx = set(np.where(~is_day)[0].tolist())
            for i in range(len(image_batch)):
                img_tensor = self.load_image(idx * self.batch_size + i)
                if img_tensor is None:
                    img = np.transpose(image_batch[i].numpy())
                    if i in dark_idx:
                        img = preprocess_dark_images(img)
                    img = cv2.resize(img, self.resize_dim, interpolation=cv2.INTER_AREA)
                    img_tensor = (
                        torch.tensor(np.transpose(img, (2, 0, 1)), dtype=torch.float32)
                        / 255.0
                    )

                    if self.save_dir:
                        self.save_image(img_tensor, idx * self.batch_size + i)

                imgs_.append(img_tensor)
            yield torch.stack(imgs_)

In [13]:
def calculate_mean_std(dataset, batch_size=32, resize_dim=(224, 224), num_samples=1000):
    """Compute per-channel mean and standard deviation over the first ``num_samples`` images.

    Images are read through ``dataset.iter(batch_size)``, moved from channel-first
    to channel-last layout, resized to ``resize_dim`` and scaled to [0, 1].

    The statistics are exact over all pixels of the selected images: running sums
    of values and squared values are accumulated, instead of averaging per-batch
    standard deviations (which underestimates the spread between batches).

    Args:
        dataset: Stream object exposing ``iter(batch_size)``; each batch must hold
            the images under the ``"image"`` key.
        batch_size: Number of images requested from the stream at a time.
        resize_dim: Target size passed to ``cv2.resize``.
        num_samples: Maximum number of images to include.

    Returns:
        Tuple ``(mean, std)`` of 1-D arrays with one entry per channel.

    Raises:
        ValueError: If no image was read.
    """
    total_batches = (num_samples + batch_size - 1) // batch_size

    channel_sum = 0.0
    channel_sq_sum = 0.0
    pixel_count = 0
    seen = 0

    for image_batch in tqdm(dataset.iter(batch_size), total=total_batches):
        for image in image_batch["image"]:
            if seen >= num_samples:
                break
            img = np.transpose(image.numpy(), (1, 2, 0))
            img = cv2.resize(img, resize_dim, interpolation=cv2.INTER_AREA)
            if img.ndim == 2:
                # cv2.resize drops a trailing single-channel axis; restore it.
                img = img[..., np.newaxis]

            pixels = img.reshape(-1, img.shape[-1]).astype(np.float64) / 255.0
            channel_sum = channel_sum + pixels.sum(axis=0)
            channel_sq_sum = channel_sq_sum + np.square(pixels).sum(axis=0)
            pixel_count += pixels.shape[0]
            seen += 1

        # Stop before pulling another batch from the stream once enough images are in.
        if seen >= num_samples:
            break

    if pixel_count == 0:
        raise ValueError("calculate_mean_std received no images to compute statistics from")

    mean = channel_sum / pixel_count
    # Var[x] = E[x^2] - E[x]^2; clip guards against tiny negative rounding errors.
    variance = np.clip(channel_sq_sum / pixel_count - np.square(mean), 0.0, None)
    std = np.sqrt(variance)
    return mean, std

In [16]:
import os
from datetime import datetime

def get_unique_model_path(base_path):
    """Return a path derived from ``base_path`` that does not exist yet.

    If ``base_path`` is free it is returned unchanged. Otherwise a timestamp is
    inserted *before* the file extension (``best.pt`` -> ``best_<timestamp>.pt``;
    ``.pt`` is used when ``base_path`` has no extension). Should that name be taken
    as well, a numeric suffix is appended instead of spinning until the clock
    advances to the next second.

    Args:
        base_path: Desired checkpoint path.

    Returns:
        ``base_path`` itself, or a non-existing variant of it.
    """
    if not os.path.exists(base_path):
        return base_path

    root, ext = os.path.splitext(base_path)
    ext = ext or ".pt"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    unique_path = f"{root}_{timestamp}{ext}"
    attempt = 1
    while os.path.exists(unique_path):
        unique_path = f"{root}_{timestamp}_{attempt}{ext}"
        attempt += 1

    return unique_path

In [17]:
def _first_sample_index(loader, batch_size):
    """Return the stream index of the first sample a loader serves.

    Loaders whose ``dataset`` reports a split other than ``"train"`` start at the
    first batch boundary at or after ``dataset.train_size``; everything else
    (including loaders without these attributes) starts at 0.
    """
    wrapped = getattr(loader, "dataset", None)
    if getattr(wrapped, "split", "train") == "train":
        return 0
    train_size = int(getattr(wrapped, "train_size", 0))
    return -(-train_size // batch_size) * batch_size  # ceil to a batch boundary


def _batch_labels(first_index, batch_idx, batch_size, batch_len, device):
    """Fetch the ``category_id`` labels matching one batch of images.

    Reads the module-level ``annotations`` frame positionally.
    NOTE(review): assumes row k of ``annotations`` describes sample k of the
    image stream — confirm against the dataset.
    """
    start = first_index + batch_idx * batch_size
    label_values = annotations["category_id"].iloc[start : start + batch_len].to_numpy()
    return torch.as_tensor(label_values, dtype=torch.long, device=device)


def train(
    model,
    criterion,
    optimizer,
    train_loader,
    val_loader,
    batch_size,
    device,
    num_epochs=1,
    ckpt_path="models/best.pt"
):
    """Train ``model``, evaluate after every epoch and track the run in MLflow.

    Labels are taken from the module-level ``annotations`` frame. Because the
    loaders replay the same samples every epoch, the label slice depends only on
    the batch index and on where the loader's split starts — not on the epoch.

    Args:
        model: Network to optimise; it is moved to ``device``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimiser already bound to ``model``'s parameters.
        train_loader: Iterable of image batches used for optimisation.
        val_loader: Iterable of image batches used for evaluation.
        batch_size: Batch size the loaders were built with.
        device: Device the batches, labels and model are placed on.
        num_epochs: Number of passes over ``train_loader``.
        ckpt_path: Desired checkpoint path; made unique via ``get_unique_model_path``.

    Side effects:
        Saves the state dict of the best model (by validation accuracy) to the
        checkpoint path and logs parameters, metrics and the model to MLflow.
    """
    ckpt_path = get_unique_model_path(ckpt_path)
    ckpt_dir = os.path.dirname(ckpt_path)
    if ckpt_dir:
        # torch.save does not create missing parent directories.
        os.makedirs(ckpt_dir, exist_ok=True)

    # Batches are moved to `device`, so the model has to live there as well.
    model.to(device)
    best = 0.0

    train_first = _first_sample_index(train_loader, batch_size)
    val_first = _first_sample_index(val_loader, batch_size)

    with mlflow.start_run():
        mlflow.log_param("model", model)
        mlflow.log_param("criterion", criterion)
        mlflow.log_param("optimizer", optimizer)
        mlflow.log_param("model_path", ckpt_path)
        mlflow.log_param("num_epochs", num_epochs)
        mlflow.log_param("batch_size", batch_size)

        for epoch in range(num_epochs):
            train_loop = tqdm(
                enumerate(train_loader, 0),
                total=len(train_loader),
                desc=f"Epoch {epoch}: train",
            )

            model.train()
            train_loss = 0.0
            train_batches = 0

            for i, batch in train_loop:
                images = batch.to(device)
                # Use the actual batch length so a short final batch still lines up.
                labels = _batch_labels(train_first, i, batch_size, images.size(0), device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                train_loss += loss.item()
                train_batches += 1
                train_loop.set_postfix({"loss": loss.item()})

            correct = 0
            total = 0
            val_loss = 0.0
            val_batches = 0

            model.eval()
            with torch.no_grad():
                val_loop = tqdm(
                    enumerate(val_loader, 0),
                    total=len(val_loader),
                    desc="Val",
                )

                for i, batch in val_loop:
                    images = batch.to(device)
                    labels = _batch_labels(val_first, i, batch_size, images.size(0), device)

                    outputs = model(images)
                    _, predicted = torch.max(outputs, 1)

                    loss = criterion(outputs, labels)
                    val_loss += loss.item()
                    val_batches += 1

                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    val_loop.set_postfix(
                        {"acc": correct / max(total, 1), "loss": val_loss / val_batches}
                    )

            # Guard the averages so an empty loader reports 0 instead of crashing.
            mean_train_loss = train_loss / max(train_batches, 1)
            mean_val_loss = val_loss / max(val_batches, 1)
            val_accuracy = correct / total if total else 0.0

            mlflow.log_metric("train_loss", mean_train_loss, step=epoch)
            mlflow.log_metric("val_accuracy", val_accuracy, step=epoch)
            mlflow.log_metric("val_loss", mean_val_loss, step=epoch)

            print(f"Epoch [{epoch + 1}/{num_epochs}], Training Loss: {mean_train_loss:.6f}")
            print(f"Epoch [{epoch + 1}/{num_epochs}], Validation Accuracy: {val_accuracy:.6f}, Validation Loss: {mean_val_loss:.6f}")

            if val_accuracy > best:
                best = val_accuracy
                torch.save(model.state_dict(), ckpt_path)
                # Artifact path is the checkpoint path without its extension.
                mlflow.pytorch.log_model(model, os.path.splitext(ckpt_path)[0])

In [18]:
# Stream the train split instead of downloading it, and have samples delivered as torch tensors.
dataset = load_dataset("anngrosha/iWildCam2020", split="train", streaming=True)
dataset = dataset.with_format("torch")

Resolving data files:   0%|          | 0/190 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/52 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/190 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/52 [00:00<?, ?it/s]

In [19]:
# Parse the capture timestamps and add the `is_day` flag used by the dataset class.
images_metadata = split_day_night_time(data=images_metadata)

In [20]:
batch_size = 5
img_size = 224
resize_dim = (img_size, img_size)
# Category ids are fed to CrossEntropyLoss as class indices, which must lie in
# [0, num_classes - 1]; the classifier therefore needs max id + 1 outputs.
num_classes = int(annotations["category_id"].max()) + 1

num_samples = 5
val_ratio = 0

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Simple CNN

In [None]:
class SimpleCNN(nn.Module):
    """Two conv + max-pool stages followed by a two-layer classifier head.

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, either a single int for
            square inputs or a ``(height, width)`` pair. Defaults to 224, which
            reproduces the original ``32 * 56 * 56`` classifier input width.
    """

    def __init__(self, num_classes, input_size=224):
        super().__init__()
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size

        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        # The padded 3x3 convs keep the spatial size; each of the two 2x2 pools
        # halves it, leaving (height // 4) x (width // 4) with 32 channels.
        self.flat_features = 32 * (height // 4) * (width // 4)
        self.fc1 = nn.Linear(self.flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.pool = nn.MaxPool2d(2, 2)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` tensor to ``(batch, num_classes)`` logits."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten everything but the batch axis; unlike view(-1, n) this can never
        # fold a wrongly sized input into the batch dimension.
        x = torch.flatten(x, start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [21]:
# Channel statistics of the working subset; displayed as the cell output.
mean, std = calculate_mean_std(dataset, batch_size, resize_dim, num_samples)
mean, std

  0%|          | 0/1 [00:00<?, ?it/s]

(array([0.28920304, 0.30016854, 0.272847  ]),
 array([0.248005  , 0.25105431, 0.26049172]))

In [22]:
# The train and validation datasets share every setting except the split name
# and the directory used to cache preprocessed tensors.
train_dataset, val_dataset = (
    iWildCam2020Dataset(
        dataset=dataset,
        metadata=images_metadata,
        batch_size=batch_size,
        resize_dim=resize_dim,
        num_samples=num_samples,
        mean=mean,
        std=std,
        save_dir=cache_dir,
        split=split_name,
        val_ratio=val_ratio,
    )
    for split_name, cache_dir in (
        ("train", "/working/data/train"),
        ("val", "/working/data/val"),
    )
)

# The datasets already yield whole batches, hence batch_size=None.
train_loader, val_loader = (
    DataLoader(split_dataset, batch_size=None)
    for split_dataset in (train_dataset, val_dataset)
)

In [23]:
# Model, loss and optimiser used by the training runs below.
model = SimpleCNN(num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adadelta(params=model.parameters(), lr=1)

  _torch_pytree._register_pytree_node(


In [24]:
# train(
#     model,
#     criterion,
#     optimizer,
#     batch_loader,
#     #val_loader,
#     batch_loader,
#     batch_size,
#     device,
#     num_epochs=20
# )

Performing single-batch overfitting to check whether the model is capable enough for our task

In [26]:
# Train split with no validation hold-out; the same loader is passed for both
# training and evaluation so the model is scored on the batch it was fitted to.
batch_dataset = iWildCam2020Dataset(
    dataset,
    images_metadata,
    split="train",
    val_ratio=0,
    save_dir="/working/data/train",
    batch_size=batch_size,
    resize_dim=resize_dim,
    num_samples=num_samples,
    mean=mean,
    std=std,
)
batch_loader = DataLoader(dataset=batch_dataset, batch_size=None)

train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_loader=batch_loader,
    val_loader=batch_loader,
    batch_size=batch_size,
    device=device,
    num_epochs=3,
    ckpt_path='models/simpleCNN.pt',
)

Epoch 0: train:   0%|          | 0/1 [00:00<?, ?it/s]

Val:   0%|          | 0/1 [00:00<?, ?it/s]

Epoch [1/3], Training Loss: 5.809134
Epoch [1/3], Validation Accuracy: 0.200000, Validation Loss: 4.822841




Epoch 1: train:   0%|          | 0/1 [00:00<?, ?it/s]

Val:   0%|          | 0/1 [00:00<?, ?it/s]

Epoch [2/3], Training Loss: 4.814448
Epoch [2/3], Validation Accuracy: 0.600000, Validation Loss: 2.278366




Epoch 2: train:   0%|          | 0/1 [00:00<?, ?it/s]

Val:   0%|          | 0/1 [00:00<?, ?it/s]

2024/10/27 16:13:19 INFO mlflow.tracking._tracking_service.client: 🏃 View run treasured-mole-198 at: http://127.0.0.1:8080/#/experiments/0/runs/f0807d87eb5f41b092fcb449651bbfd3.
2024/10/27 16:13:19 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:8080/#/experiments/0.


Epoch [3/3], Training Loss: 4.153296
Epoch [3/3], Validation Accuracy: 0.600000, Validation Loss: 1.631622
