In [1]:
import datetime
import importlib
import sys
from pathlib import Path

import dotenv
import hydra
import pytorch_lightning as pl
import torch
from matplotlib import pyplot as plt
from tqdm import tqdm
import numpy as np
from inz.data.data_module import XBDDataModule
from inz.data.event import Event, Hold, Tier1, Tier3, Test
from inz.util import show_masks_comparison
from PIL import Image
from torchvision.utils import draw_segmentation_masks, make_grid  # type: ignore[import-untyped]
from torchvision.io import read_image  # type: ignore[import-untyped]
from inz.util import get_wandb_logger

import torchvision.transforms as T
from inz.data.data_module_floodnet import FloodNetModule
from inz.models.base_pl_module import BasePLModule

# Put the `inz/farseg` and `inz/dahitra` directories on the import path.
# Both entries are relative, so they resolve against the current working directory.
sys.path.extend(["inz/farseg", "inz/dahitra"])

In [2]:
# Load variables from a `.env` file (if one is found) into the process environment.
dotenv.load_dotenv()
# Single seed handed to Lightning's `seed_everything` so runs are repeatable.
RANDOM_SEED = 123
pl.seed_everything(RANDOM_SEED)
# Hard-coded CUDA device: anything later moved to `device` requires a CUDA-capable GPU.
device = torch.device("cuda")
# "high" lets PyTorch use faster, reduced-precision kernels for float32 matmuls.
torch.set_float32_matmul_precision("high")

Seed set to 123


In [3]:
# Checkpoint selection: keep exactly one CKPT_PATH / CONFIG_PATH pair uncommented.
# CKPT_PATH is the Lightning checkpoint to load; CONFIG_PATH is the directory that
# holds the Hydra `config` file composed further down in this cell.
# NOTE(review): CKPT_PATH values are absolute paths on one specific machine, while
# CONFIG_PATH values are relative — confirm both resolve in your environment.

# FarSeg double
# CKPT_PATH = "/home/tomek/inz/inz/saved_checkpoints/runs/farseg-double/checkpoints/experiment_name-0-epoch-39-step-39000-f1-0.660326-best-f1.ckpt"
# CONFIG_PATH = "../saved_checkpoints/runs/farseg-double/.hydra"

# FarSeg single
# CKPT_PATH = "/home/tomek/inz/inz/saved_checkpoints/runs/farseg_single/2024-10-25_00-48-01/checkpoints/experiment_name-0-epoch-28-step-28275-challenge_score_safe-0.6489-best-challenge-score.ckpt"
# CONFIG_PATH = "../saved_checkpoints/runs/farseg_single/2024-10-25_00-48-01/.hydra"

# Baseline double
# CKPT_PATH = "/home/tomek/inz/inz/outputs/split_wind_test_hurricane_matthew_baseline/2024-11-06_05-02-23/checkpoints/experiment_name-0-epoch-12-step-1560-challenge_score_safe-0.6650-best-challenge-score.ckpt"
# CONFIG_PATH = "../outputs/split_wind_test_hurricane_matthew_baseline/2024-11-06_05-02-23/.hydra"

# Baseline single (currently active)
# NOTE(review): this config directory is named `.hydra_lol`, unlike the `.hydra`
# used by the other runs above — confirm that is intentional.
CKPT_PATH = "/home/tomek/inz/inz/saved_checkpoints/runs/baseline_singlebranch/baseline_singlebranch_ckpt/baseline_singlebranch-epoch=33-step=16592-challenge_score_safe=0.639932-best-challenge-score.ckpt"
CONFIG_PATH = "../saved_checkpoints/runs/baseline_singlebranch/.hydra_lol"
from functools import partialmethod
def floodnet_module_adapter(model_class):
    """Patch ``model_class`` in place so it emits three output channels.

    Two attributes are overwritten:

    * ``model_class.__init__`` is wrapped in a ``partialmethod`` that supplies
      ``n_classes=3`` as a keyword (an explicit ``n_classes`` passed by the
      caller still takes precedence).
    * ``forward`` on the *first direct base class* of ``model_class`` is
      replaced by a version that keeps channels 0 and 1 of ``self.model(x)``
      unchanged and reduces every channel from index 2 onwards to a single
      channel with an element-wise max.

    Args:
        model_class: The class to adapt; it is mutated, not copied.

    Returns:
        The same ``model_class`` object that was passed in.
    """

    def forward(self, x):
        raw = self.model(x)
        kept = raw[:, :2, ...]
        # Collapse channels 2.. into one channel by taking the per-pixel maximum.
        merged = raw[:, 2:, ...].max(dim=1, keepdim=True).values
        return torch.cat([kept, merged], dim=1)

    # NOTE(review): patching the first base class changes `forward` for every
    # class that inherits it from that base, not only `model_class` — confirm
    # this global side effect is acceptable.
    parent_class = model_class.__bases__[0]

    model_class.__init__ = partialmethod(model_class.__init__, n_classes=3)
    parent_class.forward = forward
    return model_class

from hydra import compose, initialize

# Compose the Hydra config stored next to the selected run.
with initialize(version_base="1.3", config_path=CONFIG_PATH):
    cfg = compose(config_name="config", overrides=[])

# Resolve the module class named by the config's `_target_` (a dotted path),
# then patch it for three-class output before anything instantiates it.
model_class_str = cfg["module"]["module"]["_target_"]
model_class_name = model_class_str.split(".")[-1]
module_path = ".".join(model_class_str.split(".")[:-1])
imported_module = importlib.import_module(module_path)
model_class = floodnet_module_adapter(getattr(imported_module, model_class_name))
# The code below reads `.args` / `.keywords`, i.e. it expects Hydra to hand back
# a `functools.partial` here rather than a constructed module.
model_partial = hydra.utils.instantiate(cfg["module"]["module"])

# `load_from_checkpoint` accepts `map_location`, `hparams_file` and `strict` as
# positional parameters right after the checkpoint path. Forwarding positional
# constructor arguments would therefore bind them to those loader options
# instead of the module's `__init__`, so refuse them explicitly.
if model_partial.args:
    raise ValueError(
        "Positional constructor arguments cannot be forwarded through "
        f"load_from_checkpoint; got {len(model_partial.args)} positional argument(s). "
        "Pass them as keywords in the Hydra config instead."
    )

model = model_class.load_from_checkpoint(CKPT_PATH, **model_partial.keywords).to(device)

# One weight per output class (three classes after adaptation).
# NOTE(review): presumably consumed by the module's loss — confirm in the module code.
model.class_weights = torch.tensor([0.01, 0.5, 1.0], device=device)


def optimizer_factory_override(params):
    """Return an AdamW optimizer over ``params`` with lr=2e-5 and weight_decay=1e-6."""
    return torch.optim.AdamW(params=params, lr=0.00002, weight_decay=1e-6)


model.optimizer_factory = optimizer_factory_override

# Switch to training mode; as the last expression this also displays the module repr.
model.train()

using weights from ResNet34_Weights.IMAGENET1K_V1


SingleBranchBaselinePLModule(
  (model): BaselineSingleBranchModule(
    (conv6): ConvRelu(
      (layer): Sequential(
        (0): Conv2d(512, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
      )
    )
    (conv6_2): ConvRelu(
      (layer): Sequential(
        (0): Conv2d(576, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
      )
    )
    (conv7): ConvRelu(
      (layer): Sequential(
        (0): Conv2d(320, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
      )
    )
    (conv7_2): ConvRelu(
      (layer): Sequential(
        (0): Conv2d(288, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
      )
    )
    (conv8): ConvRelu(
      (layer): Sequential(
        (0): Conv2d(160, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
      )
    )
    (conv8_2): ConvRelu(
      (layer): Seq

In [4]:
# Reuse the training batch size from the composed Hydra config for every split.
BATCH_SIZE = cfg["datamodule"]["datamodule"]["train_batch_size"]

# Alternative data source (xBD, hurricane_matthew only), kept for reference:
# dm = XBDDataModule(
#     path=Path("data/xBD_processed_512"),
#     drop_unclassified_channel=True,
#     events={
#         Tier1: [
#                 Event.hurricane_matthew,
#             ],
#             Test: [
#                 Event.hurricane_matthew,
#             ],
#             Hold: [
#                 Event.hurricane_matthew,
#             ],
#     },
#     train_batch_size=BATCH_SIZE,
#     val_batch_size=BATCH_SIZE,
#     test_batch_size=BATCH_SIZE,
#     val_fraction=0.2,
#     test_fraction=0.,
#     num_workers=2,
# )

# Augmentation pipeline: a horizontal flip half of the time, followed by a
# small random affine jitter (rotation, scale, translation) applied with p=0.6.
horizontal_flip = T.RandomHorizontalFlip(p=0.5)
affine_jitter = T.RandomAffine(degrees=(-10, 10), scale=(0.9, 1.1), translate=(0.1, 0.1))
maybe_affine_jitter = T.RandomApply(p=0.6, transforms=[affine_jitter])
floodnet_transform = T.Compose(transforms=[horizontal_flip, maybe_affine_jitter])

floodnet_root = Path("data/floodnet_processed_512/FloodNet-Supervised_v1.0")

dm = FloodNetModule(
    path=floodnet_root,
    train_batch_size=BATCH_SIZE,
    val_batch_size=BATCH_SIZE,
    test_batch_size=BATCH_SIZE,
    transform=floodnet_transform,
)

dm.prepare_data()
# NOTE(review): Lightning's own stage names are "fit"/"validate"/"test"/"predict";
# "train" is passed here as-is — confirm FloodNetModule.setup handles it as intended.
dm.setup("train")


In [5]:
# Second-resolution ISO timestamp so every throwaway run gets a distinct name.
run_timestamp = datetime.datetime.now().replace(microsecond=0).isoformat()

wandb_logger = get_wandb_logger(
    run_name=f"delete-me-{run_timestamp}",
    project="inz",
    # watch_model=True,
    # watch_model_log_frequency=500,
    # watch_model_model=model,
)

progress_bar = pl.callbacks.RichProgressBar()

trainer = pl.Trainer(
    logger=wandb_logger,
    callbacks=[progress_bar],
    accelerator="gpu",
    precision="bf16-mixed",
    max_epochs=50,
    log_every_n_steps=15,
    deterministic=True,
    sync_batchnorm=True,
)

# Run one validation pass on the loaded checkpoint first, then start training.
trainer.validate(model=model, datamodule=dm)
trainer.fit(model=model, datamodule=dm)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mtomasz-owienko-stud[0m ([33mtomasz-owienko-stud-warsaw-university-of-technology[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/tomek/.netrc
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/tomek/inz/inz/.venv/lib/python3.11/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Output()

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()