In [1]:
from src.datasets.data_module import CloudCoverDataModule
from pathlib import Path
from src.models.unet import LightningUNet
from src.training.trainer import train
from src.testing.tester import test

### Dataloader

In [2]:
# Feature / label folders for the public (train) and private (test) splits.
train_features_dir = Path("../data/final/public/train_features/")
train_labels_dir = Path("../data/final/public/train_labels/")
test_features_dir = Path("../data/final/private/test_features/")
test_labels_dir = Path("../data/final/private/test_labels/")

# Single data module shared by the training and testing cells below.
# val_size / random_state presumably control the train/validation split —
# confirm against CloudCoverDataModule.
data_module = CloudCoverDataModule(
    train_X_folder_path=train_features_dir,
    train_y_folder_path=train_labels_dir,
    test_X_folder_path=test_features_dir,
    test_y_folder_path=test_labels_dir,
    train_batch_size=4,
    val_batch_size=8,
    test_batch_size=8,
    val_size=0.2,
    random_state=42,
)

In [3]:
# Invoke the data module's preparation step; what it actually does is defined
# in CloudCoverDataModule (not shown in this notebook).
data_module.prepare_data()

In [4]:
# Set up the data module for both stages used in this notebook, in the same
# order as the workflow: fitting first, then testing.
for stage in ("fit", "test"):
    data_module.setup(stage=stage)

### Train

In [5]:
# Build the network first so its hyper-parameters are easy to spot and tweak.
unet_model = LightningUNet(
    n_channels=4,
    n_classes=2,
    bilinear=True,
    learning_rate=0.001,
)

# Launch training; run_name / model_version match the ./logs/unet/version_0
# directory referenced by the checkpoint path in the Test section.
# NOTE(review): patience equals max_epochs, so if `patience` drives early
# stopping it can never trigger within this run — confirm this is intended.
unet = train(
    model=unet_model,
    run_name="unet",
    model_version=0,
    data_module=data_module,
    max_epochs=10,
    patience=10,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name          | Type               | Params
------------------------------------------------------
0  | jaccard_index | BinaryJaccardIndex | 0     
1  | inc           | DoubleConv         | 39.6 K
2  | down1         | Down               | 221 K 
3  | down2         | Down               | 886 K 
4  | down3         | Down               | 3.5 M 
5  | down4         | Down               | 4.7 M 
6  | up1           | Up                 | 5.9 M 
7  | up2           | Up                 | 1.5 M 
8  | up3           | Up                 | 369 K 
9  | up4           | Up                 | 110 K 
10 | outc          | OutConv            | 130   
------------------------------------------------------
17.3 M    Trainable params
0         Non-trainable params
17.3 M    Total params
69.072    Total estimated 

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/user/micromamba/envs/gif-7005-project/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=23` in the `DataLoader` to improve performance.


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

  return F.conv2d(input, weight, bias, self.stride,


                                                                           

/home/user/micromamba/envs/gif-7005-project/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=23` in the `DataLoader` to improve performance.


Epoch 0: 100%|██████████| 2350/2350 [24:40<00:00,  1.59it/s, v_num=0, train_loss_step=0.156, train_jaccard_step=0.960, val_loss_step=0.675, val_jaccard_step=0.455, val_loss_epoch=0.423, val_jaccard_epoch=0.740, train_loss_epoch=0.384, train_jaccard_epoch=nan.0]



Epoch 9: 100%|██████████| 2350/2350 [24:34<00:00,  1.59it/s, v_num=0, train_loss_step=0.273, train_jaccard_step=0.781, val_loss_step=0.609, val_jaccard_step=0.635, val_loss_epoch=0.276, val_jaccard_epoch=0.861, train_loss_epoch=0.224, train_jaccard_epoch=nan.0] 

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 2350/2350 [24:34<00:00,  1.59it/s, v_num=0, train_loss_step=0.273, train_jaccard_step=0.781, val_loss_step=0.609, val_jaccard_step=0.635, val_loss_epoch=0.276, val_jaccard_epoch=0.861, train_loss_epoch=0.224, train_jaccard_epoch=nan.0]


### Test  
Replace the checkpoint path below with the path to the best checkpoint produced by the training run.

In [5]:
# Checkpoint to evaluate; update this path to the best checkpoint of your own
# training run before executing the cell.
best_checkpoint_path = './logs/unet/version_0/checkpoints/unet-epoch=07-val_loss=0.26.ckpt'

# NOTE(review): the training cell passed bilinear=True, but it is not passed
# here — confirm the checkpoint (or the class default) restores the same value.
unet = LightningUNet.load_from_checkpoint(
    checkpoint_path=best_checkpoint_path,
    n_channels=4,
    n_classes=2,
)

In [6]:
# Evaluate the loaded model on the test split, logging under the same run name
# and version as the training run.
test(model=unet, run_name="unet", model_version=0, data_module=data_module)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/home/user/micromamba/envs/gif-7005-project/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=23` in the `DataLoader` to improve performance.


Testing DataLoader 0:   0%|          | 0/1373 [00:00<?, ?it/s]

  return F.conv2d(input, weight, bias, self.stride,


Testing DataLoader 0: 100%|██████████| 1373/1373 [10:09<00:00,  2.25it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   test_jaccard_epoch       0.8518288135528564
     test_loss_epoch        0.27107203006744385
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
