In [10]:
!wget https://zenodo.org/record/5610000/files/resnet50-sentinel2.pt
#!pip install rasterio matplotlib torchgeo wandb

--2023-01-21 00:17:15--  https://zenodo.org/record/5610000/files/resnet50-sentinel2.pt
Resolving zenodo.org (zenodo.org)... 188.185.124.72
Connecting to zenodo.org (zenodo.org)|188.185.124.72|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 94566017 (90M) [application/octet-stream]
Saving to: ‘resnet50-sentinel2.pt’


2023-01-21 00:17:42 (3.60 MB/s) - ‘resnet50-sentinel2.pt’ saved [94566017/94566017]



In [15]:
def mount_bucket(bucket_name):
  """
  Install gcsfuse, authenticate the Colab user and mount a Google Cloud Storage bucket.

  The bucket is mounted on the ``gcp_bucket`` directory, created relative to the
  current working directory. Relies on ``google.colab.auth`` and on IPython ``!``
  shell escapes, so it only runs inside a Colab/IPython session.

  Args:
    bucket_name: Name of the bucket passed to ``gcsfuse``. It is interpolated
      into the shell command as-is (no quoting or validation).
  """
  # Register the gcsfuse apt source for this Ubuntu release and add its signing key.
  !echo "deb http://packages.cloud.google.com/apt gcsfuse-`lsb_release -c -s` main" | sudo tee /etc/apt/sources.list.d/gcsfuse.list
  !curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
  # Refresh the package index, then install gcsfuse (-y answers prompts automatically).
  !sudo apt-get -y -q update
  !sudo apt-get -y -q install gcsfuse

  # Authenticate the current Colab user before mounting.
  from google.colab import auth
  auth.authenticate_user()
  # Create the mount point (no error if it already exists) and mount the bucket on it.
  !mkdir -p gcp_bucket 
  !gcsfuse --implicit-dirs --limit-bytes-per-sec -1 --limit-ops-per-sec -1 {bucket_name} gcp_bucket

# Mount the 'finlandforest' bucket, then change the working directory; relative paths
# used by later cells are resolved from the new location.
# NOTE(review): the %cd target is under drive/MyDrive, but no cell in view mounts
# Google Drive — confirm it is mounted before this cell runs.
mount_bucket('finlandforest')
%cd drive/MyDrive/finland_forest/

deb http://packages.cloud.google.com/apt gcsfuse-bionic main
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1210  100  1210    0     0  30250      0 --:--:-- --:--:-- --:--:-- 30250
OK
Get:1 http://security.ubuntu.com/ubuntu bionic-security InRelease [88.7 kB]
Get:2 http://packages.cloud.google.com/apt gcsfuse-bionic InRelease [5,001 B]
Get:3 https://cloud.r-project.org/bin/linux/ubuntu bionic-cran40/ InRelease [3,626 B]
Hit:4 http://archive.ubuntu.com/ubuntu bionic InRelease
Ign:5 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  InRelease
Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64  InRelease
Hit:7 https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64  Release
Get:8 http://archive.ubuntu.com/ubuntu bionic-updates InRelease [88.7 kB]
Get:9 http://packages.cloud.goo

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
from torchvision.transforms import Compose, Normalize
import torch.nn.functional as F
import pandas as pd
import segmentation_models_pytorch as smp
import os
from torchvision.io import read_image
import rasterio
from pytorch_lightning import LightningModule, Trainer
from pytorch_lightning.loggers import WandbLogger
import pytorch_lightning as pl
import warnings
import numpy as np
from matplotlib import pyplot as plt
from PIL import Image
from tqdm import tqdm
import wandb
from pathlib import Path
#os.chdir('../')
#from src.data_unet.data_loader import SentinelDataset2

# Our rasters contain no geolocation info, so silence this warning from rasterio
warnings.filterwarnings("ignore", category=rasterio.errors.NotGeoreferencedWarning)

  from .autonotebook import tqdm as notebook_tqdm


In [24]:
# --- Data locations ---------------------------------------------------------
# Image folders for the train and test splits, plus the folder of AGBM label rasters.
train_img_dir = "../data/train/train_features/"
test_img_dir = "../data/test/test_features"
label_dir = "../data/train/train_agbm/"

# Metadata table describing the feature files (read once, reused by later cells).
features_metadata = "~/Desktop/DD-Finland-Forests/data/features_metadata.csv"
features_df = pd.read_csv(features_metadata)

In [11]:
class SentinelDataset2(Dataset):
    """Sentinel-2 image chips (``{chip_id}_S2_06.tif``) with optional AGBM label rasters.

    Each item is the first 10 bands of a chip, standardised per band, optionally
    paired with the ``{chip_id}_agbm.tif`` raster found in ``label_dir``.

    Args:
        annotations_df: DataFrame with ``satellite``, ``split``, ``filename`` and
            ``chip_id`` columns.
        img_dir: Directory holding the ``*_S2_06.tif`` rasters.
        label_dir: Directory holding the ``*_agbm.tif`` rasters. ``None`` means the
            data is unlabeled; items are then returned without a label.
        transform: Optional callable applied to the normalised image tensor.
        target_transform: Optional callable applied to the label tensor.
        split: Value of the ``split`` column to keep. When omitted it resolves to
            ``'train'`` if ``label_dir`` is given and to ``'test'`` otherwise, so an
            unlabeled dataset no longer points at train chip ids.
    """

    def __init__(self, annotations_df, img_dir, label_dir=None, transform=None,
                 target_transform=None, split=None):
        if split is None:
            split = 'train' if label_dir is not None else 'test'

        chips = annotations_df[annotations_df['satellite'] == 'S2']
        chips = chips[chips['split'] == split]
        # Keep only files whose last "_"-separated token contains "06.tif".
        # regex=False: the "." must match a literal dot, not any character.
        last_token = chips['filename'].astype(str).str.rsplit('_', n=1).str[-1]
        self.chip_ids = chips[last_token.str.contains('06.tif', regex=False)]['chip_id'].values

        self.img_dir = img_dir
        self.label_dir = label_dir
        self.transform = transform
        self.target_transform = target_transform

    @staticmethod
    def _read_raster(path, max_bands=None):
        """Read a raster as a float32 tensor, closing the file handle afterwards."""
        with rasterio.open(path) as src:
            data = src.read().astype(np.float32)
        if max_bands is not None:
            data = data[:max_bands]
        return torch.tensor(data)

    def __len__(self):
        return len(self.chip_ids)

    def __getitem__(self, idx):
        """Return ``(image, label)`` when ``label_dir`` is set, otherwise just ``image``."""
        chip_id = self.chip_ids[idx]

        img_path = os.path.join(self.img_dir, f"{chip_id}_S2_06.tif")
        image = self._read_raster(img_path, max_bands=10)
        # Standardise each band to mean 0 / stddev 1; the 0.01 avoids dividing by 0
        # for constant bands.
        band_mean = image.mean(dim=(1, 2), keepdim=True)
        band_std = image.std(dim=(1, 2), keepdim=True)
        image = (image - band_mean) / (band_std + 0.01)

        if self.transform:
            image = self.transform(image)

        # Unlabeled data (e.g. the test split): there is no raster to pair with.
        if self.label_dir is None:
            return image

        label_path = os.path.join(self.label_dir, f"{chip_id}_agbm.tif")
        label = self._read_raster(label_path)
        if self.target_transform:
            label = self.target_transform(label)

        return image, label

In [12]:
# Loader settings, plus a fixed seed so the train/validation split is identical on every run.
BATCH_SIZE = 32
NUM_WORKERS = 6
SPLIT_SEED = 42

# 80/20 train/validation split of the labeled chips.
train_dataset = SentinelDataset2(features_df, train_img_dir, label_dir)
train_size = int(0.8 * len(train_dataset))
valid_size = len(train_dataset) - train_size
train_set, val_set = torch.utils.data.random_split(
    train_dataset,
    [train_size, valid_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
train_dataloader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
valid_dataloader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Test chips are iterated in dataset order (no shuffling) so outputs stay aligned with chip ids.
test_dataset = SentinelDataset2(features_df, test_img_dir)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [13]:
class Sentinel2Model(pl.LightningModule):
    """Lightning wrapper that trains a wrapped network with an MSE objective.

    Args:
        model: The network to train; called as ``model(x)`` on the batch inputs.
        lr: Learning rate handed to Adam. Defaults to 0.02, the value that was
            previously hard-coded.
    """

    def __init__(self, model, lr=0.02):
        super().__init__()
        self.model = model
        self.lr = lr

    def forward(self, x):
        return self.model(x)

    def _shared_step(self, batch, stage):
        """Compute the MSE loss for one batch and log ``{stage}/loss`` and ``{stage}/rmse``."""
        x, y = batch
        y_hat = self(x)
        loss = F.mse_loss(y_hat, y)
        self.log(f"{stage}/loss", loss)
        self.log(f"{stage}/rmse", torch.sqrt(loss))
        return loss

    def training_step(self, batch, batch_idx):
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        return self._shared_step(batch, "valid")

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [14]:
# U-Net with a ResNet-50 encoder: 10 input channels in, 1 output channel out.
base_model = smp.Unet(encoder_name="resnet50", in_channels=10, classes=1)

In [15]:
# Load the checkpoint into the U-Net encoder, then wrap the network for training.
# map_location="cpu" lets the file deserialize even when the device it was saved from
# is unavailable; load_state_dict copies the values into the encoder's own parameters.
# NOTE(review): torch.load unpickles the file — only load checkpoints from a trusted source.
base_model.encoder.load_state_dict(
    torch.load("../resnet50-sentinel2.pt", map_location="cpu")
)
s2_model = Sentinel2Model(base_model)

In [16]:
# Weights & Biases logger: run "Sentinel_2_ResNet50" inside project "BioMassters_baseline".
wandb_logger = WandbLogger(
    project='BioMassters_baseline',
    name='Sentinel_2_ResNet50',
)

  rank_zero_warn(


In [17]:
# Build the Lightning trainer: GPU accelerator, 20 epochs, metrics sent to the W&B logger.
trainer = Trainer(accelerator="gpu", max_epochs=20, logger=[wandb_logger])

# Fit the model, validating on the held-out loader.
trainer.fit(
    s2_model,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type | Params
-------------------------------
0 | model | Unet | 32.5 M
-------------------------------
32.5 M    Trainable params
0         Non-trainable params
32.5 M    Total params
130.172   Total estimated model params size (MB)


Epoch 19: 100%|██████████| 257/257 [01:34<00:00,  2.72it/s, loss=2.04e+03, v_num=72j4]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 257/257 [01:35<00:00,  2.69it/s, loss=2.04e+03, v_num=72j4]


In [30]:
from tqdm import tqdm
chip_ids = features_df[features_df['split'] == 'test']['chip_id'].unique()

# Make sure the output folder exists before the first save.
preds_dir = "../data/preds"
os.makedirs(preds_dir, exist_ok=True)

# Inference setup: eval() puts layers such as BatchNorm/Dropout into inference behaviour
# (important here because each forward pass sees a single chip), and inputs are moved to
# whatever device the model's weights currently live on.
s2_model.eval()
device = next(s2_model.parameters()).device

# no_grad: predictions only, so skip building the autograd graph.
with torch.no_grad():
    for chip_id in tqdm(chip_ids):
        image_path = os.path.join(test_img_dir, f"{chip_id}_S2_06.tif")

        # Context manager closes the raster handle after reading.
        with rasterio.open(image_path) as src:
            test_im = torch.tensor(src.read().astype(np.float32)[:10])

        # Same per-band standardisation as used for training (mean 0, stddev 1, +0.01 guard).
        band_mean = test_im.mean(dim=(1, 2), keepdim=True)
        band_std = test_im.std(dim=(1, 2), keepdim=True)
        test_im = (test_im - band_mean) / (band_std + 0.01)

        pred = s2_model(test_im.unsqueeze(0).to(device))

        im = Image.fromarray(pred.squeeze().cpu().numpy())
        im.save(f"{preds_dir}/{chip_id}_agbm.tif", format="TIFF", save_all=True)

100%|██████████| 2773/2773 [06:08<00:00,  7.52it/s]


In [50]:
# Bundle every entry of the current working directory into ../predictions4.tgz.
# NOTE(review): `*` expands relative to the cwd; the listing below shows bare *_agbm.tif
# names, so this was presumably run from inside the predictions folder — confirm.
!tar -cvzf ../predictions4.tgz *

00a28320_agbm.tif
01047f47_agbm.tif
0128a38e_agbm.tif
0169bbe3_agbm.tif
0189741b_agbm.tif
01931f13_agbm.tif
01ad8a71_agbm.tif
01d74a63_agbm.tif
01e343cf_agbm.tif
01f808cb_agbm.tif
01fff88d_agbm.tif
0203607d_agbm.tif
02073a85_agbm.tif
0215f36c_agbm.tif
023081a4_agbm.tif
0248148e_agbm.tif
025109d9_agbm.tif
028fe78c_agbm.tif
02b2807a_agbm.tif
02fb2883_agbm.tif
031c6894_agbm.tif
03253d45_agbm.tif
036a83d8_agbm.tif
038c87e4_agbm.tif
03a0229a_agbm.tif
03a2936a_agbm.tif
03b4500a_agbm.tif
03d1b011_agbm.tif
03d2248c_agbm.tif
03dfc9f4_agbm.tif
041a10b6_agbm.tif
041a7c10_agbm.tif
04307f02_agbm.tif
04446d4b_agbm.tif
0452be20_agbm.tif
0471ca6b_agbm.tif
047bd278_agbm.tif
0481b1bf_agbm.tif
0491e532_agbm.tif
04b4be2b_agbm.tif
04e83670_agbm.tif
04f4810e_agbm.tif
050699ab_agbm.tif
0546a96c_agbm.tif
05617f0f_agbm.tif
058c90ad_agbm.tif
058eb8d1_agbm.tif
059af7e6_agbm.tif
059c2676_agbm.tif
05a2ca49_agbm.tif
05b2b505_agbm.tif
05e15988_agbm.tif
05e4da75_agbm.tif
0608d694_agbm.tif
0620721c_agbm.tif
06269db0_a

In [44]:
# List the first entries of the predictions2.tgz archive.
# NOTE(review): `head` stops reading after its first lines and closes the pipe, which is
# the likely source of the "tar: write error" line in the output — TODO confirm.
!tar -tf ../../predictions2.tgz | head

data/preds/00a28320_agbm.tif
data/preds/01047f47_agbm.tif
data/preds/0128a38e_agbm.tif
data/preds/0169bbe3_agbm.tif
data/preds/0189741b_agbm.tif
data/preds/01931f13_agbm.tif
data/preds/01ad8a71_agbm.tif
data/preds/01d74a63_agbm.tif
data/preds/01e343cf_agbm.tif
data/preds/01f808cb_agbm.tif
tar: write error


In [51]:
# Copy the predictions archive to the finlandforest bucket.
# NOTE(review): the recorded run ended with "403 Access denied", and this path (../../)
# differs from the ../ used when the archive was created — confirm cwd and credentials.
!gsutil -m cp ../../predictions4.tgz gs://finlandforest

Copying file://../../predictions4.tgz [Content-Type=application/gzip]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

ResumableUploadAbortException: 403 Access denied.
CommandException: 1 file/object could not be transferred.
