In [1]:
import sys
sys.path.append("/kaggle/input/contrails-helper")

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
from IPython.display import display
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
from torch_train import TorchTrain

In [3]:
BASE_DIR = "/kaggle/input/google-research-identify-contrails-reduce-global-warming"

In [4]:
# !ls $BASE_DIR/test/1002653297254493116

In [5]:
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda')

In [6]:
_T11_BOUNDS = (243, 303)
_CLOUD_TOP_TDIFF_BOUNDS = (-4, 5)
_TDIFF_BOUNDS = (-4, 2)
N_TIMES_BEFORE = 4
IMG_SIZE = 256
BATCH_SIZE = 16

def normalize_range(data, bounds):
    """Maps data to the range [0, 1]."""
    return (data - bounds[0]) / (bounds[1] - bounds[0])

def load_one_record(record_id, BASE_DIR=BASE_DIR, mask_too=True):
    band11 = np.load(f"{BASE_DIR}/{str(record_id)}/band_11.npy")
    band14 = np.load(f"{BASE_DIR}/{str(record_id)}/band_14.npy")
    band15 = np.load(f"{BASE_DIR}/{str(record_id)}/band_15.npy")
    if mask_too:
        human_pixel_mask =  np.load(f"{BASE_DIR}/{str(record_id)}/human_pixel_masks.npy")
        
    r = normalize_range(band15 - band14, _TDIFF_BOUNDS)
    g = normalize_range(band14 - band11, _CLOUD_TOP_TDIFF_BOUNDS)
    b = normalize_range(band14, _T11_BOUNDS)
    false_color = np.clip(np.stack([r, g, b], axis=2), 0, 1)
    
    img = false_color[..., N_TIMES_BEFORE]
    if mask_too:
        return img, human_pixel_mask
    else:
        return img

In [7]:
metadata = pd.read_csv("/kaggle/input/contrails-helper/metadata.csv")
metadata.head()

Unnamed: 0,Record_ID,Split,Contrails,High_Pixels
0,1000216489776414077,train,False,0.0
1,1000603527582775543,train,True,8.512878
2,1000660467359258186,train,True,1.028442
3,100071707854144929,train,False,0.0
4,1000823728928031783,train,True,0.563049


In [8]:
train_md = metadata[metadata["Split"] == "train"]
validation_md = metadata[metadata["Split"] == "validation"]
display(train_md.head())
display(validation_md.head())

Unnamed: 0,Record_ID,Split,Contrails,High_Pixels
0,1000216489776414077,train,False,0.0
1,1000603527582775543,train,True,8.512878
2,1000660467359258186,train,True,1.028442
3,100071707854144929,train,False,0.0
4,1000823728928031783,train,True,0.563049


Unnamed: 0,Record_ID,Split,Contrails,High_Pixels
20529,1000834164244036115,validation,False,0.0
20530,1002653297254493116,validation,False,0.0
20531,1002777035567823518,validation,True,0.038147
20532,1010397530434035516,validation,False,0.0
20533,1012978360687713914,validation,False,0.0


In [9]:
class ContrailsDataLoader():
    def __init__(self, df, split="train"):
        self.df = df
        self.split = split
        
    def __getitem__(self, idx):
        record_id = self.df.iloc[idx]["Record_ID"]
        images, mask = load_one_record(record_id, f"{BASE_DIR}/{self.split}")
        images = images.transpose(2, 0, 1)
        mask = mask.transpose(2, 0, 1)
        return torch.from_numpy(images).float(), torch.from_numpy(mask).float()

    def __len__(self):
        return len(self.df)

In [10]:
class ContrailsDataLoaderTest():
    def __init__(self, ids, split="test"):
        self.ids = ids
        self.split = split
        
    def __getitem__(self, idx):
        record_id = self.ids[idx]
        images = load_one_record(record_id, f"{BASE_DIR}/{self.split}", mask_too=False)
        images = images.transpose(2, 0, 1)
        return torch.from_numpy(images).float(), record_id

    def __len__(self):
        return len(self.ids)

In [11]:
train_dataloader = ContrailsDataLoader(train_md, "train")
validation_dataloader = ContrailsDataLoader(validation_md, "validation")

train_data = DataLoader(train_dataloader, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
validation_data = DataLoader(validation_dataloader, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)

In [12]:
for image, label in train_data:
    print(image.shape, label.shape)
    break

torch.Size([16, 3, 256, 256]) torch.Size([16, 1, 256, 256])


In [13]:
test_ids = os.listdir(f"{BASE_DIR}/test")
print(len(test_ids))

2


In [14]:
test_dataloader = ContrailsDataLoaderTest(test_ids, "test")
test_data = DataLoader(test_dataloader, batch_size=10, shuffle=False, num_workers=2)

In [15]:
for image, img_id in test_data:
    print(image.shape, img_id)
    break

torch.Size([2, 3, 256, 256]) ('1002653297254493116', '1000834164244036115')


In [16]:
class DoubleConv(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.double_conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.double_conv(x)


class Down(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Down, self).__init__()
        self.maxpool_conv = nn.Sequential(
            nn.MaxPool2d(2),
            DoubleConv(in_channels, out_channels)
        )

    def forward(self, x):
        return self.maxpool_conv(x)


class Up(nn.Module):
    def __init__(self, in_channels, out_channels, bilinear=True):
        super(Up, self).__init__()

        if bilinear:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            self.up = nn.ConvTranspose2d(in_channels // 2, in_channels // 2, kernel_size=2, stride=2)

        self.conv = DoubleConv(in_channels, out_channels)

    def forward(self, x1, x2):
        x1 = self.up(x1)

        diffY = x2.size()[2] - x1.size()[2]
        diffX = x2.size()[3] - x1.size()[3]

        x1 = nn.functional.pad(x1, [diffX // 2, diffX - diffX // 2, diffY // 2, diffY - diffY // 2])
        
        x = torch.cat([x2, x1], dim=1)
        return self.conv(x)


class UNet(nn.Module):
    def __init__(self, input_channels = 3):
        super(UNet, self).__init__()
        # Define your layers
        self.input_channels = input_channels
        self.inc = DoubleConv(input_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 512)
        self.up1 = Up(1024, 256)
        self.up2 = Up(512, 128)
        self.up3 = Up(256, 64)
        self.up4 = Up(128, 64)
        self.outc = nn.Conv2d(64, 1, kernel_size=1)

    def forward(self, x):
        # Forward pass through the layers
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)
        x = self.outc(x)
        return x

In [17]:
class Dice(nn.Module):
    def __init__(self, use_sigmoid=True):
        super(Dice, self).__init__()
        self.sigmoid = nn.Sigmoid()
        self.use_sigmoid = use_sigmoid

    def forward(self, inputs, targets, smooth=1):
        if self.use_sigmoid:
            inputs = self.sigmoid(inputs)       
        
        inputs = inputs.view(-1)
        targets = targets.view(-1)
        
        intersection = (inputs * targets).sum()
        dice = (2.0 *intersection + smooth)/(inputs.sum() + targets.sum() + smooth)  
        
        return dice
dice = Dice()

In [18]:
unet = UNet()
unet.to(DEVICE)
optimizer = torch.optim.Adam(unet.parameters(), lr=0.001)
loss_fn = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(100))
metrics = {
    "dice": dice,
}
tt = TorchTrain(unet, optimizer, loss_fn, metrics = metrics)
history = tt.fit(train_data, validation_data, verbose = True, epochs = 5)



In [19]:
# model_scripted = torch.jit.script(unet)
# model_scripted.save('unet.pt')
# torch.save(unet.state_dict(), "unet_state.pt")

In [20]:
unet.eval();

In [21]:
def rle_encode(x, fg_val=1):
    """
    Args:
        x:  numpy array of shape (height, width), 1 - mask, 0 - background
    Returns: run length encoding as list
    """

    dots = np.where(
        x.T.flatten() == fg_val)[0]  # .T sets Fortran order down-then-right
    run_lengths = []
    prev = -2
    for b in dots:
        if b > prev + 1:
            run_lengths.extend((b + 1, 0))
        run_lengths[-1] += 1
        prev = b
    return run_lengths

def list_to_string(x):
    """
    Converts list to a string representation
    Empty list returns '-'
    """
    if x: # non-empty list
        s = str(x).replace("[", "").replace("]", "").replace(",", "")
    else:
        s = '-'
    return s


In [22]:
submission = pd.read_csv('/kaggle/input/google-research-identify-contrails-reduce-global-warming/sample_submission.csv', index_col='record_id')

In [23]:
for i, data in enumerate(test_data):
    images, image_id = data
    
    # Predict mask for this instance
    images = images.to(DEVICE)
    predicated_mask = unet(images)
    predicated_mask = torch.sigmoid(predicated_mask).cpu().detach().numpy()
    
    # Apply threshold
    predicated_mask_with_threshold = np.zeros((images.shape[0], 256, 256))
    predicated_mask_with_threshold[predicated_mask[:, 0, :, :] < 0.9] = 0
    predicated_mask_with_threshold[predicated_mask[:, 0, :, :] > 0.9] = 1
    
    for img_num in range(0, images.shape[0]):
        current_mask = predicated_mask_with_threshold[img_num, :, :]
        current_image_id = image_id[img_num]
        
        submission.loc[int(current_image_id), 'encoded_pixels'] = list_to_string(rle_encode(current_mask))

In [24]:
submission

Unnamed: 0_level_0,encoded_pixels
record_id,Unnamed: 1_level_1
1000834164244036115,40452 1 40966 2 41222 3 41479 4 41736 6 41994 ...
1002653297254493116,29126 2 29377 13 29629 13 29883 10 30138 7 303...


In [25]:
submission.to_csv('submission.csv')