In [1]:
!kaggle datasets download -d tekbahadurkshetri/landslide4sense

Dataset URL: https://www.kaggle.com/datasets/tekbahadurkshetri/landslide4sense
License(s): other
Downloading landslide4sense.zip to /kaggle/working
100%|██████████████████████████████████████▉| 2.84G/2.85G [00:27<00:00, 115MB/s]
100%|███████████████████████████████████████| 2.85G/2.85G [00:27<00:00, 110MB/s]


In [2]:
import zipfile
import os

def unzip_file(zip_filepath, dest_dir):
    """
    Unzips a .zip file to the specified destination directory.

    Parameters:
    zip_filepath (str): The path to the .zip file.
    dest_dir (str): The directory where the contents will be extracted.
    """
    # Ensure the destination directory exists
    os.makedirs(dest_dir, exist_ok=True)

    # Open the zip file
    with zipfile.ZipFile(zip_filepath, 'r') as zip_ref:
        # Extract all the contents to the destination directory
        zip_ref.extractall(dest_dir)
    print(f"Unzipped {zip_filepath} to {dest_dir}")

# Example usage
zip_filepath = '/kaggle/working/landslide4sense.zip'
dest_dir = '/kaggle/working'
unzip_file(zip_filepath, dest_dir)


Unzipped /kaggle/working/landslide4sense.zip to /kaggle/working


In [3]:
import pandas as pd
import numpy as np
import h5py
import glob
import matplotlib.pyplot as plt
%matplotlib inline

In [5]:
# Testing the dataset
path_single = r"/kaggle/working/TrainData/img/image_2000.h5"
path_single_mask = r'/kaggle/working/TrainData/mask/mask_2000.h5'


In [6]:
path_single = r"/kaggle/working/TrainData/img/image_10.h5"
path_single_mask = r'/kaggle/working/TrainData/mask/mask_1.h5'
TRAIN_PATH = r"/kaggle/working/TrainData/img/*.h5"
TRAIN_MASK = r'/kaggle/working/TrainData/mask/*.h5'

TRAIN_XX = np.zeros((3799, 128, 128, 6))
TRAIN_YY = np.zeros((3799, 128, 128, 1))
all_train = sorted(glob.glob(TRAIN_PATH))
all_mask = sorted(glob.glob(TRAIN_MASK))

In [7]:
for i, (img, mask) in enumerate(zip(all_train, all_mask)):
    print(i, img, mask)
    with h5py.File(img) as hdf:
        ls = list(hdf.keys())
        data = np.array(hdf.get('img'))

        # assign 0 for the nan value
        data[np.isnan(data)] = 0.000001

        # to normalize the data
        mid_rgb = data[:, :, 1:4].max() / 2.0
        mid_slope = data[:, :, 12].max() / 2.0
        mid_elevation = data[:, :, 13].max() / 2.0

        # ndvi calculation
        data_red = data[:, :, 3]
        data_nir = data[:, :, 7]
        data_ndvi = np.divide(data_nir - data_red,np.add(data_nir, data_red))

        # final array
        TRAIN_XX[i, :, :, 0] = 1 - data[:, :, 3] / mid_rgb  #RED
        TRAIN_XX[i, :, :, 1] = 1 - data[:, :, 2] / mid_rgb #GREEN
        TRAIN_XX[i, :, :, 2] = 1 - data[:, :, 1] / mid_rgb #BLUE
        TRAIN_XX[i, :, :, 3] = data_ndvi #NDVI
        TRAIN_XX[i, :, :, 4] = 1 - data[:, :, 12] / mid_slope #SLOPE
        TRAIN_XX[i, :, :, 5] = 1 - data[:, :, 13] / mid_elevation #ELEVATION


    with h5py.File(mask) as hdf:
        ls = list(hdf.keys())
        data=np.array(hdf.get('mask'))
        TRAIN_YY[i, :, :, 0] = data

0 /kaggle/working/TrainData/img/image_1.h5 /kaggle/working/TrainData/mask/mask_1.h5
1 /kaggle/working/TrainData/img/image_10.h5 /kaggle/working/TrainData/mask/mask_10.h5
2 /kaggle/working/TrainData/img/image_100.h5 /kaggle/working/TrainData/mask/mask_100.h5
3 /kaggle/working/TrainData/img/image_1000.h5 /kaggle/working/TrainData/mask/mask_1000.h5
4 /kaggle/working/TrainData/img/image_1001.h5 /kaggle/working/TrainData/mask/mask_1001.h5
5 /kaggle/working/TrainData/img/image_1002.h5 /kaggle/working/TrainData/mask/mask_1002.h5
6 /kaggle/working/TrainData/img/image_1003.h5 /kaggle/working/TrainData/mask/mask_1003.h5
7 /kaggle/working/TrainData/img/image_1004.h5 /kaggle/working/TrainData/mask/mask_1004.h5
8 /kaggle/working/TrainData/img/image_1005.h5 /kaggle/working/TrainData/mask/mask_1005.h5
9 /kaggle/working/TrainData/img/image_1006.h5 /kaggle/working/TrainData/mask/mask_1006.h5
10 /kaggle/working/TrainData/img/image_1007.h5 /kaggle/working/TrainData/mask/mask_1007.h5
11 /kaggle/working/Tr

  TRAIN_XX[i, :, :, 4] = 1 - data[:, :, 12] / mid_slope #SLOPE


1719 /kaggle/working/TrainData/img/image_2546.h5 /kaggle/working/TrainData/mask/mask_2546.h5
1720 /kaggle/working/TrainData/img/image_2547.h5 /kaggle/working/TrainData/mask/mask_2547.h5
1721 /kaggle/working/TrainData/img/image_2548.h5 /kaggle/working/TrainData/mask/mask_2548.h5
1722 /kaggle/working/TrainData/img/image_2549.h5 /kaggle/working/TrainData/mask/mask_2549.h5
1723 /kaggle/working/TrainData/img/image_255.h5 /kaggle/working/TrainData/mask/mask_255.h5
1724 /kaggle/working/TrainData/img/image_2550.h5 /kaggle/working/TrainData/mask/mask_2550.h5
1725 /kaggle/working/TrainData/img/image_2551.h5 /kaggle/working/TrainData/mask/mask_2551.h5
1726 /kaggle/working/TrainData/img/image_2552.h5 /kaggle/working/TrainData/mask/mask_2552.h5
1727 /kaggle/working/TrainData/img/image_2553.h5 /kaggle/working/TrainData/mask/mask_2553.h5
1728 /kaggle/working/TrainData/img/image_2554.h5 /kaggle/working/TrainData/mask/mask_2554.h5
1729 /kaggle/working/TrainData/img/image_2555.h5 /kaggle/working/TrainDa

  data_ndvi = np.divide(data_nir - data_red,np.add(data_nir, data_red))


2280 /kaggle/working/TrainData/img/image_3050.h5 /kaggle/working/TrainData/mask/mask_3050.h5
2281 /kaggle/working/TrainData/img/image_3051.h5 /kaggle/working/TrainData/mask/mask_3051.h5
2282 /kaggle/working/TrainData/img/image_3052.h5 /kaggle/working/TrainData/mask/mask_3052.h5
2283 /kaggle/working/TrainData/img/image_3053.h5 /kaggle/working/TrainData/mask/mask_3053.h5
2284 /kaggle/working/TrainData/img/image_3054.h5 /kaggle/working/TrainData/mask/mask_3054.h5
2285 /kaggle/working/TrainData/img/image_3055.h5 /kaggle/working/TrainData/mask/mask_3055.h5
2286 /kaggle/working/TrainData/img/image_3056.h5 /kaggle/working/TrainData/mask/mask_3056.h5
2287 /kaggle/working/TrainData/img/image_3057.h5 /kaggle/working/TrainData/mask/mask_3057.h5
2288 /kaggle/working/TrainData/img/image_3058.h5 /kaggle/working/TrainData/mask/mask_3058.h5
2289 /kaggle/working/TrainData/img/image_3059.h5 /kaggle/working/TrainData/mask/mask_3059.h5
2290 /kaggle/working/TrainData/img/image_306.h5 /kaggle/working/TrainD

In [8]:
# TRAIN_XX_n = TRAIN_XX / TRAIN_XX.max()
TRAIN_XX[np.isnan(TRAIN_XX)] = 0.000001
print(TRAIN_XX.min(), TRAIN_XX.max(), TRAIN_YY.min(), TRAIN_YY.max())

-1.0 1.0 0.0 1.0


In [9]:
from sklearn.model_selection import train_test_split

# Split the data
x_train, x_valid, y_train, y_valid = train_test_split(TRAIN_XX, TRAIN_YY, test_size=0.2, shuffle= True)

In [10]:
x_train.shape, y_train.shape

((3039, 128, 128, 6), (3039, 128, 128, 1))

In [11]:
# to release some memory, delete the unnecessary variable
del TRAIN_XX
del TRAIN_YY
del all_train
del all_mask

In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from sklearn.metrics import precision_score, recall_score
import torch.nn.functional as F
# UNet model definition
class UNet(nn.Module):
    def __init__(self, in_channels=1, out_channels=1):
        super(UNet, self).__init__()
        self.conv1 = self.double_conv(in_channels, 16)
        self.conv2 = self.double_conv(16, 32)
        self.conv3 = self.double_conv(32, 64)
        self.conv4 = self.double_conv(64, 128)
        self.conv5 = self.double_conv(128, 256)
        self.upconv1 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.conv6 = self.double_conv(256, 128)
        self.upconv2 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.conv7 = self.double_conv(128, 64)
        self.upconv3 = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.conv8 = self.double_conv(64, 32)
        self.upconv4 = nn.ConvTranspose2d(32, 16, kernel_size=2, stride=2)
        self.conv9 = self.double_conv(32, 16)
        self.conv10 = nn.Conv2d(16, out_channels, kernel_size=1)

    def double_conv(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        c1 = self.conv1(x)
        p1 = F.max_pool2d(c1, kernel_size=2, stride=2)
        c2 = self.conv2(p1)
        p2 = F.max_pool2d(c2, kernel_size=2, stride=2)
        c3 = self.conv3(p2)
        p3 = F.max_pool2d(c3, kernel_size=2, stride=2)
        c4 = self.conv4(p3)
        p4 = F.max_pool2d(c4, kernel_size=2, stride=2)
        c5 = self.conv5(p4)
        u6 = self.upconv1(c5)
        u6 = torch.cat([u6, c4], dim=1)
        c6 = self.conv6(u6)
        u7 = self.upconv2(c6)
        u7 = torch.cat([u7, c3], dim=1)
        c7 = self.conv7(u7)
        u8 = self.upconv3(c7)
        u8 = torch.cat([u8, c2], dim=1)
        c8 = self.conv8(u8)
        u9 = self.upconv4(c8)
        u9 = torch.cat([u9, c1], dim=1)
        c9 = self.conv9(u9)
        outputs = self.conv10(c9)
        outputs = torch.sigmoid(outputs)
        return outputs

In [13]:
# Dice loss function
class DiceLoss(nn.Module):
    def __init__(self, smooth=1):
        super(DiceLoss, self).__init__()
        self.smooth = smooth

    def forward(self, y_true, y_pred):
        y_true_f = y_true.view(-1)
        y_pred_f = y_pred.view(-1)
        intersection = torch.sum(y_true_f * y_pred_f)
        return 1 - (2. * intersection + self.smooth) / (torch.sum(y_true_f) + torch.sum(y_pred_f) + self.smooth)

# Function to calculate Dice coefficient
def dice_coefficient(y_true, y_pred):
    smooth = 1e-6
    y_true_f = y_true.view(-1)
    y_pred_f = y_pred.view(-1)
    intersection = torch.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (torch.sum(y_true_f) + torch.sum(y_pred_f) + smooth)

# Function to calculate accuracy
def accuracy(y_true, y_pred):
    y_pred = y_pred > 0.5
    correct = (y_true == y_pred).sum().float()
    return correct / y_true.numel()

# Function to calculate precision and recall
def precision_recall(y_true, y_pred):
    y_true = y_true.cpu().numpy().astype(int).flatten()
    y_pred = (y_pred > 0.5).cpu().numpy().astype(int).flatten()
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    return precision, recall


# Assuming x_train, y_train, x_valid, y_valid are already loaded as numpy arrays or tensors
x_train = x_train[:, :, :, :6]
x_valid = x_valid[:, :, :, :6]
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
x_valid_tensor = torch.tensor(x_valid, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid, dtype=torch.float32)

# Normalize the input data
x_train_tensor = (x_train_tensor - x_train_tensor.mean()) / x_train_tensor.std()
x_valid_tensor = (x_valid_tensor - x_valid_tensor.mean()) / x_valid_tensor.std()

# Create DataLoader for training and validation sets
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
valid_dataset = TensorDataset(x_valid_tensor, y_valid_tensor)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False)


In [14]:
# Initialize UNet model
model = UNet(in_channels=6, out_channels=1)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=1e-3)  # Adjusted learning rate
criterion = DiceLoss()  # Using Dice loss

In [None]:
# Training function
def train(model, train_loader, valid_loader, optimizer, criterion, epochs=100, device='cuda'):
    best_valid_f1 = -np.inf
    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0.0
        train_dice = 0.0
        train_accuracy = 0.0
        train_precision = 0.0
        train_recall = 0.0

        for inputs, targets in train_loader:
            inputs = inputs.permute(0, 3, 1, 2).to(device)  # Permute to (batch, channels, height, width)
            targets = targets.permute(0, 3, 1, 2).to(device)  # Permute to (batch, channels, height, width)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * inputs.size(0)
            train_dice += dice_coefficient(targets, torch.sigmoid(outputs)).item() * inputs.size(0)
            train_accuracy += accuracy(targets, outputs).item() * inputs.size(0)
            precision, recall = precision_recall(targets, outputs)
            train_precision += precision * inputs.size(0)
            train_recall += recall * inputs.size(0)

        train_loss /= len(train_loader.dataset)
        train_dice /= len(train_loader.dataset)
        train_accuracy /= len(train_loader.dataset)
        train_precision /= len(train_loader.dataset)
        train_recall /= len(train_loader.dataset)

        # Validation
        model.eval()
        valid_loss = 0.0
        valid_dice = 0.0
        valid_accuracy = 0.0
        valid_precision = 0.0
        valid_recall = 0.0

        with torch.no_grad():
            for inputs, targets in valid_loader:
                inputs = inputs.permute(0, 3, 1, 2).to(device)  # Permute to (batch, channels, height, width)
                targets = targets.permute(0, 3, 1, 2).to(device)  # Permute to (batch, channels, height, width)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                valid_loss += loss.item() * inputs.size(0)
                valid_dice += dice_coefficient(targets, torch.sigmoid(outputs)).item() * inputs.size(0)
                valid_accuracy += accuracy(targets, outputs).item() * inputs.size(0)
                precision, recall = precision_recall(targets, outputs)
                valid_precision += precision * inputs.size(0)
                valid_recall += recall * inputs.size(0)

        valid_loss /= len(valid_loader.dataset)
        valid_dice /= len(valid_loader.dataset)
        valid_accuracy /= len(valid_loader.dataset)
        valid_precision /= len(valid_loader.dataset)
        valid_recall /= len(valid_loader.dataset)

        print(f'Epoch {epoch+1}/{epochs}, '
              f'Train Loss: {train_loss:.4f}, Train Dice: {train_dice:.4f}, Train Acc: {train_accuracy:.4f}, '
              f'Train Precision: {train_precision:.4f}, Train Recall: {train_recall:.4f}, '
              f'Valid Loss: {valid_loss:.4f}, Valid Dice: {valid_dice:.4f}, Valid Acc: {valid_accuracy:.4f}, '
              f'Valid Precision: {valid_precision:.4f}, Valid Recall: {valid_recall:.4f}')

        # Save the best model based on validation F1-score
        valid_f1 = 2 * (valid_precision * valid_recall) / (valid_precision + valid_recall)
        if valid_f1 > best_valid_f1:
            best_valid_f1 = valid_f1
            torch.save(model.state_dict(), 'landslide_unet_best_model.pth')
            print(f'Saved Best Model with Valid F1: {valid_f1:.4f}')

# Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
train(model, train_loader, valid_loader, optimizer, criterion, epochs=100, device=device)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/100, Train Loss: 0.5818, Train Dice: 0.0558, Train Acc: 0.9483, Train Precision: 0.4153, Train Recall: 0.6065, Valid Loss: 0.4764, Valid Dice: 0.0487, Valid Acc: 0.9793, Valid Precision: 0.4956, Valid Recall: 0.6062
Saved Best Model with Valid F1: 0.5454
Epoch 2/100, Train Loss: 0.4200, Train Dice: 0.0579, Train Acc: 0.9791, Train Precision: 0.5725, Train Recall: 0.6357, Valid Loss: 0.4813, Valid Dice: 0.0470, Valid Acc: 0.9820, Valid Precision: 0.5875, Valid Recall: 0.4985
Epoch 3/100, Train Loss: 0.4042, Train Dice: 0.0583, Train Acc: 0.9807, Train Precision: 0.5929, Train Recall: 0.6427, Valid Loss: 0.4963, Valid Dice: 0.0453, Valid Acc: 0.9847, Valid Precision: 0.6909, Valid Recall: 0.4099
Epoch 4/100, Train Loss: 0.3699, Train Dice: 0.0587, Train Acc: 0.9826, Train Precision: 0.6290, Train Recall: 0.6652, Valid Loss: 0.3984, Valid Dice: 0.0504, Valid Acc: 0.9827, Valid Precision: 0.5300, Valid Recall: 0.7198
Saved Best Model with Valid F1: 0.6105
Epoch 5/100, Train Loss: 0