In [None]:
# train_cpu.ipynb

# 1. Importing required libraries
import os
import torch
from torch.utils import data
from model.Networks import unet
import torch.optim as optim
import torch.nn as nn
import time
from dataset.landslide_dataset import LandslideDataSet
import numpy as np
import matplotlib.pyplot as plt

# Define the eval_image function
def eval_image(pred, label, num_classes):
    """Accumulate per-class confusion counts for one prediction/label pair.

    Only positions whose label is one of ``0 .. num_classes - 1`` are
    evaluated. Positions carrying any other label value (for example the
    ``255`` ignore value the loss in this notebook is configured with) are
    dropped before counting, so they can no longer inflate FP/TN while being
    excluded from ``n_valid_sample``.

    Args:
        pred: Array-like of predicted class indices.
        label: Array-like of ground-truth class indices, same shape as
            ``pred`` (the caller in this notebook passes flattened arrays).
        num_classes: Number of classes to evaluate.

    Returns:
        Tuple ``(TP, FP, TN, FN, n_valid_sample)`` where the first four are
        float arrays of shape ``(num_classes, 1)`` and ``n_valid_sample`` is
        the number of positions with a valid label.
    """
    pred = np.asarray(pred)
    label = np.asarray(label)

    # Keep only the positions whose ground truth is a real class index.
    valid = np.isin(label, np.arange(num_classes))
    pred = pred[valid]
    label = label[valid]

    TP = np.zeros((num_classes, 1))
    FP = np.zeros((num_classes, 1))
    TN = np.zeros((num_classes, 1))
    FN = np.zeros((num_classes, 1))

    for i in range(num_classes):
        pred_is_i = pred == i
        label_is_i = label == i
        TP[i] = np.sum(pred_is_i & label_is_i)
        FP[i] = np.sum(pred_is_i & ~label_is_i)
        TN[i] = np.sum(~pred_is_i & ~label_is_i)
        FN[i] = np.sum(~pred_is_i & label_is_i)

    n_valid_sample = np.sum(valid)

    return TP, FP, TN, FN, n_valid_sample

# 2. Defining the necessary configurations and settings
data_dir = './dataset/'  # Directory where your dataset is located
train_list = './dataset/train.txt'  # Path to the train.txt file
test_list = './dataset/test.txt'  # Path to the test.txt file
input_size = '128,128'  # Input size for the images
num_classes = 2  # Number of classes (Landslide, Non-Landslide)
batch_size = 32  # Batch size for training
num_workers = 4  # Number of workers for data loading
learning_rate = 1e-3  # Learning rate
num_steps = 500  # Number of training steps
num_steps_stop = 500  # Number of training steps for early stopping
weight_decay = 5e-4  # Weight decay for regularization
snapshot_dir = './snapshots/'  # Directory to save model snapshots

# Ensure that the snapshot directory exists. exist_ok=True makes this a single
# call with no gap between "check" and "create", so re-running the cell (or
# another process creating the directory first) cannot raise.
os.makedirs(snapshot_dir, exist_ok=True)

# 3. Loading the data
def load_data():
    """Create the training and test DataLoaders from the notebook settings.

    The training dataset is built from ``train_list`` with
    ``max_iters=num_steps_stop * batch_size`` and ``set='labeled'``, and is
    served shuffled in batches of ``batch_size``. The test dataset is built
    from ``test_list`` with ``set='unlabeled'`` and is served one sample at a
    time, in order.

    NOTE(review): ``test_model`` unpacks a label from every test batch;
    confirm that the ``'unlabeled'`` split of ``LandslideDataSet`` really
    provides one.

    Returns:
        Tuple ``(src_loader, test_loader)``.
    """
    # Options common to both loaders.
    shared_options = dict(num_workers=num_workers, pin_memory=True)

    train_set = LandslideDataSet(
        data_dir,
        train_list,
        max_iters=num_steps_stop * batch_size,
        set='labeled',
    )
    src_loader = data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, **shared_options)

    eval_set = LandslideDataSet(data_dir, test_list, set='unlabeled')
    test_loader = data.DataLoader(
        eval_set, batch_size=1, shuffle=False, **shared_options)

    return src_loader, test_loader

# Build both loaders once; the training and testing functions read them
# as module-level names.
src_loader, test_loader = load_data()

# 4. Initializing the U-Net model
model = unet(n_classes=num_classes)

# 5. Defining the optimizer and loss function
optimizer = optim.Adam(
    model.parameters(),
    lr=learning_rate,
    weight_decay=weight_decay,
)
# Target pixels equal to 255 do not contribute to the loss.
cross_entropy_loss = nn.CrossEntropyLoss(ignore_index=255)

# 6. Training function
def train_model(num_steps_stop):
    """Train the global ``model`` for at most ``num_steps_stop`` batches.

    Batches are drawn from the global ``src_loader``; each one is used for a
    single optimizer step with the global ``optimizer`` and
    ``cross_entropy_loss``. Every 10 steps the mean time, accuracy and loss
    of the last 10 steps are printed.

    Args:
        num_steps_stop: Maximum number of training steps to run.

    Returns:
        Array of shape ``(steps_run, 3)`` with one row per completed step:
        column 0 is the loss, column 1 the batch overall accuracy (fraction
        of label positions predicted correctly, over all positions in the
        batch) and column 2 the wall-clock seconds spent on the step.
        ``steps_run`` is smaller than ``num_steps_stop`` when the loader runs
        out of batches first.
    """
    hist = np.zeros((num_steps_stop, 3))
    steps_done = 0

    # Nothing inside the loop switches the mode, so set it once up front.
    model.train()

    for batch_id, src_data in enumerate(src_loader):
        if batch_id == num_steps_stop:
            break

        start_time = time.time()
        optimizer.zero_grad()

        images, labels, _, _ = src_data

        # Forward pass
        pred = model(images)

        # Compute loss (CrossEntropyLoss expects integer class targets)
        labels = labels.long()
        loss = cross_entropy_loss(pred, labels)

        # Backpropagation and optimization
        loss.backward()
        optimizer.step()

        # Calculate batch accuracy; both tensors are moved to host memory the
        # same way before converting to NumPy.
        _, predicted_labels = torch.max(pred, 1)
        predicted_labels = predicted_labels.detach().cpu().numpy()
        labels_np = labels.detach().cpu().numpy()

        batch_oa = np.sum(predicted_labels == labels_np) * 1.0 / len(labels_np.reshape(-1))
        hist[batch_id, 0] = loss.item()
        hist[batch_id, 1] = batch_oa
        hist[batch_id, -1] = time.time() - start_time
        steps_done = batch_id + 1

        # Print progress, reporting against the limit this call was given
        # rather than an unrelated module-level setting.
        if steps_done % 10 == 0:
            window = hist[steps_done - 10:steps_done]
            print(f'Iter {steps_done}/{num_steps_stop} Time: {np.mean(window[:, -1]):.2f} Batch_OA = {np.mean(window[:, 1])*100:.1f} cross_entropy_loss = {np.mean(window[:, 0]):.3f}')

    # Hand the recorded history back instead of discarding it.
    return hist[:steps_done]


Number of files labeled: 19799
Number of files loaded: 19799
Number of files labeled: 800
Number of files loaded: 800


In [8]:

# 7. Testing function
def test_model():
    print('Testing..........')
    model.eval()
    TP_all = np.zeros((num_classes, 1))
    FP_all = np.zeros((num_classes, 1))
    TN_all = np.zeros((num_classes, 1))
    FN_all = np.zeros((num_classes, 1))
    n_valid_sample_all = 0
    F1 = np.zeros((num_classes, 1))
    F1_best = 0.5  # Initial best F1 score

    for _, batch in enumerate(test_loader):
        image, label, _, name = batch
        label = label.squeeze().numpy()
        image = image.float()

        with torch.no_grad():
            pred = model(image)

        _, pred = torch.max(pred, 1)
        pred = pred.squeeze().data.cpu().numpy()

        TP, FP, TN, FN, n_valid_sample = eval_image(pred.reshape(-1), label.reshape(-1), num_classes)
        TP_all += TP
        FP_all += FP
        TN_all += TN
        FN_all += FN
        n_valid_sample_all += n_valid_sample

    # Calculate precision, recall, F1 score
    OA = np.sum(TP_all) * 1.0 / n_valid_sample_all
    for i in range(num_classes):
        P = TP_all[i] * 1.0 / (TP_all[i] + FP_all[i] + 1e-14)
        R = TP_all[i] * 1.0 / (TP_all[i] + FN_all[i] + 1e-14)
        F1[i] = 2.0 * P * R / (P + R + 1e-14)

        if i == 1:
            print(f'===> Precision: {P*100:.2f} Recall: {R*100:.2f} F1: {F1[i]*100:.2f}')

    mF1 = np.mean(F1)
    print(f'===> Mean F1: {mF1*100:.2f} OA: {OA*100:.2f}')

    # Save the best model based on F1 score
    if F1[1] > F1_best:
        F1_best = F1[1]
        print('Saving Model...')
        model_name = f'best_model_F1_{int(F1[1]*10000)}.pth'
        torch.save(model.state_dict(), os.path.join(snapshot_dir, model_name))


In [9]:

# 8. Run training
# Only the training loop is started here; evaluation is invoked in a later cell.
train_model(num_steps_stop)

Iter 10/500 Time: 16.29 Batch_OA = 76.8 cross_entropy_loss = 0.593
Iter 20/500 Time: 20.39 Batch_OA = 97.8 cross_entropy_loss = 0.337
Iter 30/500 Time: 12.57 Batch_OA = 97.9 cross_entropy_loss = 0.227
Iter 40/500 Time: 13.58 Batch_OA = 98.4 cross_entropy_loss = 0.166
Iter 50/500 Time: 12.51 Batch_OA = 98.1 cross_entropy_loss = 0.141
Iter 60/500 Time: 12.52 Batch_OA = 98.5 cross_entropy_loss = 0.114
Iter 70/500 Time: 12.49 Batch_OA = 98.3 cross_entropy_loss = 0.100
Iter 80/500 Time: 12.46 Batch_OA = 98.5 cross_entropy_loss = 0.086
Iter 90/500 Time: 12.47 Batch_OA = 98.5 cross_entropy_loss = 0.076
Iter 100/500 Time: 13.03 Batch_OA = 98.7 cross_entropy_loss = 0.065
Iter 110/500 Time: 12.46 Batch_OA = 98.6 cross_entropy_loss = 0.064
Iter 120/500 Time: 12.36 Batch_OA = 98.3 cross_entropy_loss = 0.066
Iter 130/500 Time: 12.62 Batch_OA = 98.5 cross_entropy_loss = 0.059
Iter 140/500 Time: 13.22 Batch_OA = 98.5 cross_entropy_loss = 0.054
Iter 150/500 Time: 15.69 Batch_OA = 98.8 cross_entropy_lo

In [10]:
# 9. Saving the trained model
def save_model(model, snapshot_dir, model_name='unet_trained_model.pth'):
    """Write ``model``'s ``state_dict`` to ``snapshot_dir/model_name``.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).
        snapshot_dir: Directory to write into; created if it is missing.
        model_name: File name of the saved weights.
    """
    # exist_ok=True creates the directory when needed and is a no-op when it
    # is already there, without a separate existence check that could go
    # stale before the directory is created.
    os.makedirs(snapshot_dir, exist_ok=True)

    # Save the model's state_dict (recommended way of saving models)
    model_path = os.path.join(snapshot_dir, model_name)
    torch.save(model.state_dict(), model_path)
    print(f'Model saved to {model_path}')

# Example usage after your training loop:
# Writes the current weights of `model` to snapshot_dir under the given file name.
save_model(model, snapshot_dir, 'unet_trained_model.pth')


Model saved to ./snapshots/unet_trained_model.pth


In [11]:
# Evaluate the model on test_loader.
# NOTE(review): the recorded run of this cell stopped with a FileNotFoundError for
# './dataset/TestData/mask/mask_1.h5' — the test list appears to reference samples
# that have no mask file; confirm that test_list points at data with labels.
test_model()

Testing..........


FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\cvlns\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\torch\utils\data\_utils\worker.py", line 351, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\cvlns\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
            ~~~~~~~~~~~~^^^^^
  File "d:\VIT\Project - 1\Landslide detection\Mine\dataset\landslide_dataset.py", line 78, in __getitem__
    with h5py.File(datafiles["label"], "r") as hf:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\cvlns\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\h5py\_hl\files.py", line 561, in __init__
    fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\cvlns\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\h5py\_hl\files.py", line 235, in make_fid
    fid = h5f.open(name, flags, fapl=fapl)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "h5py\\_objects.pyx", line 54, in h5py._objects.with_phil.wrapper
  File "h5py\\_objects.pyx", line 55, in h5py._objects.with_phil.wrapper
  File "h5py\\h5f.pyx", line 102, in h5py.h5f.open
FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = './dataset/TestData/mask/mask_1.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)
