# Libraries

In [None]:
pip install -U albumentations

Collecting albumentations
  Downloading albumentations-1.4.20-py3-none-any.whl.metadata (32 kB)
Collecting albucore==0.0.19 (from albumentations)
  Downloading albucore-0.0.19-py3-none-any.whl.metadata (5.2 kB)
Collecting stringzilla>=3.10.4 (from albucore==0.0.19->albumentations)
  Downloading stringzilla-3.10.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl.metadata (79 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Downloading albumentations-1.4.20-py3-none-any.whl (225 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m225.8/225.8 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading albucore-0.0.19-py3-none-any.whl (11 kB)
Downloading stringzilla-3.10.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_28_x86_64.whl (291 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m291.4/291.4 kB[0m [31m17.6 MB/s[0m eta [36m0:00:

In [None]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from PIL import Image
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from torch.utils.data import DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import torch.optim as optim
from torch.optim import lr_scheduler
from torchinfo import summary
import cv2
import itertools
import random

# Data Preparation

In [None]:
## Collect the path of every original image and store the list in a CSV file
# Directories for the original images
dataset1_original_images_dir = '/kaggle/input/droneimages/Dataset1/Dataset/original_images'
dataset2_original_images_dir = '/kaggle/input/droneimages/Dataset2/Dataset/original_images'

# Walk both dataset trees and keep every JPG/PNG file (extension matched
# case-insensitively). os.walk() yields directory entries in arbitrary order,
# so the result is sorted: without a stable order the seeded train/validation
# split performed later would not be reproducible.
image_paths = sorted(
    os.path.join(root, file)
    for images_dir in (dataset1_original_images_dir, dataset2_original_images_dir)
    for root, dirs, files in os.walk(images_dir)
    for file in files
    if file.lower().endswith(('jpg', 'png'))
)

# os.walk() silently yields nothing for a missing directory; fail here with a
# clear message instead of later inside the split.
if not image_paths:
    raise FileNotFoundError(
        'No jpg/png images found under '
        f'{dataset1_original_images_dir} or {dataset2_original_images_dir}'
    )

# Convert the list into a single-column DataFrame
df_images = pd.DataFrame(image_paths)

# Save the DataFrame to a CSV file (one path per line, no header row)
df_images.to_csv('/kaggle/working/image_paths.csv', index=False, header=False)



In [None]:
# Split the data into training set (90%) and validation set (10%)
train_data, val_data = train_test_split(df_images, test_size=0.10, random_state=42)

# Save both splits into CSV (one path per line, no header row). The validation
# split is written as well so that it is not lost once the kernel is gone.
train_data.to_csv('/kaggle/working/train_set.csv', index=False, header=False)
val_data.to_csv('/kaggle/working/val_set.csv', index=False, header=False)


# Data Preprocessing

In [None]:
# Jigsaw configuration: the image is cut into a puzzle_size x puzzle_size grid
puzzle_size = 2
n_patches = puzzle_size ** 2

# Every possible ordering of the patch indices 0 .. n_patches - 1
all_permutations = list(itertools.permutations(range(n_patches)))

# The class label of a permutation is its position in all_permutations
perm_to_label = dict(zip(all_permutations, range(len(all_permutations))))

# One class per permutation
n_classes = len(perm_to_label)

In [None]:
def create_permuted_image(image, permutation, puzzle_size=2):
    """Cut ``image`` into a grid of tiles and reassemble them in a new order.

    The image is divided into ``puzzle_size * puzzle_size`` equally sized
    tiles, numbered row by row starting from the top-left corner. Grid slot
    ``k`` of the result receives the tile numbered ``permutation[k]``.

    Args:
        image: Source image exposing ``size``, ``crop`` and usable with
            ``Image.paste`` (a PIL image).
        permutation: Iterable containing each integer in
            ``range(puzzle_size ** 2)`` exactly once.
        puzzle_size: Number of tiles per side of the grid.

    Returns:
        A new RGB image with the same size as ``image``. Tile dimensions are
        floored, so when a side is not divisible by ``puzzle_size`` the
        leftover strip on the right/bottom is not covered by any tile and
        keeps the fill colour of the blank canvas.

    Raises:
        ValueError: If ``permutation`` is not a permutation of
            ``range(puzzle_size ** 2)``.
    """
    permutation = tuple(permutation)
    n_pieces = puzzle_size * puzzle_size
    # A permutation built for a different grid size would otherwise produce a
    # partially empty image (too short) or an IndexError (too long).
    if sorted(permutation) != list(range(n_pieces)):
        raise ValueError(
            f'permutation must contain each index in range({n_pieces}) exactly once '
            f'for puzzle_size={puzzle_size}, got {permutation!r}'
        )

    img_width, img_height = image.size
    piece_width = img_width // puzzle_size
    piece_height = img_height // puzzle_size

    def tile_origin(index):
        """Return the (left, upper) pixel corner of grid cell ``index``."""
        row, col = divmod(index, puzzle_size)
        return col * piece_width, row * piece_height

    # Reconstruct the permuted image tile by tile
    new_image = Image.new('RGB', (img_width, img_height))
    for slot, source in enumerate(permutation):
        left, upper = tile_origin(source)
        piece = image.crop((left, upper, left + piece_width, upper + piece_height))
        new_image.paste(piece, tile_origin(slot))

    return new_image


In [None]:

class SSLDataset(Dataset):
    """Self-supervised jigsaw dataset.

    Each item is an image whose tiles were shuffled by a randomly chosen
    permutation, paired with the integer label of that permutation.

    Args:
        csv_file: Path to a header-less CSV whose first column holds one image
            path per row.
        puzzle_size: Number of tiles per side. It is forwarded to
            ``create_permuted_image``; the candidate permutations themselves
            come from the module-level ``perm_to_label``, so both must
            describe the same grid.
        transform: Optional albumentations-style callable, invoked as
            ``transform(image=ndarray)['image']``.
    """

    def __init__(self, csv_file, puzzle_size=2, transform=None):
        # The split CSVs are written with header=False, so the first line is
        # already an image path. header=None stops pandas from consuming it as
        # column names, which would silently drop one sample.
        self.data = pd.read_csv(csv_file, header=None)
        self.puzzle_size = puzzle_size
        self.transform = transform
        self.permutations = list(perm_to_label.keys())

    def __len__(self):
        """Return the number of image paths listed in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(permuted_image, label)`` for the image at row ``idx``."""
        # Load the image
        img_path = self.data.iloc[idx, 0]
        image = Image.open(img_path).convert('RGB')

        # Randomly select a permutation (a fresh draw on every access)
        permutation = random.choice(self.permutations)

        # The permuted image
        modified_image = create_permuted_image(image, permutation, self.puzzle_size)

        if self.transform:
            modified_image = self.transform(image=np.array(modified_image))['image']

        # Index of this permutation, used as the classification target
        label = perm_to_label[permutation]

        return modified_image, label


In [None]:
# Preprocessing pipeline applied to every permuted training image
# (resize + normalisation only; no random augmentation is applied here)
transform_train = A.Compose(
    [
        # Bring every image to a fixed 128x128 resolution
        A.Resize(height=128, width=128),
        # Standardise each channel with the given mean / std
        A.Normalize(
            mean=(0.445, 0.443, 0.401),
            std=(0.205, 0.200, 0.210),
        ),
        # Convert the resulting array to a PyTorch tensor
        ToTensorV2(),
    ]
)

In [None]:
# Number of samples per mini-batch
batch_size = 40

# Dataset of permuted images built from the training split
train_set = SSLDataset(
    csv_file='/kaggle/working/train_set.csv',
    puzzle_size=puzzle_size,
    transform=transform_train,
)

# Shuffled loader served by 4 worker processes
trainloader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    prefetch_factor=4,
)

# U-Net

In [None]:
class DoubleConv(nn.Module):
    """Two stacked 3x3 convolutions, each followed by batch norm and ReLU.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by the second convolution.
        mid_channels: Channels between the two convolutions; a falsy value
            (``None`` or ``0``) falls back to ``out_channels``.
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        hidden = mid_channels or out_channels

        # Build conv -> batch norm -> ReLU once per stage, in order
        stages = []
        for c_in, c_out in ((in_channels, hidden), (hidden, out_channels)):
            stages += [
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ]
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both convolution stages to ``x``."""
        return self.double_conv(x)

In [None]:
class Down(nn.Module):
    """Encoder stage: max-pool with a kernel of 2, then a DoubleConv block."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pool = nn.MaxPool2d(2)
        conv = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pool, conv)

    def forward(self, x):
        """Pool ``x`` and pass the result through the double convolution."""
        return self.maxpool_conv(x)


In [None]:
class Up(nn.Module):
    """Decoder stage: upsample, merge with the skip connection, then DoubleConv.

    Args:
        in_channels: Channels of the concatenated (skip + upsampled) tensor.
        out_channels: Channels produced by the stage.
        bilinear: Use fixed bilinear upsampling when true, otherwise a
            transposed convolution.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()

        half_channels = in_channels // 2
        if not bilinear:
            # Transposed convolution doubles the resolution and halves the channels
            self.up = nn.ConvTranspose2d(in_channels, half_channels, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            # Bilinear upsampling keeps the channel count, so the double conv
            # narrows through in_channels // 2 instead
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, half_channels)

    def forward(self, x1, x2):
        """Upsample ``x1``, pad it to the size of ``x2`` and fuse the two."""
        upsampled = self.up(x1)
        # Dimensions 2 and 3 are the spatial axes (height, width)
        gap_h = x2.size(2) - upsampled.size(2)
        gap_w = x2.size(3) - upsampled.size(3)

        # Split each gap as evenly as possible; the extra pixel goes to the
        # right / bottom side
        pad_left, pad_top = gap_w // 2, gap_h // 2
        upsampled = F.pad(upsampled, [pad_left, gap_w - pad_left,
                                      pad_top, gap_h - pad_top])
        # if you have padding issues, see
        # https://github.com/HaiyongJiang/U-Net-Pytorch-Unstructured-Buggy/commit/0e854509c2cea854e247a9c615f175f76fbb2e3a
        # https://github.com/xiaopeng-liao/Pytorch-UNet/commit/8ebac70e633bac59fc22bb5195e513d5832fb3bd
        merged = torch.cat([x2, upsampled], dim=1)
        return self.conv(merged)


In [None]:
class OutConv(nn.Module):
    """1x1 convolution mapping ``in_channels`` feature maps to ``out_channels``."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=(1, 1))

    def forward(self, x):
        """Apply the 1x1 convolution to ``x``."""
        return self.conv(x)


In [None]:
class UNet(nn.Module):
    """U-Net encoder/decoder followed by a linear classification head.

    The decoder output (64 channels at input resolution) is reduced by two
    stride-4 convolutions, brought to a fixed 8x8 grid, flattened and mapped
    to ``n_classes`` logits.

    Args:
        n_channels: Number of channels of the input images.
        n_classes: Number of output logits (one per permutation class).
        bilinear: Use bilinear upsampling in the decoder when true, transposed
            convolutions otherwise.
    """

    def __init__(self, n_channels, n_classes, bilinear=False):
        super(UNet, self).__init__()
        self.num_permutations = n_classes
        self.n_channels = n_channels
        self.bilinear = bilinear
        # Side length of the feature grid fed to the linear head
        self.head_grid = 8

        # Encoder
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        # With bilinear upsampling the channel count is not halved by the
        # upsampling op, so the bottleneck / decoder widths are halved instead
        factor = 2 if bilinear else 1
        self.down4 = Down(512, 1024 // factor)

        # Decoder
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)

        # Classification head
        self.reduce_spatial1 = nn.Conv2d(64, 64, kernel_size=3, stride=4, padding=1)
        self.reduce_spatial2 = nn.Conv2d(64, 64, kernel_size=3, stride=4, padding=1)
        self.fc1 = nn.Linear(64 * self.head_grid * self.head_grid, n_classes)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        # Encoder path; every intermediate is kept as a skip connection
        x1 = self.inc(x)
        x2 = self.down1(x1)
        x3 = self.down2(x2)
        x4 = self.down3(x3)
        x5 = self.down4(x4)

        # Decoder path, fusing the matching encoder feature map at each stage
        x = self.up1(x5, x4)
        x = self.up2(x, x3)
        x = self.up3(x, x2)
        x = self.up4(x, x1)

        # Two stride-4 convolutions turn a 128x128 map into 8x8
        x = self.reduce_spatial1(x)
        x = self.reduce_spatial2(x)
        # For 128x128 inputs the map is already 8x8 and this pooling is an
        # exact identity; for other input sizes it resizes the map so that the
        # flattened features still match fc1 instead of raising a shape error.
        x = F.adaptive_avg_pool2d(x, (self.head_grid, self.head_grid))
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        return x

# Training

In [None]:
# Model: 3-channel input, one output logit per permutation class
model = UNet(
    n_channels=3,
    n_classes=n_classes,
    bilinear=False,
)

# Loss: cross-entropy between the logits and the permutation label
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters, initial learning rate 1e-3
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Scheduler: multiply the learning rate by 0.1 when the monitored value
# (lower is better) stops improving, with a patience of 4 epochs
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=4,
)

In [None]:
# Select the compute device and report how many GPUs are visible
if not torch.cuda.is_available():
    # No CUDA runtime / GPU: fall back to the CPU
    device = torch.device('cpu')
    print('GPU is not available. Using CPU.')
else:
    num_gpus = torch.cuda.device_count()
    print(f'Number of GPUs available: {num_gpus}')
    if num_gpus < 2:
        print("There are less than 2 GPUs detected.")
    # The first GPU is the primary device in every CUDA configuration
    device = torch.device('cuda:0')

GPU is not available. Using CPU.


In [None]:
## Wrap the model for multi-GPU training when more than one GPU is present
multi_gpu = torch.cuda.is_available() and torch.cuda.device_count() > 1
if multi_gpu:
    print("Using DataParallel for multi-GPU training.")
    # Replicate the model on the first two GPUs
    model = nn.DataParallel(model, device_ids=[0, 1])

## Move the model (wrapped or not) to the selected device
model.to(device)
print(f'Model is using device: {device}')

# Report the replica devices whenever the model is a DataParallel wrapper
if isinstance(model, nn.DataParallel):
    print(f'Model is parallelized on devices: {model.device_ids}')

Model is using device: cpu


In [None]:
# Per-epoch training history (plain Python floats)
train_losses = []
train_accuracies = []
# Training parameters
num_epochs = 100

for epoch in range(num_epochs):

    print(f'Epoch {epoch+1}/{num_epochs}')
    print('-' * 10)

    # Training Phase
    model.train()
    running_loss = 0.0
    running_corrects = 0
    total = 0
    for inputs, labels in tqdm(trainloader, desc='Training'):
        inputs = inputs.to(device)
        labels = labels.to(device)
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        # Calculate loss
        loss = criterion(outputs, labels)
        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # obtain a correct per-sample average over the epoch
        batch_len = inputs.size(0)
        running_loss += loss.item() * batch_len
        _, preds = torch.max(outputs, 1)
        # .item() keeps the counter a Python int, so no device tensor is
        # carried across batches or stored in the history
        running_corrects += (preds == labels).sum().item()
        total += batch_len

    epoch_loss = running_loss / total
    epoch_acc = running_corrects / total

    print(f'Training Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_acc)
    # Update the scheduler with the training loss of this epoch
    # (no validation pass is run in this loop)
    scheduler.step(epoch_loss)

    print()

# NOTE: if the model was wrapped in nn.DataParallel, every key of the saved
# state dict carries a 'module.' prefix; code loading this file must wrap its
# model the same way or strip that prefix.
torch.save(model.state_dict(), 'unet_model.pth')
print('Model saved!')