In [1]:
import os
from pathlib import Path

# Define directory structure
dirs = [
    'configs',
    'datasets/shenzhen/images/train',
    'datasets/shenzhen/images/val',
    'datasets/shenzhen/images/test',
    'datasets/shenzhen/masks/train',
    'datasets/shenzhen/masks/val',
    'datasets/shenzhen/masks/test',
    'data',
    'models',
    'engines',
    'scripts',
    'results/logs',
    'results/checkpoints'
]

# Create directories
for d in dirs:
    Path(d).mkdir(parents=True, exist_ok=True)

# Define files to create
files = [
    'configs/default.yaml',
    'configs/unet.yaml',
    'data/dataset.py',
    'models/__init__.py',
    'engines/trainer.py',
    'engines/evaluator.py',
    'scripts/train.py',
    'scripts/evaluate.py',
    'scripts/predict.py',
    'README.md'
]

# Create empty files
for f in files:
    Path(f).parent.mkdir(parents=True, exist_ok=True)
    Path(f).touch()

print("Project structure created successfully!")


Project structure created successfully!


In [5]:
# print structure of of a folder 'datasets' ignoring the png 
def print_structure(path, ignore_ext='.png'):
    for root, dirs, files in os.walk(path):
        for name in dirs:
            print(os.path.join(root, name))
        for name in files:
            if not name.endswith(ignore_ext):
                print(os.path.join(root, name))

# Print structure of the 'datasets' folder
print_structure('datasets', ignore_ext='.png')

datasets\raw
datasets\raw\montgomery
datasets\raw\shenzhen
datasets\raw\montgomery\images
datasets\raw\montgomery\masks
datasets\raw\shenzhen\images
datasets\raw\shenzhen\masks


In [5]:
import os
# check whether C:\Users\nailf\Desktop\skripsi-2\datasets\processed\montgomery\masks\MCUCXR_0001_0.png is 1 and 0
def check_mask_value(mask_path):
    if not os.path.exists(mask_path):
        print(f"Mask file {mask_path} does not exist.")
        return

    with open(mask_path, 'rb') as f:
        content = f.read()
        if b'\x00' in content and b'\x01' in content:
            print(f"Mask {mask_path} contains both 0 and 1 values.")
        elif b'\x00' in content:
            print(f"Mask {mask_path} contains only 0 values.")
        elif b'\x01' in content:
            print(f"Mask {mask_path} contains only 1 values.")
        else:
            print(f"Mask {mask_path} contains neither 0 nor 1 values.")

# Check a specific mask file
mask_file = 'datasets/processed/montgomery/masks/MCUCXR_0001_0.png'
check_mask_value(mask_file)

Mask datasets/processed/montgomery/masks/MCUCXR_0001_0.png contains both 0 and 1 values.


In [7]:
import os
import shutil
import random

def create_small_dataset_with_subfolders(source_base_path, dest_base_path, percentage=0.20):
    """
    Copies a percentage of files from 'images' and 'masks' subdirectories
    within test/train/val folders to new destination directories,
    maintaining the directory structure.

    Args:
        source_base_path (str): The base path of the original datasets (e.g., 'C:/Users/nailf/Desktop/skripsi-2/datasets/split/shenzhen').
        dest_base_path (str): The base path for the new small datasets (e.g., 'C:/Users/nailf/Desktop/skripsi-2/datasets/split-small/shenzhen').
        percentage (float): The percentage of files to copy (e.g., 0.20 for 20%).
    """
    top_level_dirs = ["test", "train", "val"]
    data_sub_dirs = ["images", "masks"] # The new level of subdirectories

    for top_dir in top_level_dirs:
        for data_sub_dir in data_sub_dirs:
            source_dir = os.path.join(source_base_path, top_dir, data_sub_dir)
            dest_dir = os.path.join(dest_base_path, top_dir, data_sub_dir)

            print(f"Processing: {source_dir}")

            # Create destination directory if it doesn't exist
            os.makedirs(dest_dir, exist_ok=True)

            # List all files in the source directory (assuming .npy files are directly here)
            # Filter for .npy files if that's the only type you're interested in
            all_files = [f for f in os.listdir(source_dir) if os.path.isfile(os.path.join(source_dir, f)) and f.lower().endswith('.npy')]

            if not all_files:
                print(f"No .npy files found in {source_dir}. Skipping.")
                print("-" * 30)
                continue

            # Calculate the number of files to copy
            num_files_to_copy = max(1, int(len(all_files) * percentage)) # Ensure at least 1 file if available

            # Randomly select files
            selected_files = random.sample(all_files, num_files_to_copy)

            print(f"Total .npy files in '{top_dir}/{data_sub_dir}': {len(all_files)}")
            print(f"Copying {len(selected_files)} files to '{dest_dir}'")

            # Copy selected files to the destination
            for file_name in selected_files:
                source_file_path = os.path.join(source_dir, file_name)
                dest_file_path = os.path.join(dest_dir, file_name)
                try:
                    shutil.copy2(source_file_path, dest_file_path) # copy2 preserves metadata
                except Exception as e:
                    print(f"Error copying {source_file_path} to {dest_file_path}: {e}")
            print("-" * 30)

# Define your paths
source_base_path = r"C:\Users\nailf\Desktop\skripsi-2\datasets\split\shenzhen"
dest_base_path = r"C:\Users\nailf\Desktop\skripsi-2\datasets\split-small\shenzhen"

# Run the function
create_small_dataset_with_subfolders(source_base_path, dest_base_path, percentage=0.20)

print("Small dataset creation complete!")

Processing: C:\Users\nailf\Desktop\skripsi-2\datasets\split\shenzhen\test\images
Total .npy files in 'test/images': 86
Copying 17 files to 'C:\Users\nailf\Desktop\skripsi-2\datasets\split-small\shenzhen\test\images'
------------------------------
Processing: C:\Users\nailf\Desktop\skripsi-2\datasets\split\shenzhen\test\masks
Total .npy files in 'test/masks': 86
Copying 17 files to 'C:\Users\nailf\Desktop\skripsi-2\datasets\split-small\shenzhen\test\masks'
------------------------------
Processing: C:\Users\nailf\Desktop\skripsi-2\datasets\split\shenzhen\train\images
Total .npy files in 'train/images': 396
Copying 79 files to 'C:\Users\nailf\Desktop\skripsi-2\datasets\split-small\shenzhen\train\images'
------------------------------
Processing: C:\Users\nailf\Desktop\skripsi-2\datasets\split\shenzhen\train\masks
Total .npy files in 'train/masks': 396
Copying 79 files to 'C:\Users\nailf\Desktop\skripsi-2\datasets\split-small\shenzhen\train\masks'
------------------------------
Processing

In [10]:
# path: models/swin_unet.py

import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from timm.models.swin_transformer import SwinTransformer

class SwinUNet(nn.Module):
    def __init__(self,
                 n_channels: int = 3,
                 n_classes: int = 2,
                 img_size: int = 224,  # Changed to 224 (more standard)
                 patch_size: int = 4,
                 embed_dim: int = 96,  # Base embedding dimension
                 depths = [2, 2, 6, 2],
                 num_heads = [3, 6, 12, 24],
                 window_size: int = 7,
                 drop_rate: float = 0.0,
                 drop_path_rate: float = 0.1):
        super().__init__()
        
        self.img_size = img_size
        self.patch_size = patch_size
        self.n_classes = n_classes
        
        # Calculate number of patches and final feature map size
        self.num_patches = (img_size // patch_size) ** 2
        
        # Build the Swin Transformer backbone
        self.backbone = SwinTransformer(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=n_channels,
            num_classes=0,  # No classification head
            embed_dim=embed_dim,
            depths=depths,
            num_heads=num_heads,
            window_size=window_size,
            mlp_ratio=4.0,
            qkv_bias=True,
            drop_rate=drop_rate,
            drop_path_rate=drop_path_rate,
            ape=False,  # Absolute position embedding
            patch_norm=True,
            use_checkpoint=False
        )
        
        # Get the number of features from the backbone
        # Based on the debug output, the final feature dimension is 768
        self.num_features = 768  # This matches the actual output from backbone
        
        # Calculate the spatial dimensions after all downsampling
        # Based on debug output: 7x7 feature maps for 224x224 input
        self.feature_size = 7  # This matches the actual spatial size
        
        # Decoder layers for upsampling
        self.decoder_layers = nn.ModuleList()
        
        # Current channels starts from the deepest features (768 from debug output)
        current_channels = 768
        
        # Build decoder layers for progressive upsampling
        # We need to go from 7x7 -> 14x14 -> 28x28 -> 56x56 -> 224x224
        decoder_channels = [384, 192, 96, 48]  # Progressive channel reduction
        
        for target_channels in decoder_channels:
            self.decoder_layers.append(
                nn.Sequential(
                    nn.ConvTranspose2d(
                        current_channels, 
                        target_channels, 
                        kernel_size=2, 
                        stride=2
                    ),
                    nn.BatchNorm2d(target_channels),
                    nn.ReLU(inplace=True),
                    nn.Conv2d(
                        target_channels, 
                        target_channels, 
                        kernel_size=3, 
                        padding=1
                    ),
                    nn.BatchNorm2d(target_channels),
                    nn.ReLU(inplace=True)
                )
            )
            current_channels = target_channels
        
        # Final upsampling to original resolution  
        # From current_channels (48) to final output
        # We need to go from 56x56 -> 224x224 (4x upsampling)
        self.final_upsample = nn.Sequential(
            nn.ConvTranspose2d(
                current_channels,  # 48 
                24, 
                kernel_size=4, 
                stride=4
            ),
            nn.BatchNorm2d(24),
            nn.ReLU(inplace=True),
            nn.Conv2d(24, 12, kernel_size=3, padding=1),
            nn.BatchNorm2d(12),
            nn.ReLU(inplace=True)
        )
        
        # Final classification head
        self.head = nn.Conv2d(12, n_classes, kernel_size=1)
        
        # Initialize weights
        self.apply(self._init_weights)
    
    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            nn.init.trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)
        elif isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        B, C, H, W = x.shape
        
        # Ensure input size is compatible
        if H != self.img_size or W != self.img_size:
            x = F.interpolate(x, size=(self.img_size, self.img_size), 
                            mode='bilinear', align_corners=False)
        
        # Get features from Swin Transformer backbone
        features = self.backbone.forward_features(x)  # Returns [B, H, W, C] format
        
        # Handle the actual return format [B, H, W, C]
        if len(features.shape) == 4:
            # Convert from [B, H, W, C] to [B, C, H, W] for conv layers
            features = features.permute(0, 3, 1, 2).contiguous()
        elif len(features.shape) == 2:
            # If it returns [B, C], we need to reshape to spatial format
            h = w = self.feature_size
            features = features.view(B, self.num_features, h, w)
        elif len(features.shape) == 3:
            # If it returns [B, L, C], reshape to spatial
            B_feat, L, C_feat = features.shape
            h = w = int(math.sqrt(L))
            features = features.transpose(1, 2).contiguous().view(B, C_feat, h, w)
        else:
            raise ValueError(f"Unexpected feature shape: {features.shape}")
        
        # Apply decoder layers
        for decoder_layer in self.decoder_layers:
            features = decoder_layer(features)
        
        # Final upsampling to original resolution
        features = self.final_upsample(features)
        
        # Apply classification head
        output = self.head(features)
        
        # Resize to match original input size if needed
        if output.shape[-2:] != (H, W):
            output = F.interpolate(output, size=(H, W), 
                                 mode='bilinear', align_corners=False)
        
        return output

# Example usage and testing
if __name__ == "__main__":
    # Test the model
    model = SwinUNet(
        n_channels=3,
        n_classes=2,
        img_size=224,
        patch_size=4,
        embed_dim=96,
        depths=[2, 2, 6, 2],
        num_heads=[3, 6, 12, 24]
    )
    
    # Test with dummy input
    x = torch.randn(2, 3, 224, 224)
    
    # Debug: Check what the backbone returns
    print("Debugging backbone output...")
    with torch.no_grad():
        backbone_features = model.backbone.forward_features(x)
        print(f"Backbone features shape: {backbone_features.shape}")
        print(f"Backbone features type: {type(backbone_features)}")
        
        # Test full forward pass
        try:
            output = model(x)
            print(f"Input shape: {x.shape}")
            print(f"Output shape: {output.shape}")
            print(f"Model parameters: {sum(p.numel() for p in model.parameters()):,}")
        except Exception as e:
            print(f"Error during forward pass: {e}")
            print("This helps identify the exact issue with the backbone output format")

Debugging backbone output...
Backbone features shape: torch.Size([2, 7, 7, 768])
Backbone features type: <class 'torch.Tensor'>
Input shape: torch.Size([2, 3, 224, 224])
Output shape: torch.Size([2, 2, 224, 224])
Model parameters: 30,874,112
