# Dimensions of S1GRD

In [9]:
import xarray as xr
# Open the first S1GRD training shard. The path is relative, so it is resolved
# against the notebook's current working directory.
ds = xr.open_zarr(r'data/SSL4EO-S12-v1.1/train/S1GRD/ssl4eos12_train_seasonal_data_000001.zarr')
bands_var = ds['bands']  # the `bands` variable of the dataset
data = bands_var.values  # materialise as a numpy array with dims [B, T, C, H, W]
print(bands_var.shape)


(64, 4, 2, 264, 264)


In [8]:
import os
import zipfile

# Define path
base_dir = r"D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD"

ZIP_SUFFIX = '.zip'

# List all zipped Zarr stores in the folder (sorted so the run order is stable)
zip_files = sorted(f for f in os.listdir(base_dir) if f.endswith('.zarr' + ZIP_SUFFIX))

for zip_file in zip_files:
    zip_path = os.path.join(base_dir, zip_file)
    # Strip only the trailing '.zip'; str.replace would also remove any other
    # '.zip' occurrence inside the file name.
    extract_folder_name = zip_file[:-len(ZIP_SUFFIX)]  # Get .zarr folder name
    extract_path = os.path.join(base_dir, extract_folder_name)

    # Skip if already extracted
    if os.path.exists(extract_path):
        print(f"[✓] Already extracted: {extract_folder_name}")
        continue

    # Extract into a staging folder first and rename it once extraction has
    # finished. An interrupted run then never leaves a half-filled .zarr folder
    # that the "already extracted" check above would mistake for a complete one.
    # A leftover staging folder from an earlier interrupted run is simply
    # overwritten by extractall.
    staging_path = extract_path + '.partial'
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(staging_path)
    os.rename(staging_path, extract_path)
    print(f"[+] Extracted: {zip_file} → {extract_path}")



[+] Extracted: ssl4eos12_train_seasonal_data_000001.zarr.zip → D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD\ssl4eos12_train_seasonal_data_000001.zarr
[+] Extracted: ssl4eos12_train_seasonal_data_000002.zarr.zip → D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD\ssl4eos12_train_seasonal_data_000002.zarr
[+] Extracted: ssl4eos12_train_seasonal_data_000003.zarr.zip → D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD\ssl4eos12_train_seasonal_data_000003.zarr
[+] Extracted: ssl4eos12_train_seasonal_data_000004.zarr.zip → D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD\ssl4eos12_train_seasonal_data_000004.zarr
[+] Extracted: ssl4eos12_train_seasonal_data_000005.zarr.zip → D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD\ssl4eos12_train_seasonal_data_000005.zarr
[+] Extracted: ssl4eos12_train_seasonal_data_000006.zarr.zip → D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD\ssl4eos12_train_seasonal_data_000006.zarr
[+] Extracted: ssl4eos12_train_seasonal_data_000007.zarr.zip → D:\IVP 

In [12]:
import xarray as xr
import numpy as np
import cv2

# Open the extracted Zarr store and pull the whole `bands` variable into memory
zarr_path = r'D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD\ssl4eos12_train_seasonal_data_000001.zarr'
ds = xr.open_zarr(zarr_path)
bands = ds.bands.values  # Shape: (64, 4, 2, 264, 264)

print("Loaded data shape:", bands.shape)

# Denoising function for SAR
def bilateral_denoise(image, d=9, sigma_color=75, sigma_space=75):
    """Smooth a single 2-D image with OpenCV's bilateral filter.

    Args:
        image: 2-D array; it is cast to float32 before filtering.
        d: Diameter of the pixel neighbourhood passed to ``cv2.bilateralFilter``.
        sigma_color: Filter sigma in intensity space, forwarded as ``sigmaColor``.
        sigma_space: Filter sigma in coordinate space, forwarded as ``sigmaSpace``.

    Returns:
        The filtered image as returned by ``cv2.bilateralFilter`` for the
        float32 input.

    The defaults reproduce the previously hard-coded settings (9, 75, 75).
    """
    # NOTE(review): sigma_color is measured in the units of the pixel values;
    # 75 is a common choice for 0-255 images. Confirm it suits the value range
    # of the SAR backscatter stored in this dataset.
    return cv2.bilateralFilter(
        image.astype(np.float32),
        d=d,
        sigmaColor=sigma_color,
        sigmaSpace=sigma_space,
    )

# Preallocate the output as float32. bilateral_denoise casts its input to
# float32, so allocating with the input dtype (np.zeros_like) would silently
# cast the filtered values back whenever `bands` is stored in another dtype.
denoised = np.zeros(bands.shape, dtype=np.float32)

# Take the sample count from the data instead of hard-coding 64
num_samples = bands.shape[0]

# Apply bilateral filter to every [sample, season, channel] image
for n in range(num_samples):
    for t in range(bands.shape[1]):
        for c in range(bands.shape[2]):
            denoised[n, t, c] = bilateral_denoise(bands[n, t, c])
    print(f"[✓] Finished sample {n+1}/{num_samples}")

print("✅ Denoising complete. Final shape:", denoised.shape)



Loaded data shape: (64, 4, 2, 264, 264)
[✓] Finished sample 1/64
[✓] Finished sample 2/64
[✓] Finished sample 3/64
[✓] Finished sample 4/64
[✓] Finished sample 5/64
[✓] Finished sample 6/64
[✓] Finished sample 7/64
[✓] Finished sample 8/64
[✓] Finished sample 9/64
[✓] Finished sample 10/64
[✓] Finished sample 11/64
[✓] Finished sample 12/64
[✓] Finished sample 13/64
[✓] Finished sample 14/64
[✓] Finished sample 15/64
[✓] Finished sample 16/64
[✓] Finished sample 17/64
[✓] Finished sample 18/64
[✓] Finished sample 19/64
[✓] Finished sample 20/64
[✓] Finished sample 21/64
[✓] Finished sample 22/64
[✓] Finished sample 23/64
[✓] Finished sample 24/64
[✓] Finished sample 25/64
[✓] Finished sample 26/64
[✓] Finished sample 27/64
[✓] Finished sample 28/64
[✓] Finished sample 29/64
[✓] Finished sample 30/64
[✓] Finished sample 31/64
[✓] Finished sample 32/64
[✓] Finished sample 33/64
[✓] Finished sample 34/64
[✓] Finished sample 35/64
[✓] Finished sample 36/64
[✓] Finished sample 37/64
[✓] Fin

In [13]:
import xarray as xr
import numpy as np

# Open the Zarr store (adjust filename as needed)
zarr_path = r'D:\IVP _ project\data\SSL4EO-S12-v1.1\train\S1GRD\ssl4eos12_train_seasonal_data_000001.zarr'
ds = xr.open_zarr(zarr_path)

# Pick one sample along the first axis and convert just that slice to numpy
sample_index = 0
sar_cube = ds['bands'][sample_index].values  # shape: [T=4, C=2, H=264, W=264]
print("Shape of one SAR cube:", sar_cube.shape)


Shape of one SAR cube: (4, 2, 264, 264)


In [14]:
import cv2
from skimage.transform import resize

def bilateral_denoise(image, d=9, sigma_color=75, sigma_space=75):
    """Smooth a single 2-D image with OpenCV's bilateral filter.

    Args:
        image: 2-D array; it is cast to float32 before filtering.
        d: Diameter of the pixel neighbourhood passed to ``cv2.bilateralFilter``.
        sigma_color: Filter sigma in intensity space, forwarded as ``sigmaColor``.
        sigma_space: Filter sigma in coordinate space, forwarded as ``sigmaSpace``.

    Returns:
        The filtered image as returned by ``cv2.bilateralFilter`` for the
        float32 input.

    The defaults reproduce the previously hard-coded settings (9, 75, 75).
    """
    # NOTE(review): sigma_color is measured in the units of the pixel values;
    # 75 is a common choice for 0-255 images. Confirm it suits the value range
    # of the SAR backscatter stored in this dataset.
    return cv2.bilateralFilter(
        image.astype(np.float32),
        d=d,
        sigmaColor=sigma_color,
        sigmaSpace=sigma_space,
    )

def apply_clahe(image, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Contrast-enhance an image with CLAHE after rescaling it to 8 bits.

    The image is min-max normalised to the range 0-255, converted to uint8,
    and passed through OpenCV's CLAHE.

    Args:
        image: 2-D array to enhance.
        clip_limit: Forwarded to ``cv2.createCLAHE`` as ``clipLimit``.
        tile_grid_size: Forwarded to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns:
        The result of ``clahe.apply`` on the uint8 image.

    The defaults reproduce the previously hard-coded settings (2.0, (8, 8)).
    """
    # The min-max scaling is computed per call, i.e. independently for every
    # image handed to this function.
    norm = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    return clahe.apply(norm)

def sobel_edges(image):
    """Return the Sobel gradient magnitude of an image.

    Horizontal and vertical first derivatives are computed with a 3x3 Sobel
    kernel at float32 depth and combined with ``cv2.magnitude``.
    """
    # (dx, dy) derivative orders: (1, 0) is the x-gradient, (0, 1) the y-gradient
    grad_x, grad_y = (
        cv2.Sobel(image, cv2.CV_32F, dx, dy, ksize=3)
        for dx, dy in ((1, 0), (0, 1))
    )
    return cv2.magnitude(grad_x, grad_y)


In [15]:
# One [C=4, H, W] feature block per season, stacked into a single array below
per_season = []

for t in range(sar_cube.shape[0]):  # T = 4 seasons
    # Channel 0 is VV polarization, channel 1 is VH polarization
    vv, vh = sar_cube[t, 0], sar_cube[t, 1]

    vv_denoised, vh_denoised = bilateral_denoise(vv), bilateral_denoise(vh)

    # Contrast enhancement and edge extraction are derived from VV only
    vv_clahe = apply_clahe(vv_denoised)
    edge_map = sobel_edges(vv_clahe)

    # Channel order: [vv_denoised, vh_denoised, vv_clahe, edge_map]
    channels = [vv_denoised, vh_denoised, vv_clahe, edge_map]
    per_season.append(np.stack(channels, axis=0))

processed = np.stack(per_season)  # shape: [T=4, C=4, H=264, W=264]
print("Processed shape:", processed.shape)


Processed shape: (4, 4, 264, 264)


In [16]:
# Collapse the season axis (axis 0) into per-channel statistics
mean_img = processed.mean(axis=0)               # [C=4, H, W] mean over seasons
std_img = processed.std(axis=0)                 # [C=4, H, W] std over seasons
diff_img = processed[-1] - processed[0]         # [C=4, H, W] last season minus first

# Concatenate the three statistics along the channel axis
temporal_stack = np.concatenate((mean_img, std_img, diff_img), axis=0)  # [C=12, H, W]
print("Temporal aggregated shape:", temporal_stack.shape)


Temporal aggregated shape: (12, 264, 264)


In [17]:
def downsample(image, target_shape=(64, 64)):
    """Resize ``image`` to ``target_shape`` with scikit-image's ``resize``.

    Reflect padding is used at the borders, the input value range is preserved
    (no rescaling), and anti-aliasing is enabled.
    """
    resize_options = dict(mode='reflect', preserve_range=True, anti_aliasing=True)
    return resize(image, target_shape, **resize_options)

# Resize every channel on its own, then restack along the channel axis
downsampled = np.stack([downsample(channel) for channel in temporal_stack])
# Shape: [C=12, H=64, W=64]
print("Final downsampled shape:", downsampled.shape)


Final downsampled shape: (12, 64, 64)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNTransformerEncoder(nn.Module):
    """CNN + Transformer encoder that compresses a datacube into one vector.

    Pipeline: four stride-2 convolutions reduce the spatial size and produce
    512 feature maps; every spatial position becomes one token, is projected
    to ``embed_dim``, passed through a Transformer encoder, mean-pooled over
    all tokens and finally mapped to ``compressed_dim`` features.

    No positional encoding is added to the tokens, and the tokens are averaged
    afterwards.

    Args:
        input_channels: Number of channels of the input datacube.
        compressed_dim: Size of the returned embedding.
        embed_dim: Token width inside the Transformer (``d_model``).
        num_heads: Number of attention heads per Transformer layer.
        ff_dim: Width of the Transformer feed-forward sub-layer.
        num_layers: Number of stacked Transformer encoder layers.

    The defaults of the last four arguments reproduce the previously
    hard-coded architecture (128, 8, 256, 2).
    """

    def __init__(self, input_channels=27, compressed_dim=1024,
                 embed_dim=128, num_heads=8, ff_dim=256, num_layers=2):
        super().__init__()

        # CNN encoder to extract spatial features. Each block halves H and W
        # (rounding up). The shapes in the comments are for a 264x264 input.
        self.cnn = nn.Sequential(
            nn.Conv2d(input_channels, 64, kernel_size=3, stride=2, padding=1),  # [B, 64, 132, 132]
            nn.BatchNorm2d(64),
            nn.ReLU(),

            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),  # [B, 128, 66, 66]
            nn.BatchNorm2d(128),
            nn.ReLU(),

            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),  # [B, 256, 33, 33]
            nn.BatchNorm2d(256),
            nn.ReLU(),

            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),  # [B, 512, 17, 17]
            nn.BatchNorm2d(512),
            nn.ReLU(),
        )

        # Project the 512-dim CNN features of each spatial position to tokens
        self.proj = nn.Linear(512, embed_dim)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embed_dim,
                nhead=num_heads,
                dim_feedforward=ff_dim,
                batch_first=True,
            ),
            num_layers=num_layers,
        )

        # Map the pooled token representation to the final embedding size
        self.fc = nn.Linear(embed_dim, compressed_dim)

    def forward(self, x):
        """Encode a batch of datacubes.

        Args:
            x: Tensor of shape [B, input_channels, H, W].

        Returns:
            Tensor of shape [B, compressed_dim].

        Raises:
            ValueError: If ``x`` is not a 4-D tensor.
        """
        if x.dim() != 4:
            raise ValueError(
                f"expected a 4-D input [B, C, H, W], got shape {tuple(x.shape)}"
            )

        x = self.cnn(x)  # [B, 512, H', W']
        # Merge the two spatial axes into one token axis: [B, H'*W', 512]
        x = x.flatten(2).transpose(1, 2)
        x = self.proj(x)  # [B, H'*W', embed_dim]
        x = self.transformer(x)  # [B, H'*W', embed_dim]
        x = x.mean(dim=1)  # [B, embed_dim], average over all tokens
        compressed = self.fc(x)  # [B, compressed_dim]
        return compressed

# Example usage and saving to CSV
if __name__ == '__main__':
    import numpy as np
    import pandas as pd

    # Simulated preprocessed datacube [B, C=27, H=264, W=264]
    batch_size = 1
    datacube_ids = [f"cube_{i:05d}" for i in range(batch_size)]
    dummy_input = torch.randn(batch_size, 27, 264, 264)

    model = CNNTransformerEncoder()
    model.eval()  # inference mode: BatchNorm uses running stats, dropout is off

    with torch.no_grad():
        embeddings = model(dummy_input).numpy()  # [B, compressed_dim]

    # Derive the column names from the actual embedding width instead of a
    # hard-coded 1024, so a different `compressed_dim` does not break the
    # DataFrame construction.
    feature_columns = [f"f{i+1}" for i in range(embeddings.shape[1])]

    # Save to CSV with first column as datacube ID
    df = pd.DataFrame(embeddings, columns=feature_columns)
    df.insert(0, "datacube_id", datacube_ids)
    df.to_csv("compressed_embeddings.csv", index=False)

    print("Compressed embeddings saved to 'compressed_embeddings.csv'")


Compressed embeddings saved to 'compressed_embeddings.csv'


: 

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
import numpy as np

# Placeholder for one preprocessed datacube [C=12, H=64, W=64], drawn from a
# standard normal distribution. Normally this would come from the Zarr
# processing pipeline. A leading batch axis is added right away, so the
# result has shape [1, 12, 64, 64].
preprocessed_sample = torch.randn(12, 64, 64).unsqueeze(0)

# ------------------------------------------------------------
# Data Augmentation for SimCLR
# ------------------------------------------------------------
# Steps, in order: random crop resized back to 64x64 (covering 80-100% of the
# area), random horizontal flip, and a 3x3 Gaussian blur applied half the time.
_random_crop = transforms.RandomResizedCrop(size=64, scale=(0.8, 1.0))
_random_flip = transforms.RandomHorizontalFlip()
_random_blur = transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.5)

augmentation = transforms.Compose([_random_crop, _random_flip, _random_blur])

def apply_augments(image_tensor):
    """Create one randomly augmented view of a datacube batch.

    The module-level ``augmentation`` pipeline is applied directly to each
    [C, H, W] sample tensor, so one set of random crop / flip / blur
    parameters is shared by all channels of that sample.

    The previous implementation converted every channel to a PIL image on its
    own. That drew different random parameters per channel, leaving the
    channels of one view spatially misaligned. It also pushed the float data
    through an 8-bit image conversion that expects values in [0, 1].

    Args:
        image_tensor: Float tensor of shape [B, C, H, W] or [C, H, W].

    Returns:
        Tensor of shape [B, C, H', W'] (B = 1 for a 3-D input), where H' and
        W' are the output size of the augmentation pipeline.

    Raises:
        ValueError: If ``image_tensor`` is neither 3-D nor 4-D.

    Requires a torchvision version whose transforms accept tensors.
    """
    if image_tensor.dim() == 3:
        image_tensor = image_tensor.unsqueeze(0)  # add the batch dim
    if image_tensor.dim() != 4:
        raise ValueError(
            f"expected [B, C, H, W] or [C, H, W], got shape {tuple(image_tensor.shape)}"
        )

    # Augment sample by sample so each datacube gets its own random parameters
    return torch.stack([augmentation(sample) for sample in image_tensor])

# Generate the two views (d1, d2) from two separate calls on the same sample
d1, d2 = (apply_augments(preprocessed_sample) for _ in range(2))


