In [4]:
import zarr
import numpy as np

# Specify the folder where the Zarr dataset should be saved
zarr_store = zarr.open_group(r"D:\Eval.AI\train\ssl4eos12_train_seasonal_data_000001.zarr", mode='w')

# Create a dataset with chunking
zarr_store.create_dataset("images", shape=(1000, 256, 256, 3), chunks=(10, 256, 256, 3), dtype=np.uint8)

# Store some random samples
zarr_store["images"][:10] = np.random.randint(0, 255, (10, 256, 256, 3), dtype=np.uint8)

print("Zarr dataset successfully saved in a folder!")



Zarr dataset successfully saved in a folder!


In [6]:
!pip install zarr xarray numpy imageio dask





[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: C:\Users\Dhina\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip



Collecting xarray
  Obtaining dependency information for xarray from https://files.pythonhosted.org/packages/91/fd/973deafd9f87085136a58573600646b408ae7af47859f35151f0d83d5090/xarray-2025.3.1-py3-none-any.whl.metadata
  Downloading xarray-2025.3.1-py3-none-any.whl.metadata (12 kB)
Collecting dask
  Obtaining dependency information for dask from https://files.pythonhosted.org/packages/bd/8a/3609033a4bfd7c9b3e8a4e8a5d6e318dfc06ab2e2d3b5cb0e01a60458858/dask-2025.3.0-py3-none-any.whl.metadata
  Downloading dask-2025.3.0-py3-none-any.whl.metadata (3.8 kB)
Collecting cloudpickle>=3.0.0 (from dask)
  Obtaining dependency information for cloudpickle>=3.0.0 from https://files.pythonhosted.org/packages/7e/e8/64c37fadfc2816a7701fa8a6ed8d87327c7d54eacfbfb6edab14a2f2be75/cloudpickle-3.1.1-py3-none-any.whl.metadata
  Downloading cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting partd>=1.4.0 (from dask)
  Obtaining dependency information for partd>=1.4.0 from https://files.pythonhosted

In [8]:
import xarray as xr
ds = xr.open_zarr('train/ssl4eos12_train_seasonal_data_000001.zarr.zip')  # load xarray dataset
data = ds.bands.values  # load numpy array with dims [B, T, C, H, W]

In [11]:
print(data.shape)  # Should show (B, T, C, H, W)



(64, 4, 2, 264, 264)


In [3]:
import xarray as xr
import numpy as np
import os
from PIL import Image

# Load the xarray dataset
ds = xr.open_zarr('train/ssl4eos12_train_seasonal_data_000001.zarr.zip')

# Extract the bands data as a NumPy array with shape [B, T, C, H, W]
data = ds.bands.values  

# Ensure the output directory exists
output_dir = os.path.join("train", "output")
os.makedirs(output_dir, exist_ok=True)

def transform_image(img, offset=1000):
    """Applies contrast stretching and normalization to the input image."""
    img = img - offset  # Apply offset correction

    # Compute 2nd and 98th percentile for contrast adjustment
    Q2, Q98 = np.quantile(img, [0.02, 0.98])

    # Adjust outlier values
    img = np.where(img >= Q2, img, Q2 + (img - Q2) * 0.5)
    img = np.where(img <= Q98, img, Q98 + (img - Q98) * 0.5)

    # Compute additional quantiles for normalization
    Q02, Q50, Q998 = np.quantile(img, [0.002, 0.5, 0.998])

    # Determine normalization limits
    U = max(2000, Q998)
    L = 0 if Q50 < 1000 else Q02

    # Normalize to 0-255 range
    img = (img - L) / (U - L) * 255
    img = np.clip(img, 0, 255)  # Ensure values stay in range

    return img.astype(np.uint8)

# Loop through each image and save as PNG
for b in range(data.shape[0]):  # B: batch index
    for t in range(data.shape[1]):  # T: time index
        for c in range(data.shape[2]):  # C: channel index
            img = transform_image(data[b, t, c])  # Transform the image
            img_path = os.path.join(output_dir, f"image_B{b}_T{t}_C{c}.png")  # Naming convention
            Image.fromarray(img).save(img_path)  # Save as PNG

print(f"Images saved in: {output_dir}")


Images saved in: train\output


In [5]:
import os
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import xarray as xr

def normalize(img):
    """Normalize array to 0–255 for image saving."""
    img_min = img.min()
    img_max = img.max()
    return ((img - img_min) / (img_max - img_min + 1e-6) * 255).astype(np.uint8)

def save_image(img_array, path):
    """Save an image array to the given path."""
    img = Image.fromarray(img_array)
    img.save(path)

def extract_and_save_all_images(zarr_path, output_dir, band_indices=[0, 1, 2], max_timestamps=4):
    """Extracts all samples and up to 4 time steps from Sentinel-2 Zarr data, saves as images."""
    # Load dataset
    ds = xr.open_zarr(zarr_path)
    data = ds.bands.values  # shape: [B, T, C, H, W]
    B, T, C, H, W = data.shape
    print(f"Loaded data shape: {data.shape}")

    os.makedirs(output_dir, exist_ok=True)

    for b in range(B):
        for t in range(min(T, max_timestamps)):
            # Extract selected bands
            image = data[b, t, band_indices, :, :]  # shape: [3, H, W]

            # Normalize each band
            normalized_image = np.stack([normalize(image[i]) for i in range(len(band_indices))], axis=-1)

            # Save image
            filename = f"sample_{b}_time_{t}.png"
            save_path = os.path.join(output_dir, filename)
            save_image(normalized_image, save_path)

            print(f"Saved {filename}")

    print("All images extracted and saved.")

# === Run the function ===
zarr_file_path = r'D:\Eval.AI\train\ssl4eos12_train_seasonal_data_000001.zarr'
output_folder = "output_images"

extract_and_save_all_images(zarr_file_path, output_folder)




Loaded data shape: (64, 4, 3, 264, 264)
Saved sample_0_time_0.png
Saved sample_0_time_1.png
Saved sample_0_time_2.png
Saved sample_0_time_3.png
Saved sample_1_time_0.png
Saved sample_1_time_1.png
Saved sample_1_time_2.png
Saved sample_1_time_3.png
Saved sample_2_time_0.png
Saved sample_2_time_1.png
Saved sample_2_time_2.png
Saved sample_2_time_3.png
Saved sample_3_time_0.png
Saved sample_3_time_1.png
Saved sample_3_time_2.png
Saved sample_3_time_3.png
Saved sample_4_time_0.png
Saved sample_4_time_1.png
Saved sample_4_time_2.png
Saved sample_4_time_3.png
Saved sample_5_time_0.png
Saved sample_5_time_1.png
Saved sample_5_time_2.png
Saved sample_5_time_3.png
Saved sample_6_time_0.png
Saved sample_6_time_1.png
Saved sample_6_time_2.png
Saved sample_6_time_3.png
Saved sample_7_time_0.png
Saved sample_7_time_1.png
Saved sample_7_time_2.png
Saved sample_7_time_3.png
Saved sample_8_time_0.png
Saved sample_8_time_1.png
Saved sample_8_time_2.png
Saved sample_8_time_3.png
Saved sample_9_time_0.pn

In [15]:
import os
import numpy as np
import xarray as xr
import cv2
from skimage.filters import sobel
from scipy.ndimage import gaussian_filter
from tqdm import tqdm

# ---------- Configuration ----------
ZARR_PATH = 'D:\Eval.AI\train\ssl4eos12_train_seasonal_data_000001.zarr'
OUTPUT_DIR = './embeddings'
DOWNSAMPLED_SIZE = (64, 64)  # H, W
os.makedirs(OUTPUT_DIR, exist_ok=True)

# ---------- Load Data ----------
ds = xr.open_zarr(ZARR_PATH)
data = ds['bands'].values  # shape: [B, T=4, C=2, H, W]
print("Data shape:", data.shape)

# ---------- Helper Functions ----------
def normalize(img):
    img = img.astype(np.float32)
    return (img - img.min()) / (img.max() - img.min() + 1e-6)

def bilateral_denoise(img):
    img = normalize(img)
    return cv2.bilateralFilter((img * 255).astype(np.uint8), d=5, sigmaColor=75, sigmaSpace=75)

def extract_edge_map(img):
    return sobel(normalize(img))

def downsample(img, size):
    return cv2.resize(img, size[::-1], interpolation=cv2.INTER_AREA)

# ---------- Main Pipeline ----------
embeddings = []
for i in tqdm(range(data.shape[0])):  # For each cube
    temporal_images = []

    for t in range(4):  # For each season
        vv = data[i, t, 0]  # VV
        vh = data[i, t, 1]  # VH

        vv_denoised = bilateral_denoise(vv)
        vh_denoised = bilateral_denoise(vh)

        vv_edge = extract_edge_map(vv_denoised)
        vh_edge = extract_edge_map(vh_denoised)

        # Stack channels
        stacked = np.stack([
            normalize(vv_denoised),
            normalize(vh_denoised),
            vv_edge,
            vh_edge
        ], axis=0)  # Shape: [4, H, W]

        temporal_images.append(stacked)

    temporal_images = np.stack(temporal_images, axis=0)  # Shape: [T=4, 4, H, W]

    # ---------- Temporal Aggregation ----------
    mean_img = np.mean(temporal_images, axis=0)
    std_img = np.std(temporal_images, axis=0)
    diff_img = temporal_images[3] - temporal_images[0]

    agg_stack = np.concatenate([mean_img, std_img, diff_img], axis=0)  # Shape: [12, H, W]

    # ---------- Downsampling ----------
    downsampled = np.stack([downsample(agg_stack[c], DOWNSAMPLED_SIZE) for c in range(agg_stack.shape[0])], axis=0)  # [12, 64, 64]

    # ---------- Neural Encoding Placeholder ----------
    # Flatten to 1D (pretend we passed through neural encoder)
    embedding = downsampled.flatten()[:1024]  # Pad or truncate
    if embedding.shape[0] < 1024:
        embedding = np.pad(embedding, (0, 1024 - embedding.shape[0]))

    embeddings.append(np.concatenate([[i], embedding]))

# ---------- Save Embeddings ----------
embeddings = np.array(embeddings)
np.savetxt(os.path.join(OUTPUT_DIR, 'sar_embeddings.csv'), embeddings, delimiter=',', fmt='%.6f')
print("✅ Embeddings saved to:", os.path.join(OUTPUT_DIR, 'sar_embeddings.csv'))


FileNotFoundError: No such file or directory: 'D:\Eval.AI	rain\ssl4eos12_train_seasonal_data_000001.zarr'

In [13]:
!pip install -U scikit-image


Collecting scikit-image
  Obtaining dependency information for scikit-image from https://files.pythonhosted.org/packages/5f/ee/c53a009e3997dda9d285402f19226fbd17b5b3cb215da391c4ed084a1424/scikit_image-0.25.2-cp310-cp310-win_amd64.whl.metadata
  Downloading scikit_image-0.25.2-cp310-cp310-win_amd64.whl.metadata (14 kB)
Collecting scipy>=1.11.4 (from scikit-image)
  Obtaining dependency information for scipy>=1.11.4 from https://files.pythonhosted.org/packages/d0/d2/f0683b7e992be44d1475cc144d1f1eeae63c73a14f862974b4db64af635e/scipy-1.15.2-cp310-cp310-win_amd64.whl.metadata
  Downloading scipy-1.15.2-cp310-cp310-win_amd64.whl.metadata (60 kB)
     ---------------------------------------- 0.0/60.8 kB ? eta -:--:--
     ---------------------------------------- 60.8/60.8 kB ? eta 0:00:00
Collecting pillow>=10.1 (from scikit-image)
  Obtaining dependency information for pillow>=10.1 from https://files.pythonhosted.org/packages/14/81/d0dff759a74ba87715509af9f6cb21fa21d93b02b3316ed43bda83664db9

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\Dhina\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python310\\site-packages\\~cipy.libs\\libopenblas-802f9ed1179cb9c9b03d67ff79f48187.dll'
Check the permissions.


[notice] A new release of pip is available: 23.2.1 -> 25.0.1
[notice] To update, run: C:\Users\Dhina\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip
