In [3]:
import os
import numpy as np
import xarray as xr
from pystac_client import Client

# Path to the directory containing NetCDF files
netcdf_directory = "ai4arctic-sea-ice-challenge-ready-to-train/v2/ready-to-train_train"

# STAC API client setup
client = Client.open("https://catalogue.dataspace.copernicus.eu/stac") 
collection = "SENTINEL-1"

# Initialize an empty list to collect matching filtered items
matching_items = []

# Iterate through each NetCDF file in the directory
for netcdf_file in os.listdir(netcdf_directory):
    if netcdf_file.endswith('.nc'):
        # Read the NetCDF file
        ds = xr.open_dataset(os.path.join(netcdf_directory, netcdf_file))
        
        # Calculate the bounding box
        lat_min = np.min(ds.sar_grid2d_latitude.values)
        lat_max = np.max(ds.sar_grid2d_latitude.values)
        lon_min = np.min(ds.sar_grid2d_longitude.values)
        lon_max = np.max(ds.sar_grid2d_longitude.values)

        # Define the bounding box (min_lon, min_lat, max_lon, max_lat)
        bbox = [lon_min, lat_min, lon_max, lat_max]

        # Extract the date from the scene ID in attributes (if exists)
        scene_id = ds.attrs.get('scene_id', '')  # Adjust based on actual attribute name
        date = scene_id[:8] if scene_id else ''  # Extract date part

        # Perform the Search with bbox and datetime
        search = client.search(
            collections=[collection],
            bbox=bbox,
            datetime=f"{date[:4]}-{date[4:6]}-{date[6:8]}",
            limit=100
        )

        # Fetch All Matching Items
        try:
            items = list(search.get_items())
            print(f"Found {len(items)} items matching bbox and datetime criteria for {netcdf_file}.")
        except Exception as e:
            print(f"An error occurred during the search: {e}")
            continue

        # Initialize an empty list to store filtered items
        filtered_items = []

        # Iterate through retrieved items and filter by 's1:instrument_mode' and 'processingLevel'
        for item in items:
            instrument_mode = item.properties.get('operationalMode', '')
            processing_level = item.properties.get('processingLevel', '')
            
            # Check if both conditions are met
            if instrument_mode == 'EW' and processing_level == 'LEVEL1':
                filtered_items.append(item)

        print(f"Filtered down to {len(filtered_items)} EW mode and LEVEL1 items for {netcdf_file}.")

        # Get the original ID from the dataset attributes
        original_id = ds.attrs.get('original_id', '')  # Adjust based on actual attribute name
        # Extract the part of original_id before '_icechart'
        original_id_prefix = original_id.split('_icechart')[0] if '_icechart' in original_id else original_id

        # Match the original_id with filtered items
        for filtered_item in filtered_items:
            filtered_id = filtered_item.id.split('.')[0]  # Remove the ".SAFE" part
            if original_id_prefix in filtered_id:  # Check for a match
                matching_items.append(filtered_item)
                break  # Stop after finding the first match

# Output the matched items
print(f"Total matching items collected: {len(matching_items)}")
for item in matching_items:
    print(item.id)




Found 68 items matching bbox and datetime criteria for 20201229T104119_dmi_prep.nc.
Filtered down to 42 EW mode and LEVEL1 items for 20201229T104119_dmi_prep.nc.
Found 32 items matching bbox and datetime criteria for 20190831T144552_cis_prep.nc.
Filtered down to 10 EW mode and LEVEL1 items for 20190831T144552_cis_prep.nc.
Found 27 items matching bbox and datetime criteria for 20190612T101220_cis_prep.nc.
Filtered down to 12 EW mode and LEVEL1 items for 20190612T101220_cis_prep.nc.
Found 16 items matching bbox and datetime criteria for 20190411T102726_cis_prep.nc.
Filtered down to 12 EW mode and LEVEL1 items for 20190411T102726_cis_prep.nc.
Found 16 items matching bbox and datetime criteria for 20210509T104806_dmi_prep.nc.
Filtered down to 6 EW mode and LEVEL1 items for 20210509T104806_dmi_prep.nc.
Found 35 items matching bbox and datetime criteria for 20210728T120930_dmi_prep.nc.
Filtered down to 15 EW mode and LEVEL1 items for 20210728T120930_dmi_prep.nc.
Found 36 items matching bbox 

In [9]:
from oauthlib.oauth2 import BackendApplicationClient
from requests_oauthlib import OAuth2Session

# Your client credentials
client_id = '###'
client_secret = '###'

# Create a session
client = BackendApplicationClient(client_id=client_id)
oauth = OAuth2Session(client=client)

# Get token for the session
token = oauth.fetch_token(token_url='https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token',
                          client_secret=client_secret, include_client_id=True)

def sentinelhub_compliance_hook(response):
    response.raise_for_status()
    return response

oauth.register_compliance_hook("access_token_response", sentinelhub_compliance_hook)
# All requests using this session will have an access token automatically added
resp = oauth.get("https://sh.dataspace.copernicus.eu/configuration/v1/wms/instances")
print(resp.content)



b'[]'


In [13]:
import requests
def get_keycloak(username: str, password: str) -> str:
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
        )
        r.raise_for_status()
    except Exception as e:
        raise Exception(
            f"Keycloak token creation failed. Reponse from the server was: {r.json()}"
        )
    return r.json()["access_token"]

keycloak_token = get_keycloak('###','###')
keycloak_token

'eyJhbGciOiJSUzI1NiIsInR5cCIgOiAiSldUIiwia2lkIiA6ICJYVUh3VWZKaHVDVWo0X3k4ZF8xM0hxWXBYMFdwdDd2anhob2FPLUxzREZFIn0.eyJleHAiOjE3MzE1Mzc2NzIsImlhdCI6MTczMTUzNzA3MiwianRpIjoiZjk1NmQyMmEtMWNmMC00Nzg1LTg1OWYtMmE5MzcwZGY4NWJkIiwiaXNzIjoiaHR0cHM6Ly9pZGVudGl0eS5kYXRhc3BhY2UuY29wZXJuaWN1cy5ldS9hdXRoL3JlYWxtcy9DRFNFIiwiYXVkIjpbIkNMT1VERkVSUk9fUFVCTElDIiwiYWNjb3VudCJdLCJzdWIiOiIyYWMyNDIwZS1kM2MwLTQyNWYtYTY2OS1jYWY4YjA3N2QxZjMiLCJ0eXAiOiJCZWFyZXIiLCJhenAiOiJjZHNlLXB1YmxpYyIsInNlc3Npb25fc3RhdGUiOiIyMTA3NDM4OS1jOGViLTQ0NzAtYTQxNC1hMGRjNWE3MjgzOWEiLCJhbGxvd2VkLW9yaWdpbnMiOlsiaHR0cHM6Ly9sb2NhbGhvc3Q6NDIwMCIsIioiLCJodHRwczovL3dvcmtzcGFjZS5zdGFnaW5nLWNkc2UtZGF0YS1leHBsb3Jlci5hcHBzLnN0YWdpbmcuaW50cmEuY2xvdWRmZXJyby5jb20iXSwicmVhbG1fYWNjZXNzIjp7InJvbGVzIjpbIm9mZmxpbmVfYWNjZXNzIiwidW1hX2F1dGhvcml6YXRpb24iLCJkZWZhdWx0LXJvbGVzLWNkYXMiLCJjb3Blcm5pY3VzLWdlbmVyYWwiXX0sInJlc291cmNlX2FjY2VzcyI6eyJhY2NvdW50Ijp7InJvbGVzIjpbIm1hbmFnZS1hY2NvdW50IiwibWFuYWdlLWFjY291bnQtbGlua3MiLCJ2aWV3LXByb2ZpbGUiXX19LCJzY29wZSI6IkFVREl

## Get Safe Products

In [15]:
import os
import requests
import time
from datetime import datetime, timedelta

# Ensure the directory exists
output_dir = 'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/'
os.makedirs(output_dir, exist_ok=True)

# Global variables to store the token and its expiration time
keycloak_token = None
token_expiration_time = None

def get_keycloak(username: str, password: str) -> str:
    global keycloak_token, token_expiration_time
    
    # If the token exists and hasn't expired, return it
    if keycloak_token and token_expiration_time > datetime.utcnow():
        return keycloak_token
    
    # Otherwise, request a new token
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
        )
        r.raise_for_status()
    except Exception as e:
        raise Exception(f"Keycloak token creation failed. Response from the server was: {r.json()}")
    
    # Extract the access token and expiration time (in seconds)
    token_data = r.json()
    keycloak_token = token_data["access_token"]
    expires_in = token_data.get("expires_in", 600)  # Default to 600 if not provided
    
    # Calculate the expiration time
    token_expiration_time = datetime.utcnow() + timedelta(seconds=expires_in)
    
    print(f"Token will expire at: {token_expiration_time}")
    
    return keycloak_token

# Function to download the item
def download_item(item):
    # Get the valid Keycloak token
    token = get_keycloak('###', '###')
    
    # Access the 'PRODUCT' asset to get the download link
    product_asset = item.assets.get('PRODUCT')
    
    if product_asset:
        # Extract the product ID from the asset URL
        product_id = product_asset.href.split('(')[1].split(')')[0]  # Extract the product ID
        
        # Construct the download URL for the ZIP file
        download_url = f'https://zipper.dataspace.copernicus.eu/odata/v1/Products({product_id})/$value'

        print(f"Downloading from: {download_url}")
        
        # Optional: Use headers for authorization if needed
        headers = {
            'Authorization': f"Bearer {token}"  # Use the valid token here
        }
        
        # Download the file
        response = requests.get(download_url, headers=headers)
        
        if response.status_code == 200:
            # Save the file as a ZIP
            file_name = os.path.join(output_dir, f"{item.id}.zip")  # Change the extension to .zip
            with open(file_name, 'wb') as f:
                f.write(response.content)
            print(f"Downloaded: {file_name}")
        else:
            print(f"Failed to download {item.id} with status code: {response.status_code}")
            print(f"Error Message: {response.text if response.text else 'No additional error message'}")
    else:
        print(f"No PRODUCT asset found for {item.id}")

# Iterate through filtered items and download each
for item in matching_items:
    download_item(item)


Token will expire at: 2024-11-13 22:45:34.152218
Downloading from: https://zipper.dataspace.copernicus.eu/odata/v1/Products(ce0bc2f4-700c-50e4-92b7-9a625df89912)/$value
Downloaded: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/S1B_EW_GRDM_1SDH_20201229T104119_20201229T104219_024916_02F703_C262.SAFE.zip
Downloading from: https://zipper.dataspace.copernicus.eu/odata/v1/Products(94ea1e1c-61c5-54fe-8898-e5c783faac10)/$value
Downloaded: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/S1B_EW_GRDM_1SDH_20190831T144552_20190831T144652_017831_0218EB_1D2C.SAFE.zip
Downloading from: https://zipper.dataspace.copernicus.eu/odata/v1/Products(ee58b06a-614a-5fd5-837f-de768fff7545)/$value
Downloaded: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/S1A_EW_GRDM_1SDH_20190612T101220_20190612T101253_027645_031ECD_9713.SAFE.zip
Downloading from: https://zipper.dataspace.copernicus.eu/odata/v1/Products(4d6d0585-78b4-52a0-98d3-3ca23c4d12ce)/$value
Downloaded: ai4arctic-sea-i

## Find HH and HV and save them 

In [1]:
import os
import zipfile
import xarray as xr
import rasterio
import numpy as np
from io import BytesIO

# Paths
source_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/'
netcdf_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/netcdfs/'

# Ensure the NetCDF directory exists
os.makedirs(netcdf_dir, exist_ok=True)

# Function to read TIFF data directly from bytes and add it as a DataArray
def read_tiff_from_bytes(tiff_bytes, variable_name):
    print(f"Reading {variable_name} TIFF data into memory...")
    with rasterio.open(BytesIO(tiff_bytes)) as src:
        data = src.read(1)  # Read the first band
        coords = {
            'y': np.arange(data.shape[0]),
            'x': np.arange(data.shape[1])
        }
        da = xr.DataArray(data, dims=['y', 'x'], coords=coords, name=variable_name)
    print(f"Finished reading {variable_name} data.")
    return da

# Counter for processed files
total_files = len([f for f in os.listdir(source_dir) if f.endswith('.zip')])
processed_files = 0

print(f"Found {total_files} ZIP files to process.")

# Loop through all ZIP files
for file_name in os.listdir(source_dir):
    if file_name.endswith('.zip'):
        zip_path = os.path.join(source_dir, file_name)
        base_name = os.path.splitext(file_name)[0]  # Name without extension

        # Create NetCDF file for each ZIP file
        netcdf_path = os.path.join(netcdf_dir, f"{base_name}.nc")
        datasets = []

        print(f"\nProcessing ZIP file: {file_name}")

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            hh_data = None
            hv_data = None

            # Track the number of TIFF files processed in this ZIP
            tiff_count = 0

            for zip_info in zip_ref.infolist():
                # Only process files in the "measurement" folder
                if 'measurement' in zip_info.filename and zip_info.filename.endswith('.tiff'):
                    print(f"Found TIFF file: {zip_info.filename}")
                    tiff_bytes = zip_ref.read(zip_info.filename)
                    tiff_count += 1

                    # Check if the file name contains "hh" or "hv"
                    if 'hh' in zip_info.filename.lower():
                        hh_data = read_tiff_from_bytes(tiff_bytes, 'hh')
                    elif 'hv' in zip_info.filename.lower():
                        hv_data = read_tiff_from_bytes(tiff_bytes, 'hv')

            print(f"Processed {tiff_count} TIFF files from {file_name}.")

            # Combine HH and HV data into a single xarray Dataset
            if hh_data is not None and hv_data is not None:
                ds = xr.Dataset({'hh': hh_data, 'hv': hv_data})
                datasets.append(ds)
                print(f"Created xarray Dataset for {file_name}.")

        # Write the combined dataset to NetCDF
        if datasets:
            combined_ds = xr.combine_by_coords(datasets)
            combined_ds.to_netcdf(netcdf_path)
            print(f"Saved NetCDF: {netcdf_path}")

        # Increment processed file count and print progress
        processed_files += 1
        print(f"Completed {processed_files}/{total_files} ZIP files.")

print("All TIFF files have been saved in NetCDF files!")


Found 510 ZIP files to process.

Processing ZIP file: S1B_EW_GRDM_1SDH_20180621T214028_20180621T214132_011477_01516D_E6E0.SAFE.zip
Found TIFF file: S1B_EW_GRDM_1SDH_20180621T214028_20180621T214132_011477_01516D_E6E0.SAFE/measurement/s1b-ew-grd-hh-20180621t214028-20180621t214132-011477-01516d-001.tiff
Reading hh TIFF data into memory...
Finished reading hh data.
Found TIFF file: S1B_EW_GRDM_1SDH_20180621T214028_20180621T214132_011477_01516D_E6E0.SAFE/measurement/s1b-ew-grd-hv-20180621t214028-20180621t214132-011477-01516d-002.tiff
Reading hv TIFF data into memory...
Finished reading hv data.
Processed 2 TIFF files from S1B_EW_GRDM_1SDH_20180621T214028_20180621T214132_011477_01516D_E6E0.SAFE.zip.
Created xarray Dataset for S1B_EW_GRDM_1SDH_20180621T214028_20180621T214132_011477_01516D_E6E0.SAFE.zip.
Saved NetCDF: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/netcdfs/S1B_EW_GRDM_1SDH_20180621T214028_20180621T214132_011477_01516D_E6E0.SAFE.nc
Completed 1/510 ZIP files.

Process

## Downsample Images

In [2]:
import os
import xarray as xr
import numpy as np

# Path to original and downsampled NetCDF files
netcdf_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/netcdfs/'
downsampled_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/'

# Ensure the downsampled directory exists
os.makedirs(downsampled_dir, exist_ok=True)

# Downsample function
def downsample_image(image, scale_factor=2):
    """Downsamples the image using a 2x2 averaging kernel."""
    if image.shape[0] % scale_factor != 0 or image.shape[1] % scale_factor != 0:
        image = image[:image.shape[0] - image.shape[0] % scale_factor,
                      :image.shape[1] - image.shape[1] % scale_factor]

    downsampled_image = image.reshape(
        (image.shape[0] // scale_factor, scale_factor, 
         image.shape[1] // scale_factor, scale_factor)).mean(axis=(1, 3))
    
    return downsampled_image

# Loop through all NetCDF files
for file_name in os.listdir(netcdf_dir):
    if file_name.endswith('.nc'):
        netcdf_path = os.path.join(netcdf_dir, file_name)
        downsampled_netcdf_path = os.path.join(downsampled_dir, file_name)  # Save to downsampled folder
        
        try:
            # Open the original NetCDF file in read-only mode
            with xr.open_dataset(netcdf_path, mode='r') as ds:
                # Create a new dataset for downsampled data
                downsampled_ds = xr.Dataset()

                # Downsample and save the 'hh' and 'hv' datasets
                if 'hh' in ds.variables:
                    hh_image = ds['hh'].values  # Extract original image data
                    downsampled_hh = downsample_image(hh_image)  # Downsample it
                    # Create a new variable in the downsampled dataset with downsampled data
                    downsampled_ds['hh'] = (('y', 'x'), downsampled_hh)

                if 'hv' in ds.variables:
                    hv_image = ds['hv'].values  # Extract original image data
                    downsampled_hv = downsample_image(hv_image)  # Downsample it
                    # Create a new variable in the downsampled dataset with downsampled data
                    downsampled_ds['hv'] = (('y', 'x'), downsampled_hv)

                # Carry over the attributes from the original dataset (optional)
                downsampled_ds.attrs = ds.attrs

                # Optionally, copy over other metadata or dimensions if needed
                downsampled_ds['x'] = (('x',), np.arange(downsampled_hh.shape[1]))
                downsampled_ds['y'] = (('y',), np.arange(downsampled_hh.shape[0]))

                # Save the downsampled dataset to the new NetCDF file
                downsampled_ds.to_netcdf(downsampled_netcdf_path)
                print(f"Downsampled file saved as: {downsampled_netcdf_path}")
                
        except Exception as e:
            print(f"Error processing file {file_name}: {e}")

print("Downsampling completed and saved in the downsampled directory.")


Downsampled file saved as: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/S1A_EW_GRDM_1SDH_20210318T080139_20210318T080244_037050_045C47_5A55.SAFE.nc
Downsampled file saved as: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/S1A_EW_GRDM_1SDH_20200213T203546_20200213T203651_031239_0397B5_2082.SAFE.nc
Downsampled file saved as: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/S1B_EW_GRDM_1SDH_20210215T072003_20210215T072103_025614_030D70_5663.SAFE.nc
Downsampled file saved as: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/S1A_EW_GRDM_1SDH_20211029T203602_20211029T203706_040339_04C7D6_0B08.SAFE.nc
Downsampled file saved as: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/S1A_EW_GRDM_1SDH_20210411T174728_20210411T174833_037406_04689E_23A6.SAFE.nc
Downsampled file saved as: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/S1B_EW_GRDM_1SDH_20190831T130738

## Find Matching Files; Resize to 1024; Save them

In [3]:
import os
import xarray as xr
import numpy as np
from skimage.transform import resize
from scipy.ndimage import zoom

# Paths to your directories
train_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/ready-to-train_train/'
downsampled_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/'
output_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Function to resize arrays (HH, HV)
def resize_array(data_array, new_shape=(1024, 1024)):
    return resize(data_array, new_shape, mode='reflect', anti_aliasing=True)

# Function to resize binary ice map (SOD)
def resize_array_binary(data_array, new_shape=(1024, 1024)):
    zoom_factors = (new_shape[0] / data_array.shape[0], new_shape[1] / data_array.shape[1])
    return zoom(data_array, zoom_factors, order=0)

# Get list of training and downsampled files
training_files = [f for f in os.listdir(train_dir) if f.endswith(".nc")]
downsampled_files = [f for f in os.listdir(downsampled_dir) if f.endswith(".nc")]

# Loop through training files
for training_file in training_files:
    # Extract the date part before the first underscore
    training_date = training_file.split('_')[0]

    # Find matching downsampled file
    matching_downsampled = None
    for downsampled_file in downsampled_files:
        if training_date in downsampled_file:
            matching_downsampled = downsampled_file
            break

    # If a matching downsampled file is found
    if matching_downsampled:
        print(f"Matching files found: {training_file} and {matching_downsampled}")
        
        # Open both NetCDF files
        training_nc = xr.open_dataset(os.path.join(train_dir, training_file))
        downsampled_nc = xr.open_dataset(os.path.join(downsampled_dir, matching_downsampled))

        # Extract SOD from training file
        if 'SOD' in training_nc:
            sod_data = training_nc['SOD'].values
            sod_resized = resize_array_binary(sod_data)

        # Extract HH and HV from downsampled file
        if 'hh' in downsampled_nc and 'hv' in downsampled_nc:
            hh_data = downsampled_nc['hh'].values
            hv_data = downsampled_nc['hv'].values
            hh_resized = resize_array(hh_data)
            hv_resized = resize_array(hv_data)

            # Create a new xarray Dataset
            new_ds = xr.Dataset({
                'hh': (['y', 'x'], hh_resized),
                'hv': (['y', 'x'], hv_resized),
                'SOD': (['y', 'x'], sod_resized)
            })

            # Define the output file path
            output_file = os.path.join(output_dir, f"{training_date}_resized.nc")

            # Save the new dataset to NetCDF
            new_ds.to_netcdf(output_file, mode='w')
            print(f"Saved resized NetCDF: {output_file}")
        
        # Close the datasets
        training_nc.close()
        downsampled_nc.close()
    else:
        print(f"No matching downsampled file found for {training_file}")

print("All matching files have been processed and saved in the output directory.")


Matching files found: 20201229T104119_dmi_prep.nc and S1B_EW_GRDM_1SDH_20201229T104119_20201229T104219_024916_02F703_C262.SAFE.nc
Saved resized NetCDF: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/20201229T104119_resized.nc
Matching files found: 20190831T144552_cis_prep.nc and S1B_EW_GRDM_1SDH_20190831T144552_20190831T144652_017831_0218EB_1D2C.SAFE.nc
Saved resized NetCDF: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/20190831T144552_resized.nc
Matching files found: 20190612T101220_cis_prep.nc and S1A_EW_GRDM_1SDH_20190612T101120_20190612T101220_027645_031ECD_9403.SAFE.nc
Saved resized NetCDF: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/20190612T101220_resized.nc
Matching files found: 20190411T102726_cis_prep.nc and S1A_EW_GRDM_1SDH_20190411T102726_20190411T102827_026741_0300CC_648B.SAFE.nc
Saved resized NetCDF: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampl

## Modify SOD

In [5]:
import os
import xarray as xr
import numpy as np
from skimage.transform import resize
from scipy.ndimage import zoom

# Paths to your directories
train_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/ready-to-train_train/'
downsampled_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled/'
output_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/'

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Function to resize arrays (HH, HV)
def resize_array(data_array, new_shape=(1024, 1024)):
    return resize(data_array, new_shape, mode='reflect', anti_aliasing=True)

# Function to resize binary ice map (SOD)
def resize_array_binary(data_array, new_shape=(1024, 1024)):
    zoom_factors = (new_shape[0] / data_array.shape[0], new_shape[1] / data_array.shape[1])
    return zoom(data_array, zoom_factors, order=0)

# Function to modify SOD values
def modify_sod_values(sod_data):
    # Set values > 0 and < 255 to 1
    sod_data = np.where((sod_data > 0) & (sod_data < 255), 1, sod_data)
    # Set values of 255 to NaN
    sod_data = np.where(sod_data == 255, np.nan, sod_data)
    return sod_data

# Get list of training and downsampled files
training_files = [f for f in os.listdir(train_dir) if f.endswith(".nc")]
downsampled_files = [f for f in os.listdir(downsampled_dir) if f.endswith(".nc")]

# Loop through training files
for training_file in training_files:
    # Extract the date part before the first underscore
    training_date = training_file.split('_')[0]

    # Find matching downsampled file
    matching_downsampled = None
    for downsampled_file in downsampled_files:
        if training_date in downsampled_file:
            matching_downsampled = downsampled_file
            break

    # If a matching downsampled file is found
    if matching_downsampled:
        print(f"Matching files found: {training_file} and {matching_downsampled}")
        
        # Open both NetCDF files
        training_nc = xr.open_dataset(os.path.join(train_dir, training_file))
        downsampled_nc = xr.open_dataset(os.path.join(downsampled_dir, matching_downsampled))

        # Extract and modify SOD from training file
        if 'SOD' in training_nc:
            sod_data = training_nc['SOD'].values
            sod_modified = modify_sod_values(sod_data)
            sod_resized = resize_array_binary(sod_modified)

        # Extract HH and HV from downsampled file
        if 'hh' in downsampled_nc and 'hv' in downsampled_nc:
            hh_data = downsampled_nc['hh'].values
            hv_data = downsampled_nc['hv'].values
            hh_resized = resize_array(hh_data)
            hv_resized = resize_array(hv_data)

            # Create a new xarray Dataset
            new_ds = xr.Dataset({
                'hh': (['y', 'x'], hh_resized),
                'hv': (['y', 'x'], hv_resized),
                'SOD': (['y', 'x'], sod_resized)
            })

            # Define the output file path
            output_file = os.path.join(output_dir, f"{training_date}_resized.nc")

            # Save the new dataset to NetCDF
            new_ds.to_netcdf(output_file, mode='w')
            print(f"Saved modified and resized NetCDF: {output_file}")
        
        # Close the datasets
        training_nc.close()
        downsampled_nc.close()
    else:
        print(f"No matching downsampled file found for {training_file}")

print("All matching files have been processed and saved in the output directory.")


Matching files found: 20201229T104119_dmi_prep.nc and S1B_EW_GRDM_1SDH_20201229T104119_20201229T104219_024916_02F703_C262.SAFE.nc
Saved modified and resized NetCDF: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/20201229T104119_resized.nc
Matching files found: 20190831T144552_cis_prep.nc and S1B_EW_GRDM_1SDH_20190831T144552_20190831T144652_017831_0218EB_1D2C.SAFE.nc
Saved modified and resized NetCDF: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/20190831T144552_resized.nc
Matching files found: 20190612T101220_cis_prep.nc and S1A_EW_GRDM_1SDH_20190612T101120_20190612T101220_027645_031ECD_9403.SAFE.nc
Saved modified and resized NetCDF: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/20190612T101220_resized.nc
Matching files found: 20190411T102726_cis_prep.nc and S1A_EW_GRDM_1SDH_20190411T102726_20190411T102827_026741_0300CC_648B.SAFE.nc
Saved modified and resized NetCDF: ai4arctic-sea-ic

## Modify SOD; Manipulate HH and Hv and set to 0

In [4]:
import os
import xarray as xr
import numpy as np
from skimage.transform import resize

# Function to resize arrays
def resize_array(data_array, new_shape=(1024, 1024)):
    return resize(data_array, new_shape, mode='reflect', anti_aliasing=True)

# Directories
train_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/ready-to-train_train'
output_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_final/'
new_output_dir = r'ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_modfinal/'

# Create the new output directory if it doesn't exist
os.makedirs(new_output_dir, exist_ok=True)

# Get list of training files
train_files = [f for f in os.listdir(train_dir) if f.endswith(".nc")]

# Loop through training files
for train_file in train_files:
    # Extract the date part between "clipped_" and the second underscore
    train_date = train_file.split('_')[0]

    # Find matching output file based on the date (assuming the output filenames contain the date)
    matching_output_file = None
    for output_file in os.listdir(output_dir):
        if train_date in output_file:
            matching_output_file = output_file
            break

    # If a matching output file is found
    if matching_output_file:
        print(f"Matching files found: {train_file} and {matching_output_file}")

        # Open the training and output files
        train_nc = xr.open_dataset(os.path.join(train_dir, train_file))
        output_nc = xr.open_dataset(os.path.join(output_dir, matching_output_file))

        # Resize the 'nersc_sar_primary' and 'nersc_sar_secondary' to 1024x1024
        resized_nersc_sar_primary = resize_array(train_nc['nersc_sar_primary'].values, new_shape=(1024, 1024))
        resized_nersc_sar_secondary = resize_array(train_nc['nersc_sar_secondary'].values, new_shape=(1024, 1024))

        # Modify hh based on 'nersc_sar_primary'
        hh = output_nc['hh'].data  # Extracting the underlying NumPy array
        hh = np.where(resized_nersc_sar_primary != 0, hh, 0)

        # Modify hv based on 'nersc_sar_secondary'
        hv = output_nc['hv'].data  # Extracting the underlying NumPy array
        hv = np.where(resized_nersc_sar_secondary != 0, hv, 0)

        # Get the 'sod' variable from the output file
        sod = output_nc['SOD'].data  # Extracting the underlying NumPy array

        # Create a new dataset with modified hh, hv, and existing sod
        new_output_nc = xr.Dataset(
            {
                'hh': (['x', 'y'], hh),
                'hv': (['x', 'y'], hv),
                'SOD': (['x', 'y'], sod)
            },
            coords={'x': output_nc['hh'].coords['x'], 'y': output_nc['hh'].coords['y']}
        )

        # Save the new dataset to the new output directory
        new_output_file = os.path.join(new_output_dir, matching_output_file)
        new_output_nc.to_netcdf(new_output_file)

        print(f"Saved modified file: {new_output_file}")

        # Close the datasets
        train_nc.close()
        output_nc.close()
        new_output_nc.close()

print("Processing complete.")


Matching files found: 20201229T104119_dmi_prep.nc and 20201229T104119_resized.nc
Saved modified file: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_modfinal/20201229T104119_resized.nc
Matching files found: 20190831T144552_cis_prep.nc and 20190831T144552_resized.nc
Saved modified file: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_modfinal/20190831T144552_resized.nc
Matching files found: 20190612T101220_cis_prep.nc and 20190612T101220_resized.nc
Saved modified file: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_modfinal/20190612T101220_resized.nc
Matching files found: 20190411T102726_cis_prep.nc and 20190411T102726_resized.nc
Saved modified file: ai4arctic-sea-ice-challenge-ready-to-train/v2/Cop_Matching/downsampled_sod_modfinal/20190411T102726_resized.nc
Matching files found: 20210509T104806_dmi_prep.nc and 20210509T104806_resized.nc
Saved modified file: ai4arctic-sea-ice-challenge-ready-to-train/v2/