# Testing X to Zarr Conversion

Starting with `MIKEConverter` from `src/zarrcatalogue/converters/mike.py`

In [None]:
import sys
sys.path.append('../src')
from pathlib import Path
from zarrcatalogue.converters.mike import MIKEConverter
import mikeio
import numpy as np
import zarr

# Input dfsu file (under ../tests/testdata) and the Zarr store to write,
# which is placed in the notebook's working directory.
data_path = Path('../tests/testdata/oresundHD_run1.dfsu')
zarr_path = Path('oresundHD_run1.zarr')

# Converter instance shared by the conversion and validation steps below.
converter = MIKEConverter()

In [2]:
# Convert the dfsu file to Zarr with explicit chunking and compression,
# then check the result against the original file.
chunks = {'time': 2, 'elements': 320}  # Example chunking
metadata = converter.to_zarr(
    data_path,
    zarr_path,
    chunks=chunks,
    compression_level=7,
)

# Print metadata
print("\nConversion metadata:")
for key, value in metadata.items():
    print(f"{key}: {value}")

# Validate conversion
validation_results = converter.validate_conversion(data_path, zarr_path)
print("\nValidation results:")
for key, value in validation_results.items():
    print(f"{key}: {value}")

# Examine Zarr structure.
# The store is opened once, read-only, and reused for the data comparison
# below. ``mode`` is passed by keyword so the call does not depend on the
# positional order of ``zarr.open``'s parameters.
store = zarr.open(zarr_path, mode='r')
zarr_store = store  # same read-only handle; no need to open the store twice
print("\nZarr structure:")
print(store.tree())

# Basic data validation
print("\nData validation:")
original_ds = mikeio.read(data_path)

# Compare first timestep of first variable
var_name = original_ds.names[0]
original_data = original_ds[var_name].to_numpy()[0]
zarr_data = zarr_store[f'data/{var_name}'][0]

print(f"\nComparing {var_name} data:")
print(f"Original shape: {original_data.shape}")
print(f"Zarr shape: {zarr_data.shape}")
print(f"Max difference: {np.max(np.abs(original_data - zarr_data))}")

Exception: File not found /teamspace/studios/this_studio/data/oresundHD_run1.dfsu

# Adding to the catalogue

## Single file

In [None]:
import sys
sys.path.append('../src')
# Example usage with proper JSON serialization
from pathlib import Path
from zarrcatalogue.catalog import SimulationCatalog
import json

# First, let's clean up any corrupted files
import shutil
from pathlib import Path

# Start from a clean slate: remove whatever an earlier run left at the
# catalog location, then construct the catalog object on that same path.
catalog_path = Path("/teamspace/studios/this_studio/catalog")
catalog_already_present = catalog_path.exists()
if catalog_already_present:
    shutil.rmtree(catalog_path)

catalog = SimulationCatalog(catalog_path)


In [None]:

# Add a single simulation to the catalog, with descriptive metadata and tags.
simulation_entry = catalog.add_simulation(
    sim_id="basin_2dv_20241210",
    source_file=Path('/teamspace/studios/this_studio/data/basin_2dv.dfsu'),
    metadata={
        "scenario": "baseline",
        "model_version": "2.2.0",
        "description": "Vertical profile simulation"
    },
    tags=["vertical_profile", "baseline"]
)

# Print the returned entry as JSON.
# No ``CustomJSONEncoder`` is imported or defined in this notebook, so
# referencing it raised NameError. ``default=str`` is enough for display:
# any value json cannot serialise natively is printed via str().
print("Added simulation:")
print(json.dumps(simulation_entry, indent=2, default=str))

## Bulk

In [None]:
import sys
sys.path.append('../src')
# Example usage with proper JSON serialization
from pathlib import Path
from zarrcatalogue.catalog import SimulationCatalog
import json

from typing import Dict
from datetime import datetime


# Construct the catalog object for the studio's catalog directory.
catalog_root = Path("/teamspace/studios/this_studio/catalog")
catalog = SimulationCatalog(catalog_root)

# Optional: metadata generator handed to the bulk import
def generate_metadata(file_path: Path) -> Dict:
    """Build the metadata dict for one source file.

    Args:
        file_path: Path of the file being imported.

    Returns:
        A dict with three keys: ``source_file`` (the path as a string),
        ``scenario`` (the part of the file stem before the first
        underscore, or the whole stem if it has none) and
        ``date_processed`` (the current local time in ISO 8601 format).
    """
    scenario, _, _ = file_path.stem.partition('_')
    processed_at = datetime.now().isoformat()
    return {
        "source_file": str(file_path),
        "scenario": scenario,
        "date_processed": processed_at,
    }

# Run the bulk import over the files matching *.dfsu in the data directory,
# using generate_metadata for per-file metadata and the same tags for all.
results = catalog.bulk_import(
    source_dir=Path("/teamspace/studios/this_studio/data"),
    pattern="*.dfsu",
    metadata_generator=generate_metadata,
    tags=["bulk_import", "2024"],
    parallel=True,
    max_workers=4,
    skip_existing=True,
)

# List the entries reported under the "successful" key of the result
print("\nSuccessfully imported simulations:")
for imported in results["successful"]:
    sim_id = imported['id']
    source = imported['source_file']
    print(f"- {sim_id}: {source}")

# Search and analyze the catalogue

## Summary / overview

In [None]:
# ``Path`` is imported here so this section also runs on a fresh kernel;
# previously the cell relied on an earlier cell having imported it.
from pathlib import Path

from zarrcatalogue.catalog import SimulationCatalog

# Construct the catalog object for the studio's catalog directory.
catalog = SimulationCatalog(Path("/teamspace/studios/this_studio/catalog"))

In [None]:
# Fetch the catalog summary and print it.
summary = catalog.get_summary()
print(summary)

## Search

In [None]:
# Query the catalog, filtering on geometry type only.
# Two further filters were left disabled in this cell:
#   variables=["U velocity"],
#   tags=["baseline"]
results = catalog.search(geometry_type="GeometryFM2D")
results  # last expression in the cell, so the notebook displays it


# Zarr to MIKE Conversion

In [None]:
import sys
sys.path.append('../src')
from pathlib import Path
from zarrcatalogue.converters.mike import MIKEConverter


# Convert a Zarr store from the catalog back into a dfsu file.
# Alternative input/output pair (basin_2dv), left disabled:
#   zarr_path=Path("/teamspace/studios/this_studio/catalog/simulations/basin_2dv/data.zarr"),
#   output_file=Path("/teamspace/studios/this_studio/data/basin_2dv_backconversion.dfsu")
source_store = Path("/teamspace/studios/this_studio/catalog/simulations/oresundHD_run1/data.zarr")
dfsu_target = Path("/teamspace/studios/this_studio/data/oresundHD_run1_backconversion.dfsu")

converter = MIKEConverter()
metadata = converter.from_zarr(zarr_path=source_store, output_file=dfsu_target)

In [None]:
# reload the converted file and compare to original
import mikeio

# Read the original dfsu file and the back-converted one, then show both
# datasets side by side in the notebook output.
original_file = "/teamspace/studios/this_studio/data/oresundHD_run1.dfsu"
backconverted_file = "/teamspace/studios/this_studio/data/oresundHD_run1_backconversion.dfsu"

ds = mikeio.read(original_file)
ds_backconversion = mikeio.read(backconverted_file)
display(ds, ds_backconversion)