# OPI Data Input Formats - Complete Guide

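This notebook demonstrates every supported way of getting data into and out of OPI, one method per section. The table at the end summarizes the read and write function for each format.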

In [None]:
# Core scientific stack used by the cells below.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
# Put the parent directory first on the module search path so that the
# `import opi` below can be resolved from there; this must run before it.
# NOTE(review): presumably the parent directory is the checkout that contains
# the opi package - confirm.
sys.path.insert(0, '..')

import opi
print("Setup complete!")

## Method 1: MATLAB .mat Files (Native Format)

In [None]:
# Load topography from MATLAB file
from opi.io import grid_read, save_opi_results

try:
    x, y, h_grid = grid_read('../examples/data/gaussian_topo.mat')
    print(f"Loaded from .mat: shape={h_grid.shape}")
except Exception as err:
    # Fall back to generating the file when it cannot be read. Catching
    # Exception instead of using a bare `except:` lets KeyboardInterrupt and
    # SystemExit propagate, and the reason for the fallback is reported
    # rather than silently swallowed.
    print(f"Could not load gaussian_topo.mat ({err!r}); creating synthetic topography")
    dem = opi.create_synthetic_dem(
        topo_type='gaussian', output_file='../examples/data/gaussian_topo.mat'
    )
    x, y, h_grid = dem['x'], dem['y'], dem['hGrid']

# Save results back to MATLAB format
# Convert the grid coordinates once and reuse both components.
# NOTE(review): the trailing (0, 45) arguments are presumably the lon/lat of
# the grid origin - confirm against opi.xy2lonlat.
lonlat = opi.xy2lonlat(x, y, 0, 45)
results = {
    'lon': lonlat[0],
    'lat': lonlat[1],
    'hGrid': h_grid,
    'pGrid': np.random.rand(*h_grid.shape),  # Example precipitation
}
save_opi_results('../examples/data/results.mat', results)
print("Saved to results.mat")

## Method 2: Excel Files (Sample Data)

In [None]:
# Create sample data in Excel format
n_samples = 20

# Seeded generator: the synthetic noise below is identical on every
# Restart & Run All, so the displayed table and the saved files are reproducible.
rng = np.random.default_rng(42)

# One row per synthetic sample: a linear trend along the transect plus
# Gaussian noise on the isotope columns.
# NOTE(review): d_excess is drawn independently of d2H and d18O rather than
# derived as d2H - 8*d18O - confirm this is acceptable for downstream use.
samples_df = pd.DataFrame({
    'line': range(1, n_samples+1),
    'longitude': np.linspace(-2, 2, n_samples),
    'latitude': 45 + np.zeros(n_samples),
    'elevation': np.linspace(0, 2000, n_samples),
    'd2H': np.linspace(-80, -140, n_samples) + rng.normal(0, 5, n_samples),
    'd18O': np.linspace(-10, -18, n_samples) + rng.normal(0, 0.8, n_samples),
    'd_excess': 10 + rng.normal(0, 2, n_samples),
    'sample_type': ['C'] * n_samples,
})

# Save to Excel (single sheet named 'Samples', without the DataFrame index)
samples_df.to_excel('../examples/data/samples.xlsx', index=False, sheet_name='Samples')
print("Sample data saved to samples.xlsx")
samples_df.head()

In [None]:
# Round trip: load the workbook from disk and report what came back.
excel_path = '../examples/data/samples.xlsx'
samples_read = pd.read_excel(excel_path)

row_count = len(samples_read)
column_names = list(samples_read.columns)
print(f"\nRead {row_count} samples from Excel")
print(f"Columns: {column_names}")

## Method 3: CSV Files

In [None]:
# Write the sample table as CSV (without the DataFrame index) ...
csv_path = '../examples/data/samples.csv'
samples_df.to_csv(csv_path, index=False)

# ... and load it straight back to confirm the row count.
samples_csv = pd.read_csv(csv_path)
n_csv_rows = len(samples_csv)
print(f"Read from CSV: {n_csv_rows} samples")

## Method 4: NumPy Arrays (Programmatic)

In [None]:
# Build a 250 x 250 coordinate grid spanning -250000..250000 on both axes
# (the plot below divides by 1000 and labels the axes in km).
x = np.linspace(-250000, 250000, 250)
y = np.linspace(-250000, 250000, 250)
X, Y = np.meshgrid(x, y)

# The two components of the double-peak surface, offset to either side of x = 0
peak1 = 1800 * np.exp(-((X-50000)**2 + Y**2) / (2 * 30000**2))
peak2 = 1500 * np.exp(-((X+50000)**2 + Y**2) / (2 * 40000**2))

# Synthetic surfaces keyed by name; insertion order sets the panel order below
topo_types = {
    # Single centred Gaussian hill
    'gaussian': 2000 * np.exp(-(X**2 + Y**2) / (2 * 50000**2)),
    # Sinusoidal ridges varying along X only, scaled to the range 0..1500
    'sinusoidal': 1500 * (1 + np.sin(2*np.pi*X/100000)) / 2,
    # Sum of the two offset Gaussian peaks
    'double': peak1 + peak2,
}

# One panel per surface, each with its own colour bar
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for panel, name in zip(axes, topo_types):
    mesh = panel.pcolormesh(X/1000, Y/1000, topo_types[name], cmap='terrain')
    panel.set(
        title=f'{name.capitalize()} Topography',
        xlabel='X (km)',
        ylabel='Y (km)',
    )
    fig.colorbar(mesh, ax=panel, label='Elevation (m)')
fig.tight_layout()
plt.show()

## Method 5: NetCDF (Climate Data)

In [None]:
# Save to NetCDF format (common for climate data)
try:
    import xarray as xr

    nc_path = '../examples/data/opi_results.nc'
    gaussian_topo = topo_types['gaussian']

    ds = xr.Dataset(
        {
            'topography': (['y', 'x'], gaussian_topo),
            # Placeholder precipitation field, sized from the topography so
            # both variables always share the same (y, x) dimensions.
            'precipitation': (['y', 'x'], np.random.rand(*gaussian_topo.shape)),
        },
        coords={
            'x': x,
            'y': y,
        },
        attrs={
            'title': 'OPI Simulation Results',
            'source': 'OPI Python',
        }
    )

    ds.to_netcdf(nc_path)
    print("Saved to NetCDF format")

    # Read back. load_dataset() reads the data into memory and closes the
    # file, so no open handle is left behind to block the to_netcdf() call
    # above when this cell is run again.
    ds_read = xr.load_dataset(nc_path)
    print(f"\nNetCDF variables: {list(ds_read.data_vars)}")

except ImportError:
    print("xarray not installed. Install with: pip install xarray netCDF4")

## Method 6: GeoTIFF (GIS Data)

In [None]:
# Working with GeoTIFF (requires rasterio)
try:
    import rasterio
    from rasterio.transform import from_bounds

    tif_path = '../examples/data/topo.tif'

    # from_bounds() builds a north-up transform anchored at the top-left
    # (north-west) corner, so raster row 0 must be the northern edge.
    # The grid comes from np.meshgrid(x, y) with y ascending, i.e. row 0 is
    # the smallest y; flip it vertically so the data matches the transform
    # (assuming +y points north). copy() makes the flipped view contiguous.
    topo_north_up = np.flipud(topo_types['gaussian']).copy()
    height, width = topo_north_up.shape

    # Define geographic bounds
    west, south, east, north = -2.5, 42.5, 2.5, 47.5  # lon/lat bounds

    transform = from_bounds(west, south, east, north, width, height)

    # Save to GeoTIFF: single band, same dtype as the array
    with rasterio.open(
        tif_path, 'w',
        driver='GTiff',
        height=height,
        width=width,
        count=1,
        dtype=topo_north_up.dtype,
        crs='EPSG:4326',  # WGS84
        transform=transform,
    ) as dst:
        dst.write(topo_north_up, 1)

    print("Saved to GeoTIFF")

    # Read back band 1 and report its shape and coordinate reference system
    with rasterio.open(tif_path) as src:
        topo_tiff = src.read(1)
        print(f"GeoTIFF shape: {topo_tiff.shape}")
        print(f"CRS: {src.crs}")

except ImportError:
    print("rasterio not installed. Install with: pip install rasterio")

## Method 7: JSON (Configuration)

In [None]:
import json

config_path = '../examples/data/config.json'

# Save configuration as JSON
config = {
    'run_title': 'My OPI Simulation',
    'grid': {
        'size_x': 500000,
        'size_y': 500000,
        'resolution': 2000,
    },
    'center': {
        'lon': 0,
        'lat': 45,
    },
    'parameters': {
        'U': 10.0,
        'azimuth': 90.0,
        'T0': 290.0,
        'M': 0.25,
        'kappa': 0.0,
        'tau_c': 1000.0,
        'd2h0': -0.005,
        'd_d2h0_d_lat': -0.002,
        'f_p0': 0.7,
    },
}

# Explicit UTF-8 so the file is written and read the same way on every
# platform, independent of the locale's default encoding.
with open(config_path, 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)

# Load configuration
with open(config_path, 'r', encoding='utf-8') as f:
    config_loaded = json.load(f)

params = config_loaded['parameters']
print("Configuration loaded:")
print(f"  Title: {config_loaded['run_title']}")
# \u00b0 is the degree sign; written as an escape so the source stays ASCII
# and the symbol cannot be corrupted by an encoding mismatch.
print(f"  Wind: {params['U']} m/s from {params['azimuth']}\u00b0")

## Summary: Supported Formats

| Format | Use Case | Read Function | Write Function |
|:-------|:---------|:--------------|:-------------|
| **MATLAB .mat** | Native OPI format | `opi.io.grid_read()` | `opi.io.save_opi_results()` |
| **Excel .xlsx** | Sample data | `pd.read_excel()` | `df.to_excel()` |
| **CSV** | Simple tables | `pd.read_csv()` | `df.to_csv()` |
| **NumPy arrays** | Programmatic | Direct (or `np.load()`) | `np.save()` / `np.savez()` |
| **NetCDF** | Climate data | `xarray.open_dataset()` | `ds.to_netcdf()` |
| **GeoTIFF** | GIS data | `rasterio.open()` | `rasterio.open(..., 'w')` |
| **JSON** | Configuration | `json.load()` | `json.dump()` |
| **Run files** | OPI configuration | `opi.io.parse_run_file()` | `opi.io.write_run_file()` |