# Explore Accumulation Coretop Data

This notebook loads and visualizes the accumulation coretop data from the CDF file.

In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeature

## Load Accumulation Coretop Data

In [None]:
# Load accumulation coretop data.
# The path is relative, so it resolves against the current working directory.
# `accum_ds` is reused by the cells below and is closed in the final cell.
accum_ds = xr.open_dataset('datasets/accum.coretop.cdf')
# Printing the Dataset shows xarray's text summary of its contents.
print(accum_ds)

In [None]:
# List every data variable (shape + attributes), every coordinate
# (shape + value range) and every global attribute of the dataset.
print("\nData variables:")
for name, variable in accum_ds.data_vars.items():
    print(f"  {name}: {variable.shape}")
    print(f"    Attributes: {dict(variable.attrs)}")

print("\nCoordinates:")
for name, coordinate in accum_ds.coords.items():
    # Reduce to plain floats so the range can be formatted with two decimals.
    lowest = float(coordinate.min())
    highest = float(coordinate.max())
    print(f"  {name}: {coordinate.shape}, range: [{lowest:.2f}, {highest:.2f}]")

print("\nGlobal attributes:")
for key, value in accum_ds.attrs.items():
    print(f"  {key}: {value}")

## Summary Statistics

In [None]:
# Summary statistics for accumulation data
print("Accumulation Data Summary:")
for var in accum_ds.data_vars:
    print(f"\n{var}:")
    data = accum_ds[var]

    # All data including invalid
    print(f"  Total points: {data.size}")

    # Mask everything that is not a non-negative, finite value (becomes NaN).
    valid_data = data.where((data >= 0) & np.isfinite(data))

    # Count the non-NaN cells explicitly. `where(..., drop=True)` followed by
    # `.size` is not a valid count: `drop=True` only trims coordinate labels
    # whose entries are *all* masked, so the remaining array still contains
    # NaN-masked cells and `.size` over-reports the number of valid points.
    n_valid = int(valid_data.count())
    print(f"  Valid points (>=0, finite): {n_valid}")
    print(f"  Invalid points removed: {data.size - n_valid}")

    if n_valid > 0:
        # The reductions below skip the NaN-masked cells (xarray's default
        # for floating-point data).
        print(f"  Min: {float(valid_data.min()):.4f}")
        print(f"  Max: {float(valid_data.max()):.4f}")
        print(f"  Mean: {float(valid_data.mean()):.4f}")
        print(f"  Median: {float(valid_data.median()):.4f}")
        print(f"  Std: {float(valid_data.std()):.4f}")

## Plot Accumulation Distribution as Map

In [None]:
# Map the first data variable of the dataset on a Robinson projection.
var_name = list(accum_ds.data_vars)[0]
data = accum_ds[var_name]

# Blank out NaN, +/-inf and negative cells so they are not drawn.
is_valid = np.isfinite(data) & (data >= 0)
data_clean = data.where(is_valid)

# Figure with a cartopy GeoAxes
fig = plt.figure(figsize=(16, 10))
ax = plt.axes(projection=ccrs.Robinson())

# Background layers, drawn in this order: land fill, coastlines, borders.
background_layers = (
    (cfeature.LAND, {'facecolor': 'lightgray'}),
    (cfeature.COASTLINE, {'linewidth': 0.5}),
    (cfeature.BORDERS, {'linewidth': 0.3, 'alpha': 0.5}),
)
for layer, layer_style in background_layers:
    ax.add_feature(layer, **layer_style)

# The data is only mapped when it carries both X (longitude) and
# Y (latitude) dimensions; otherwise report what was found instead.
has_lon_lat = {'X', 'Y'}.issubset(data_clean.dims)
if not has_lon_lat:
    print(f"Data dimensions: {data_clean.dims}")
    print("Cannot create map - no X/Y dimensions found")
else:
    colorbar_options = {'label': f'{var_name}', 'shrink': 0.7}
    im = data_clean.plot(
        ax=ax,
        transform=ccrs.PlateCarree(),  # data coordinates are plain lon/lat
        cmap='plasma',
        cbar_kwargs=colorbar_options,
    )
    ax.set_title('Accumulation Rate Coretop Distribution',
                 fontsize=14, fontweight='bold')
    ax.gridlines(draw_labels=False, alpha=0.3)

plt.tight_layout()
plt.show()

## Plot Accumulation Histogram

In [None]:
# Linear-scale histogram of the first data variable, with a statistics box.
fig, ax = plt.subplots(figsize=(10, 6))

var_name = list(accum_ds.data_vars)[0]
raw_values = accum_ds[var_name].values.flatten()

# Keep finite, non-negative samples only (isfinite already rejects NaN).
keep = np.isfinite(raw_values) & (raw_values >= 0)
data = raw_values[keep]

ax.hist(data, bins=50, color='coral', edgecolor='black', alpha=0.7)
ax.set_xlabel(f'{var_name}', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.set_title('Distribution of Accumulation Rates in Coretop Samples',
             fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)

# Statistics box in the top-right corner (axes-fraction coordinates).
stats_lines = [
    f'Mean: {np.mean(data):.2f}',
    f'Median: {np.median(data):.2f}',
    f'Std: {np.std(data):.2f}',
    f'N: {len(data):,}',
]
stats_text = '\n'.join(stats_lines)
box_style = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
ax.text(
    0.95, 0.95, stats_text,
    transform=ax.transAxes,
    verticalalignment='top',
    horizontalalignment='right',
    bbox=box_style,
    fontsize=10,
)

plt.tight_layout()
plt.show()

## Plot Log-scale Histogram

In [None]:
# Histogram of log10(values) for the first data variable, with a stats box.
fig, ax = plt.subplots(figsize=(10, 6))

var_name = list(accum_ds.data_vars)[0]
raw_values = accum_ds[var_name].values.flatten()

# Strictly positive, finite samples only: log10 is undefined at zero and
# below (isfinite already rejects NaN).
keep = np.isfinite(raw_values) & (raw_values > 0)
data = raw_values[keep]

log_values = np.log10(data)
ax.hist(log_values, bins=50, color='teal', edgecolor='black', alpha=0.7)
ax.set_xlabel(f'log10({var_name})', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.set_title('Distribution of Accumulation Rates (Log Scale)',
             fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)

# Statistics box; the geometric mean is exp(mean(ln(x))).
geometric_mean = np.exp(np.mean(np.log(data)))
stats_lines = [
    f'Geometric Mean: {geometric_mean:.2f}',
    f'Median: {np.median(data):.2f}',
    f'N: {len(data):,}',
]
stats_text = '\n'.join(stats_lines)
box_style = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
ax.text(
    0.95, 0.95, stats_text,
    transform=ax.transAxes,
    verticalalignment='top',
    horizontalalignment='right',
    bbox=box_style,
    fontsize=10,
)

plt.tight_layout()
plt.show()

## Plot Zonal and Meridional Mean Accumulation

In [None]:
# Create zonal (per-latitude) and meridional (per-longitude) mean plots
var_name = list(accum_ds.data_vars)[0]
data = accum_ds[var_name]

# Filter out invalid values (negatives, NaN and infinities become NaN)
data_clean = data.where((data >= 0) & np.isfinite(data))

# Both reductions below need X *and* Y: the zonal mean averages over X and
# the meridional mean reads the X coordinate. Checking only Y would let a
# dataset without X crash inside the plotting branch.
if 'Y' in data_clean.dims and 'X' in data_clean.dims:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Zonal mean (average across longitudes), drawn with latitude on the y-axis
    zonal_mean = data_clean.mean(dim='X', skipna=True)
    ax1.plot(zonal_mean.values, zonal_mean.Y.values, linewidth=2, color='navy')
    ax1.set_xlabel('Mean Accumulation Rate', fontsize=12)
    ax1.set_ylabel('Latitude', fontsize=12)
    ax1.set_title('Zonal Mean Accumulation', fontsize=12, fontweight='bold')
    ax1.grid(True, alpha=0.3)
    # Dashed reference line at Y = 0 (the equator when Y is latitude)
    ax1.axhline(y=0, color='red', linestyle='--', alpha=0.5)

    # Meridional mean (average across latitudes), drawn against longitude
    merid_mean = data_clean.mean(dim='Y', skipna=True)
    ax2.plot(merid_mean.X.values, merid_mean.values, linewidth=2, color='darkgreen')
    ax2.set_xlabel('Longitude', fontsize=12)
    ax2.set_ylabel('Mean Accumulation Rate', fontsize=12)
    ax2.set_title('Meridional Mean Accumulation', fontsize=12, fontweight='bold')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print(f"Data dimensions: {data_clean.dims}")
    print("Cannot create zonal/meridional plots - no X/Y dimensions found")

## Compare CaCO3 and Accumulation (if available)

In [None]:
# Compare CaCO3 percentage with accumulation rate on their shared grid cells.
# This cell is best-effort: any failure is reported and the notebook continues.
try:
    # The context manager closes the CaCO3 file even when a later step raises;
    # a trailing close() call would be skipped on any exception.
    with xr.open_dataset('datasets/pct_CaCO3.coretop.cdf') as caco3_ds:
        caco3_var = list(caco3_ds.data_vars)[0]
        accum_var = list(accum_ds.data_vars)[0]
        caco3 = caco3_ds[caco3_var]
        accum = accum_ds[accum_var]

        # Get cleaned data (CaCO3 is a percentage, so only 0-100 is kept)
        caco3_clean = caco3.where((caco3 >= 0) & (caco3 <= 100) & np.isfinite(caco3))
        accum_clean = accum.where((accum >= 0) & np.isfinite(accum))

        # Pair values by coordinate label instead of by flattened position:
        # keep only the labels present in both grids and force the same
        # dimension order, so element i of one array and element i of the
        # other always refer to the same grid cell.
        caco3_clean, accum_clean = xr.align(caco3_clean, accum_clean, join='inner')
        accum_clean = accum_clean.transpose(*caco3_clean.dims)

        # Pull the values into NumPy arrays while the file is still open
        caco3_flat = caco3_clean.values.flatten()
        accum_flat = accum_clean.values.flatten()

    # Remove pairs where either value is missing
    mask = ~np.isnan(caco3_flat) & ~np.isnan(accum_flat)
    caco3_flat = caco3_flat[mask]
    accum_flat = accum_flat[mask]

    if caco3_flat.size == 0:
        print("No overlapping valid CaCO3/accumulation points to compare")
    else:
        # Scatter plot, colored by accumulation rate
        fig, ax = plt.subplots(figsize=(10, 8))
        scatter = ax.scatter(caco3_flat, accum_flat, alpha=0.3, s=5, c=accum_flat, cmap='viridis')
        ax.set_xlabel('CaCO3 (%)', fontsize=12)
        ax.set_ylabel('Accumulation Rate', fontsize=12)
        ax.set_title('CaCO3 vs Accumulation Rate', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3)
        plt.colorbar(scatter, label='Accumulation Rate')

        # Pearson correlation is undefined for fewer than two points
        if caco3_flat.size > 1:
            corr = np.corrcoef(caco3_flat, accum_flat)[0, 1]
        else:
            corr = float('nan')
        ax.text(0.05, 0.95, f'Correlation: {corr:.3f}\nN: {len(caco3_flat):,}',
                transform=ax.transAxes, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5),
                fontsize=10)

        plt.tight_layout()
        plt.show()
except Exception as e:
    # Deliberately broad: the comparison is optional ("if available").
    print(f"Could not load CaCO3 data for comparison: {e}")

## Export Data to CSV (Optional)

In [None]:
# Optionally export to CSV for easier analysis
# Uncomment to export
# import pandas as pd
# df = accum_ds.to_dataframe().reset_index()
# df.to_csv('datasets/accum_coretop_export.csv', index=False)
# print(f"Exported {len(df)} rows to CSV")

In [None]:
# Close dataset.
# Run this cell last: the cells above read from `accum_ds`.
accum_ds.close()