# Explore CaCO3 Coretop Data

This notebook loads and visualizes the CaCO3 coretop data from the CDF file.

In [None]:
import xarray as xr
import matplotlib.pyplot as plt
import numpy as np
import cartopy.crs as ccrs
import cartopy.feature as cfeature

## Load CaCO3 Coretop Data

In [None]:
# Open the CaCO3 coretop file with xarray and print the dataset's text summary
caco3_path = 'datasets/pct_CaCO3.coretop.cdf'
caco3_ds = xr.open_dataset(caco3_path)
print(caco3_ds)

In [None]:
# Report every data variable (shape + attributes), every coordinate
# (shape + min/max range) and every global attribute of the dataset.
print("\nData variables:")
for name, variable in caco3_ds.data_vars.items():
    print(f"  {name}: {variable.shape}")
    print(f"    Attributes: {dict(variable.attrs)}")

print("\nCoordinates:")
for name, coordinate in caco3_ds.coords.items():
    # float() is applied to the reduced value, so this expects numeric coordinates
    lower = float(coordinate.min())
    upper = float(coordinate.max())
    print(f"  {name}: {coordinate.shape}, range: [{lower:.2f}, {upper:.2f}]")

print("\nGlobal attributes:")
for key, value in caco3_ds.attrs.items():
    print(f"  {key}: {value}")

## Summary Statistics

In [None]:
# Summary statistics for every data variable, computed over non-missing cells
print("CaCO3 Data Summary:")
for var in caco3_ds.data_vars:
    print(f"\n{var}:")
    data = caco3_ds[var]
    # count() tallies the non-NaN elements. Masking with
    # where(..., drop=True) is not a substitute: it only trims labels whose
    # values are all NaN, so .size on the result still includes the NaN
    # cells that remain inside the trimmed grid.
    n_valid = int(data.count())
    print(f"  Shape: {data.shape}")
    print(f"  Valid points: {n_valid}")
    if n_valid == 0:
        # Nothing to reduce over; avoid printing meaningless statistics
        print("  No valid values - statistics skipped")
        continue
    # xarray reductions skip NaN by default for floating-point data,
    # so no explicit masking is needed here.
    print(f"  Min: {float(data.min()):.2f}")
    print(f"  Max: {float(data.max()):.2f}")
    print(f"  Mean: {float(data.mean()):.2f}")
    print(f"  Median: {float(data.median()):.2f}")
    print(f"  Std: {float(data.std()):.2f}")

## Plot CaCO3 Distribution as Map

In [None]:
# Map the first data variable of the dataset on a Robinson projection
var_name = list(caco3_ds.data_vars)[0]
data = caco3_ds[var_name]

# Check the dimensions before building the figure, so a variable without a
# lat/lon grid prints a message instead of rendering an empty map.
if 'lat' in data.dims and 'lon' in data.dims:
    fig = plt.figure(figsize=(16, 10))
    ax = plt.axes(projection=ccrs.Robinson())

    # Background features: land mask, coastlines and political borders
    ax.add_feature(cfeature.LAND, facecolor='lightgray')
    ax.add_feature(cfeature.COASTLINE, linewidth=0.5)
    ax.add_feature(cfeature.BORDERS, linewidth=0.3, alpha=0.5)

    # transform=PlateCarree tells cartopy the data coordinates are plain
    # longitude/latitude.
    # NOTE(review): assumes lon/lat are in degrees - confirm against the file.
    im = data.plot(ax=ax, transform=ccrs.PlateCarree(),
                   cmap='viridis',
                   cbar_kwargs={'label': f'{var_name} (%)', 'shrink': 0.7})
    ax.set_title('CaCO3 Coretop Distribution', fontsize=14, fontweight='bold')
    ax.gridlines(draw_labels=False, alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print(f"Data dimensions: {data.dims}")
    print("Cannot create map - no lat/lon dimensions found")

## Plot CaCO3 Histogram

In [None]:
# Histogram of the first data variable, with NaN cells excluded
fig, ax = plt.subplots(figsize=(10, 6))

var_name = list(caco3_ds.data_vars)[0]
flat_values = caco3_ds[var_name].values.ravel()
valid_values = flat_values[~np.isnan(flat_values)]  # keep non-NaN entries only

ax.hist(valid_values, bins=50, color='steelblue', edgecolor='black', alpha=0.7)
ax.set_xlabel(f'{var_name} (%)', fontsize=12)
ax.set_ylabel('Frequency', fontsize=12)
ax.set_title('Distribution of CaCO3 in Coretop Samples', fontsize=14, fontweight='bold')
ax.grid(True, alpha=0.3)

# Summary box in the upper-right corner of the axes
summary_lines = [
    f'Mean: {np.mean(valid_values):.1f}%',
    f'Median: {np.median(valid_values):.1f}%',
    f'Std: {np.std(valid_values):.1f}%',
    f'N: {len(valid_values):,}',
]
box_style = {'boxstyle': 'round', 'facecolor': 'wheat', 'alpha': 0.5}
ax.text(0.95, 0.95, '\n'.join(summary_lines), transform=ax.transAxes,
        verticalalignment='top', horizontalalignment='right',
        bbox=box_style,
        fontsize=10)

plt.tight_layout()
plt.show()

## Plot Zonal and Meridional Mean CaCO3

In [None]:
# Zonal mean (averaged over longitude) and meridional mean (averaged over
# latitude) of the first data variable, shown side by side.
var_name = list(caco3_ds.data_vars)[0]
data = caco3_ds[var_name]

# Both reductions are needed, so both dimensions must be present; checking
# only 'lat' would let the 'lon' reduction raise on a lat-only variable.
if 'lat' in data.dims and 'lon' in data.dims:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Zonal mean: collapse longitude, plot value (x) against latitude (y)
    zonal_mean = data.mean(dim='lon', skipna=True)
    ax1.plot(zonal_mean.values, zonal_mean['lat'].values, linewidth=2, color='navy')
    ax1.set_xlabel('Mean CaCO3 (%)', fontsize=12)
    ax1.set_ylabel('Latitude', fontsize=12)
    ax1.set_title('Zonal Mean CaCO3', fontsize=12, fontweight='bold')
    ax1.grid(True, alpha=0.3)
    ax1.axhline(y=0, color='red', linestyle='--', alpha=0.5)  # y = 0 reference line

    # Meridional mean: collapse latitude, plot value (y) against longitude (x)
    # NOTE(review): this is a plain mean over lat with no area (cos-latitude)
    # weighting - confirm that is intended for this grid.
    merid_mean = data.mean(dim='lat', skipna=True)
    ax2.plot(merid_mean['lon'].values, merid_mean.values, linewidth=2, color='darkgreen')
    ax2.set_xlabel('Longitude', fontsize=12)
    ax2.set_ylabel('Mean CaCO3 (%)', fontsize=12)
    ax2.set_title('Meridional Mean CaCO3', fontsize=12, fontweight='bold')
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print(f"Cannot create zonal/meridional plots - need both lat and lon dimensions, found {data.dims}")

## Export Data to CSV (Optional)

In [None]:
# Optional: convert the dataset to a pandas DataFrame and save it as CSV.
# The code is left commented out, so running this cell writes no file;
# uncomment the four lines below to export.
# NOTE(review): this reads from caco3_ds, so it presumably has to run before
# the dataset is closed - confirm cell order when enabling it.
# import pandas as pd
# df = caco3_ds.to_dataframe().reset_index()
# df.to_csv('datasets/CaCO3_coretop_export.csv', index=False)
# print(f"Exported {len(df)} rows to CSV")

In [None]:
# Close the dataset that was opened with xr.open_dataset.
# NOTE(review): cells that read from caco3_ds presumably need the load cell
# re-run after this point - confirm before re-running cells out of order.
caco3_ds.close()