# NetCDF Data Exploration
## File: age.2020.1.GTS2012.1m.nc

In [None]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Load the NetCDF file

In [None]:
# Open the NetCDF file as an xarray Dataset; the bare `ds` expression on the
# last line makes the notebook render the dataset summary as the cell output.
nc_path = 'datasets/age.2020.1.GTS2012.1m.nc'
ds = xr.open_dataset(nc_path)
ds

## Basic Information

In [None]:
# Print the dataset's structure: dimensions, coordinate names and data
# variable names, each under its own heading.
for heading, content in (
    ("Dimensions:", ds.dims),
    ("\nCoordinates:", list(ds.coords.keys())),
    ("\nData variables:", list(ds.data_vars.keys())),
):
    print(heading)
    print(content)

## Attributes and Metadata

In [None]:
# List every dataset-level (global) attribute as "name: value".
print("Global attributes:")
for name in ds.attrs:
    print(f"{name}: {ds.attrs[name]}")

In [None]:
# Per-variable report: shape, dtype and, when present, the variable's own
# attributes.
print("\nVariable details:")
for name, arr in ds.data_vars.items():
    print(f"\n{name}:")
    print(f"  Shape: {arr.shape}")
    print(f"  Dtype: {arr.dtype}")
    if not arr.attrs:
        # Nothing to list for variables without attributes.
        continue
    print("  Attributes:")
    for key, val in arr.attrs.items():
        print(f"    {key}: {val}")

## Statistical Summary

In [None]:
# Summary statistics for each variable.
# Numeric variables get NaN-aware min/max/mean/std plus a NaN count;
# non-numeric variables get a count of distinct values instead.
for var in ds.data_vars:
    print(f"\n{var}:")
    # .values materialises the variable as a NumPy array.
    data = ds[var].values
    if data.size == 0:
        # np.nanmin / np.nanmax raise ValueError on zero-size input, which
        # would abort the loop and skip every remaining variable.
        print("  (no data)")
        continue
    if np.issubdtype(data.dtype, np.number):
        print(f"  Min: {np.nanmin(data)}")
        print(f"  Max: {np.nanmax(data)}")
        print(f"  Mean: {np.nanmean(data):.4f}")
        print(f"  Std: {np.nanstd(data):.4f}")
        print(f"  Missing values: {np.isnan(data).sum()}")
    else:
        print(f"  Unique values: {np.unique(data).shape[0]}")

## Visualizations

In [None]:
# Plot each variable on its own row of a single figure.
# 1-D variables are drawn with xarray's default plot, 2-D variables with the
# 'viridis' colormap; any other rank is labelled as not plotted rather than
# being left as an unexplained blank panel.
n_vars = len(ds.data_vars)
if n_vars == 0:
    # plt.subplots() rejects a grid with zero rows, so skip plotting entirely.
    print("No data variables to plot.")
else:
    # squeeze=False always yields a 2-D array of axes, so the single-variable
    # case needs no special handling.
    fig, axes = plt.subplots(
        n_vars, 1, figsize=(12, 4 * n_vars), squeeze=False
    )

    for ax, var in zip(axes[:, 0], ds.data_vars):
        n_dims = len(ds[var].dims)
        if n_dims == 1:
            ds[var].plot(ax=ax)
        elif n_dims == 2:
            ds[var].plot(ax=ax, cmap='viridis')
        else:
            # Scalars and 3-D+ variables have no line/image rendering here;
            # say so in the panel itself.
            ax.text(
                0.5,
                0.5,
                f"{n_dims}-D variable: not plotted",
                ha="center",
                va="center",
                transform=ax.transAxes,
            )
        ax.set_title(f"{var}")

    plt.tight_layout()
    plt.show()

## Sample Data

In [None]:
# Display the first few values of each variable.
# The preview window is the first 10 entries along the leading dimension and
# the first 5 along every remaining dimension; this matches the 1-D ([:10])
# and 2-D ([:10, :5]) previews and extends them to higher ranks.
for var in ds.data_vars:
    print(f"\n{var} (first 10 values):")
    arr = ds[var]
    if arr.ndim == 0:
        # A scalar variable has nothing to slice; show its single value.
        print(arr.values)
        continue
    window = (slice(10),) + (slice(5),) * (arr.ndim - 1)
    # Slice the DataArray before calling .values so that only the preview
    # window needs to be converted to a NumPy array (for lazily loaded files
    # xarray can then avoid reading the whole variable).
    print(arr[window].values)

## Export to DataFrame (if applicable)

In [None]:
# Best-effort conversion of the dataset to a pandas DataFrame for tabular
# inspection. Any failure inside the block is reported as a message instead
# of stopping the notebook.
try:
    df = ds.to_dataframe()
    print(f"DataFrame shape: {df.shape}", "\nFirst few rows:", sep="\n")
    display(df.head(10))
except Exception as err:
    print(f"Could not convert to DataFrame: {err}")

## Close the dataset

In [None]:
# Explicitly close the dataset now that exploration is finished; `ds` should
# not be used after this point without re-opening the file.
ds.close()