# **Example 3** Working with vertical profiles using the `ProfileData` class

In [None]:
import earthcarekit as eck
import numpy as np
import pandas as pd

## **3.1** Initializing `ProfileData`

In [None]:
# ProfileData has 3 required inputs:
# - values
# - height
# - time
#
# In this minimal example `values` is a nested list holding one row of six
# numbers, `height` lists six heights, and `time` holds one time stamp,
# i.e. a single profile with six height bins.

p = eck.ProfileData(
    values=[[0, 0.4, 1, 1, 0.6, 0]],
    height=[0e3, 5e3, 10e3, 15e3, 20e3, 25e3],
    time=["2025-01-01T00:00"],
)
# Show the notebook representation of the new object
display(p)

# Plotting
pf = eck.ProfileFigure().plot(p)

In [None]:
# Note: This cell requires external input data!
# Rather than building ProfileData objects by hand,
# they can be created from xarray datasets:

# Replace path with one of your local files
fp = r"./ECA_EXBA_ATL_EBD_2A_20240902T210023Z_20250721T110708Z_01508B.h5"
with eck.read_any(fp) as ds:
    display(ds)

    # Select a valid variable from the dataset
    p = eck.ProfileData.from_dataset(
        ds,
        var="particle_backscatter_coefficient_355nm",
    )

    # Plotting (the curtain plot settings below can be customized)
    cf = eck.CurtainFigure().plot(
        p,
        log_scale=True,
        cmap="calipso",
        value_range=(1e-7, 1e-4),
        height_range=(0, 20e3),
    )
    pf = eck.ProfileFigure(height_range=(0, 20e3)).plot(p)

## **3.2** Single profiles

In [None]:
# Synthetic example data for a single profile
nh = 1000  # Number of height bins
h = np.linspace(0, 40e3, nh)  # Height values in meters

# Signal values: |sin| swept from 3*pi down to -pi, weighted by height,
# then scaled so that the largest value equals 1
v = np.abs(h * np.sin(np.linspace(3 * np.pi, -np.pi, nh)))
v = v / v.max()

p = eck.ProfileData(values=v, height=h, time=["2025-01-01T00:00"])

# Plotting
pf = eck.ProfileFigure().plot(p)

### **3.2.1** Select a height range

In [None]:
# Height limits of the selection, as a (lower, upper) pair
height_range = (5e3, 15e3)
p_selected = p.select_height_range(height_range)

# Plotting: draw the original (dotted) and the selection (thick line)
# into the same figure
pf = (
    eck.ProfileFigure(
        label="The variable name",
        units="some units",
        show_legend=True,
        value_range=(0,1),
    )
    .plot(p, linestyle="dotted", legend_label="original")
    .plot(p_selected, linewidth=3, legend_label="selected")
)

In [None]:
# Note: The selected height range can also be marked directly in the plot
pf = eck.ProfileFigure(value_range=(0,1)).plot(
    p,
    selection_height_range=(5e3, 15e3),
)

### **3.2.2** Rebin the profile to new height bin centers

In [None]:
# New height bin centers: 11 evenly spaced values from 0 to 40e3
new_height = np.linspace(0,40e3,11)

# Rebin twice: once with the default method and once with interpolation
p_rebin_mean = p.rebin_height(new_height)
p_rebin_interp = p.rebin_height(new_height, method="interpolate")

# Plotting: original and both rebinned profiles in one figure
pf = (
    eck.ProfileFigure(
        label="The variable name",
        units="some units",
        show_legend=True,
        value_range=(0,1),
    )
    .plot(p, legend_label="profile")
    .plot(p_rebin_mean, legend_label='rebin (method = "mean")')
    .plot(p_rebin_interp, legend_label='rebin (method = "interpolate")')
)

### **3.2.3** Calculate statistics and compare profiles

In [None]:
# Statistics without a height restriction ...
results = p.stats()
display(results)

# ... and restricted to two different height ranges
results2 = p.stats(height_range=(7_500, 12_500))
display(results2)

results3 = p.stats(height_range=(12_500, 17_500))
display(results3)

print("Create a dataframe:")
# Convert each result to a dataframe and stack them in the order computed
df = pd.concat([res.to_dataframe() for res in (results, results2, results3)])
display(df)

In [None]:
# Compare 2 profiles
# Note: Here we compare the 2 rebinned profiles from section 3.2.2 above.

results = p_rebin_mean.compare_to(p_rebin_interp)
display(results)
display(results.to_dataframe())

# Plotting: both rebinned profiles in one figure, with fixed colors
pf = (
    eck.ProfileFigure(
        label="The variable name",
        units="some units",
        show_legend=True,
        value_range=(0,1),
    )
    .plot(p_rebin_mean, legend_label='rebin (method = "mean")', color="tab:orange")
    .plot(p_rebin_interp, legend_label='rebin (method = "interpolate")', color="tab:green")
)

## **3.3** Multiple profiles (curtain)

In [None]:
# Synthetic example data for a curtain (many profiles over time)
nh = 100  # Number of height bins
nt = 300  # Number of (temporal) samples
h = np.linspace(0, 40e3, nh)

# Distance from the origin on a grid of shape (nh, nt)
y = np.linspace(-0.5, 0.5, nh)
x = np.linspace(-1, 2, nt)
gx, gy = np.meshgrid(x, y)
r = np.sqrt(gx**2 + gy**2)

# Ring pattern, transposed to (nt, nh), made non-negative and scaled
# along the height axis by a factor going from 1 to 0.1
v = np.abs(np.sin(50 * r).T) * np.linspace(1, 0.1, nh)

p = eck.ProfileData(
    values=v,
    height=h,
    time=pd.date_range("20250101T00", "20250101T12", periods=nt),
)

# Plotting
cf = eck.CurtainFigure().plot(p, value_range=(0,1))
pf = eck.ProfileFigure().plot(p, value_range=(0,1))

### **3.3.1** Get mean profile

In [None]:
# Reduce the multi-profile data `p` to its mean profile.
# Printing both shapes shows how the reduction changes the dimensions.
p_mean = p.mean()
print(f"{p.shape=}")
print(f"{p_mean.shape=}")

# Plotting
pf = eck.ProfileFigure().plot(p_mean, value_range=(0,1))

### **3.3.2** Apply rolling mean (or moving average) 

In [None]:
# Apply a rolling mean with the arguments 20 and axis=0.
# NOTE(review): 20 is presumably the window size in samples, and axis 0 is
# presumably the time axis (the example values were built as (time, height))
# - confirm against the earthcarekit documentation.
p_roll = p.rolling_mean(20, axis=0)
# Print both shapes to compare the data before and after the rolling mean
print(f"{p.shape=}")
print(f"{p_roll.shape=}")

# Plotting
cf = eck.CurtainFigure().plot(p_roll, value_range=(0,1))
pf = eck.ProfileFigure().plot(p_roll, value_range=(0,1))

### **3.3.3** Coarsen profiles

In [None]:
# Coarsen the profiles with the argument 20.
# NOTE(review): presumably this averages blocks of 20 consecutive profiles -
# confirm against the earthcarekit documentation. The printed shapes below
# show the actual effect on the dimensions.
p_coarsened = p.coarsen_mean(20)
print(f"{p.shape=}")
print(f"{p_coarsened.shape=}")

# Plotting
cf = eck.CurtainFigure().plot(p_coarsened, value_range=(0,1))
pf = eck.ProfileFigure().plot(p_coarsened, value_range=(0,1))

### **3.3.4** Rebin to new height bins

In [None]:
# New height bin centers: nine values from 0 to 40e3 in steps of 5e3
height_bin_centers = [0, 5e3, 10e3, 15e3, 20e3, 25e3, 30e3, 35e3, 40e3]
# Rebin once with the default method and once with method="interpolate"
p_rebinned_height_mean = p.rebin_height(height_bin_centers)
p_rebinned_height_interp = p.rebin_height(height_bin_centers, method="interpolate")

# Plotting
# One curtain figure per rebinned result; `cf` is reassigned, so it ends up
# referring to the second figure only.
cf = eck.CurtainFigure().plot(p_rebinned_height_mean, value_range=(0,1))
cf = eck.CurtainFigure().plot(p_rebinned_height_interp, value_range=(0,1))

### **3.3.5** Rebin to new time bins

In [None]:
# New time bin centers: four time stamps, 4 hours apart, from 00 to 12 on 2025-01-01
time_bin_centers = ["20250101T00", "20250101T04", "20250101T08", "20250101T12"]
# Rebin once with the default method and once with method="interpolate"
p_rebinned_time_mean = p.rebin_time(time_bin_centers)
p_rebinned_time_interp = p.rebin_time(time_bin_centers, method="interpolate")

# Plotting
# One curtain figure per rebinned result; `cf` is reassigned, so it ends up
# referring to the second figure only.
cf = eck.CurtainFigure().plot(p_rebinned_time_mean, value_range=(0,1))
cf = eck.CurtainFigure().plot(p_rebinned_time_interp, value_range=(0,1))