# Data processing - Level 2
Adding flags:
- Pollution flag
- Cloud (1 = in; 0 = out)
- Hovering flag (0=moving; 1 = hovering, more than 2 min)

**Not done in this case**
Time averaging:
- 10 sec for all instruments
- (mSEMS remains at its native resolution)

In [None]:
from pathlib import Path

# Identifier of the flight being processed; it is part of every input/output file name.
DATA_FLIGHT_DIR_BASENAME = "2025-02-12_A"

# Root of the campaign data tree; all other locations are derived from it.
DATA_DIR = r"C:\Users\temel\Desktop\EERL\Campaigns\03_ORACLES\Neumayer_2024\Data"
DATA_DIRPATH = Path(DATA_DIR)
DATA_PROCESSING_DIRPATH = DATA_DIRPATH.joinpath("Processing")

# Input (Level 1.5) and output (Level 2) folders sit side by side under "Processing".
DATA_LEVEL1_5_DIRPATH, DATA_LEVEL2_DIRPATH = (
    DATA_PROCESSING_DIRPATH / level_dir for level_dir in ("Level1.5", "Level2")
)

## Load level 1.5 dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.widgets import Button
from matplotlib.dates import AutoDateLocator, DateFormatter
import warnings

# Suppress specific warning
# NOTE(review): this filter is redundant, the blanket "ignore" below already covers DeprecationWarning.
warnings.filterwarnings("ignore", category=DeprecationWarning)
# Suppress all warnings (every category, for everything executed after this cell)
warnings.filterwarnings("ignore")

In [None]:
""" CHANGE NAME OF INPUT FILE """

# Read the Level 1.5 file of the selected flight and index it by its parsed timestamps.
level1_5_csv_path = DATA_LEVEL1_5_DIRPATH / f"level1.5_{DATA_FLIGHT_DIR_BASENAME}.csv"
df_level1_5 = pd.read_csv(level1_5_csv_path)
df_level1_5 = df_level1_5.assign(datetime=pd.to_datetime(df_level1_5["datetime"])).set_index("datetime")
df_level1_5

## Flags
### Pollution flag

In [None]:
# Add an empty (all-NaN) 'flag_pollution' column right before 'flight_nr'.
# The column goes into df_level1_5, the dataframe every later cell reads; the previous
# 'df_level1' name is not defined anywhere in this notebook.
df_level1_5 = df_level1_5.copy()
if 'flag_pollution' not in df_level1_5.columns:   # DataFrame.insert raises on an existing column, so stay re-runnable
    df_level1_5.insert(loc=df_level1_5.columns.get_loc('flight_nr'), column='flag_pollution', value=np.nan)
df_level1_5

### Hovering flag
(Code from Joanna)

In [None]:
%matplotlib widget
plt.close('all')

# Altitude time series on which the hovering periods are picked by clicking.
palette = ["#F54B0F", "#415067"]   # [0] data line, [1] click markers and selected spans
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_level1_5.index, df_level1_5["Altitude"], color=palette[0], linewidth=2)

ax.set_xlabel("Time")
ax.set_ylabel("Altitude [m]")
ax.set_ylim(-2, 1000)
ax.grid(True, ls="--", alpha=0.5)

# Time axis: slanted labels, one HH:MM tick every 10 minutes.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
time_axis = ax.xaxis
time_axis.set_major_locator(mdates.MinuteLocator(interval=10))
time_axis.set_major_formatter(mdates.DateFormatter('%H:%M'))

# Interaction state shared by the click and button callbacks:
# clicks of the pair in progress, completed (start, end) pairs, and the drawn spans.
selected_points, stable_periods, span_artists = [], [], []

def onclick(event):
    """Record a click on the plot; every second click closes a (start, end) period.

    Clicks outside ``ax`` and clicks made while a toolbar mode (zoom/pan) is
    active are ignored.
    """
    if event.inaxes != ax or plt.get_current_fig_manager().toolbar.mode != '':
        return

    # The x position arrives as a Matplotlib date number; store it as a datetime and mark the click.
    selected_points.append(mdates.num2date(event.xdata))
    ax.plot(event.xdata, event.ydata, 'o', color=palette[1], markersize=8)
    fig.canvas.draw()

    if len(selected_points) != 2:
        return

    # Second click of a pair: keep the ordered period, shade it, and start over.
    period_start, period_end = sorted(selected_points)
    stable_periods.append((period_start, period_end))
    span_artists.append(ax.axvspan(period_start, period_end, color=palette[1], alpha=0.2))
    selected_points.clear()
    fig.canvas.draw()
results_df = None   # stays None until the button is pressed with at least one period selected


def finish_selection(event):
    """Button callback: tabulate the selected periods, print them and copy them to the clipboard.

    The table is stored in the global ``results_df``; nothing is stored when no
    period has been selected.
    """
    global results_df

    if not stable_periods:
        print("No stable periods selected")
        return

    results_df = pd.DataFrame(stable_periods, columns=['Start_Time', 'End_Time'])
    results_df['Duration'] = results_df['End_Time'] - results_df['Start_Time']

    # Optional formatting: both time columns become second-resolution strings
    for time_col in ('Start_Time', 'End_Time'):
        results_df[time_col] = results_df[time_col].dt.strftime('%Y-%m-%d %H:%M:%S')

    print("\nSelected Stable Periods:")
    print(results_df.to_string(index=False))

    # Copy to clipboard
    results_df.to_clipboard(index=False)
    print("\nResults copied to clipboard!")

# Add button (the reference in `btn` is kept so the widget is not garbage-collected)
ax_button = plt.axes([0.82, 0.01, 0.15, 0.05])
btn = Button(ax_button, 'Finish Selection', color='lightgoldenrodyellow')
btn.on_clicked(finish_selection)

# Hook up click handler
fig.canvas.mpl_connect('button_press_event', onclick)

# Title the data axes explicitly: plt.axes() above made the button axes the current
# axes, so plt.title() would put the title on the button instead of the plot.
ax.set_title("Click to select stable periods")
plt.show()

# With a non-blocking backend this prints the value at the end of the cell run,
# i.e. before any interactive selection has been finished.
print(results_df)

In [None]:
results_df

In [None]:
""" CHANGE NAME OF OUTPUT FILE """

hovering_csv_path = DATA_LEVEL1_5_DIRPATH / f"level1.5_{DATA_FLIGHT_DIR_BASENAME}_hovering.csv"

# Saving is disabled here; the hovering periods already stored on disk are loaded instead.
#results_df.to_csv(hovering_csv_path, index=False)

results_df = pd.read_csv(hovering_csv_path)
results_df

In [None]:
# Add 'flag_hovering' column to 'df_level1_5'

# The period bounds are parsed to timestamps so they can be compared with the index.
for time_col in ('Start_Time', 'End_Time'):
    results_df[time_col] = pd.to_datetime(results_df[time_col])

# Default is 0; every sample inside a selected period (bounds included) is set to 1.
df_level1_5['flag_hovering'] = 0
for period_start, period_end in zip(results_df['Start_Time'], results_df['End_Time']):
    in_period = (df_level1_5.index >= period_start) & (df_level1_5.index <= period_end)
    df_level1_5.loc[in_period, 'flag_hovering'] = 1

# Keep the flag columns together: move the new column directly after 'flag_pollution'.
if 'flag_pollution' in df_level1_5.columns:
    target_position = list(df_level1_5.columns).index('flag_pollution') + 1
    df_level1_5.insert(target_position, 'flag_hovering', df_level1_5.pop('flag_hovering'))

df_level1_5

In [None]:
# Check flag: print the hovering flag for a short, hand-picked time window
start_time = "2025-02-12 10:08:30"
end_time = "2025-02-12 10:08:40"

within_window = (df_level1_5.index >= start_time) & (df_level1_5.index <= end_time)
filtered_hovering = df_level1_5.loc[within_window, 'flag_hovering']
print(filtered_hovering)

### Cloud flag

In [None]:
%matplotlib widget
plt.close('all')

# mCDA total concentration time series on which the cloud periods are picked by clicking.
palette = ["#F54B0F", "#415067"]   # [0] data line, [1] click markers and selected spans
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df_level1_5.index, df_level1_5["mCDA_total_N"], color=palette[0], linewidth=2)

ax.set_xlabel("Time")
ax.set_ylabel("mCDA conc (cm$^{-3}$)")
ax.set_ylim(0, 60)
ax.grid(True, ls="--", alpha=0.5)

# Time axis: slanted labels, one HH:MM tick every 10 minutes.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
time_axis = ax.xaxis
time_axis.set_major_locator(mdates.MinuteLocator(interval=10))
time_axis.set_major_formatter(mdates.DateFormatter('%H:%M'))

# Interaction state shared by the click and button callbacks:
# clicks of the pair in progress, completed (start, end) pairs, and the drawn spans.
selected_points, stable_periods, span_artists = [], [], []

def onclick(event):
    """Record a click on the plot; every second click closes a (start, end) period.

    Clicks outside ``ax`` and clicks made while a toolbar mode (zoom/pan) is
    active are ignored.
    """
    if event.inaxes != ax or plt.get_current_fig_manager().toolbar.mode != '':
        return

    # The x position arrives as a Matplotlib date number; store it as a datetime and mark the click.
    selected_points.append(mdates.num2date(event.xdata))
    ax.plot(event.xdata, event.ydata, 'o', color=palette[1], markersize=8)
    fig.canvas.draw()

    if len(selected_points) != 2:
        return

    # Second click of a pair: keep the ordered period, shade it, and start over.
    period_start, period_end = sorted(selected_points)
    stable_periods.append((period_start, period_end))
    span_artists.append(ax.axvspan(period_start, period_end, color=palette[1], alpha=0.2))
    selected_points.clear()
    fig.canvas.draw()
results_df = None   # stays None until the button is pressed with at least one period selected


def finish_selection(event):
    """Button callback: tabulate the selected periods, print them and copy them to the clipboard.

    The table is stored in the global ``results_df``; nothing is stored when no
    period has been selected.
    """
    global results_df

    if not stable_periods:
        print("No stable periods selected")
        return

    results_df = pd.DataFrame(stable_periods, columns=['Start_Time', 'End_Time'])
    results_df['Duration'] = results_df['End_Time'] - results_df['Start_Time']

    # Optional formatting: both time columns become second-resolution strings
    for time_col in ('Start_Time', 'End_Time'):
        results_df[time_col] = results_df[time_col].dt.strftime('%Y-%m-%d %H:%M:%S')

    print("\nSelected Stable Periods:")
    print(results_df.to_string(index=False))

    # Copy to clipboard
    results_df.to_clipboard(index=False)
    print("\nResults copied to clipboard!")

# Add button (the reference in `btn` is kept so the widget is not garbage-collected)
ax_button = plt.axes([0.82, 0.01, 0.15, 0.05])
btn = Button(ax_button, 'Finish Selection', color='lightgoldenrodyellow')
btn.on_clicked(finish_selection)

# Hook up click handler
fig.canvas.mpl_connect('button_press_event', onclick)

# Title the data axes explicitly: plt.axes() above made the button axes the current
# axes, so plt.title() would put the title on the button instead of the plot.
ax.set_title("Click to select stable periods")
plt.show()

# With a non-blocking backend this prints the value at the end of the cell run,
# i.e. before any interactive selection has been finished.
print(results_df)

In [None]:
results_df

In [None]:
""" CHANGE NAME OF OUTPUT FILE """

# Write and read back through the same path. The file used to be written to the current
# working directory while the reload looked in the Level 1.5 folder, so the reload either
# failed or picked up an older file.
cloud_csv_path = DATA_LEVEL1_5_DIRPATH / f"level1.5_{DATA_FLIGHT_DIR_BASENAME}_cloud.csv"

results_df.to_csv(cloud_csv_path, index=False)

results_df = pd.read_csv(cloud_csv_path)
results_df

In [None]:
# Add 'flag_cloud' column to 'df_level1_5'

# The period bounds are parsed to timestamps so they can be compared with the index.
for time_col in ('Start_Time', 'End_Time'):
    results_df[time_col] = pd.to_datetime(results_df[time_col])

# Default is 0; every sample inside a selected period (bounds included) is set to 1.
df_level1_5['flag_cloud'] = 0
for period_start, period_end in zip(results_df['Start_Time'], results_df['End_Time']):
    in_period = (df_level1_5.index >= period_start) & (df_level1_5.index <= period_end)
    df_level1_5.loc[in_period, 'flag_cloud'] = 1

# Keep the flag columns together: move the new column directly after 'flag_hovering'.
if 'flag_hovering' in df_level1_5.columns:
    target_position = list(df_level1_5.columns).index('flag_hovering') + 1
    df_level1_5.insert(target_position, 'flag_cloud', df_level1_5.pop('flag_cloud'))

df_level1_5

In [None]:
# If no clouds: add an all-zero 'flag_cloud' column before 'flight_nr'.
# Skipped when the column already exists, because DataFrame.insert raises ValueError on an
# existing column name (e.g. when this cell is re-run).
if 'flag_cloud' not in df_level1_5.columns:
    df_level1_5.insert(loc=df_level1_5.columns.get_loc('flight_nr'), column='flag_cloud', value=0)
df_level1_5

In [None]:
# Check flag: print the cloud flag for a short, hand-picked time window
start_time = "2025-02-12 09:11:05"
end_time = "2025-02-12 09:11:10"

within_window = (df_level1_5.index >= start_time) & (df_level1_5.index <= end_time)
filtered_cloud = df_level1_5.loc[within_window, 'flag_cloud']
print(filtered_cloud)

## Data quicklooks

In [None]:
# Optional (disabled): reload an already saved Level 2 file instead of using the dataframe in memory.
# df_level1_5 = pd.read_csv(DATA_LEVEL2_DIRPATH / f"level2_{DATA_FLIGHT_DIR_BASENAME}.csv")
#df_level1_5["datetime"] = pd.to_datetime(df_level1_5["datetime"])
#df_level1_5.set_index("datetime", inplace=True)
df_level1_5

In [None]:
from helikite.processing.post.level1 import flight_profiles_2

# Limits for x-axis (T, RH, mSEMS, CPC, POPS, mCDA, WS, WD)
# One (lower limit, upper limit, tick step) entry per panel key.
_panel_ranges = {
    'ax1': (-6, 2, 2),
    'ax2': (60, 100, 10),
    'ax3': (0, 1200, 200),
    'ax4': (0, 1200, 200),
    'ax5': (0, 60, 10),
    'ax6': (0, 60, 10),
    'ax7': (0, 12, 3),
}

custom_xlim = {panel: (lower, upper) for panel, (lower, upper, _) in _panel_ranges.items()}

# Ticks run from the lower limit up to and including the upper limit (hence the +1 on the stop).
custom_xticks = {
    panel: np.arange(lower, upper + 1, step)
    for panel, (lower, upper, step) in _panel_ranges.items()
}

# Title and output file name of the Level 2 profile quicklook.
# NOTE(review): `metadata` is not created in the cells shown above - confirm it is loaded before this cell runs.
custom_title = f'Flight {metadata.flight} ({metadata.flight_date}_A) [Level 2]'
#custom_title = f'Flight 48 (2025-02-03_A) [Level 2]'
filename = f'Level2_{metadata.flight_date}_A_Flight_{metadata.flight}.png'
#filename = f'Level2_2025-02-03_A_Flight_48.png'
save_path = DATA_LEVEL2_DIRPATH / filename

# Draw the profiles with the custom axis limits and ticks, then store the figure.
fig = flight_profiles_2(
    df_level1_5,
    metadata,
    xlims=custom_xlim,
    xticks=custom_xticks,
    fig_title=custom_title,
)
print("Saving figure to:", save_path)
fig.savefig(save_path, dpi=300, bbox_inches='tight')

In [None]:
import numpy as np

# Fixed list of 60 bin diameters used as the y-coordinates of the mSEMS heat map
# (that panel's axis is labelled in nm). It replaces the per-flight average that the
# plotting cell has commented out.
bin_diameter_averages = np.array([
    8.209426, 8.652978, 9.120773, 9.614176, 10.134633, 10.683676, 11.262924,
    11.874097, 12.519017, 13.199618, 13.917952, 14.676198, 15.476672,
    16.319869, 17.212476, 18.155273, 19.151085, 20.203080, 21.314636,
    22.489353, 23.731074, 25.043907, 26.432241, 27.900779, 29.454554,
    31.098968, 32.839819, 34.683337, 36.636229, 38.705716, 40.899590,
    43.226267, 45.694846, 48.315185, 51.097974, 54.054825, 57.198371,
    60.542379, 64.101873, 67.893280, 71.934588, 76.245533, 80.847801,
    85.765265, 91.024253, 96.653846, 102.686223, 109.157047, 116.105907,
    123.576809, 131.618741, 140.286300, 149.640401, 159.749066, 170.688305,
    182.543091, 195.408432, 209.392962, 224.611976, 241.211823
])


In [None]:
# TEMPORAL PLOT OF FLIGHT with POPS and mSEMS HEAT MAPS

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.colors as mcols
import matplotlib.colors as mcolors
import matplotlib.lines as mlines
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

%matplotlib inline
plt.close('all')

# Four equally tall, stacked panels that share one time axis
fig, axes = plt.subplots(
    nrows=4, ncols=1, figsize=(16, 12), sharex=True,
    gridspec_kw={'height_ratios': [1] * 4},
)
plt.subplots_adjust(hspace=0.1)

""" SET THE TITLE OF THE PLOT (FLIGHT N° with DATE_X) """
# Flight number and date are filled in from `metadata`.
# The '_X' suffix has to be changed manually according to the flight index of the day (A, B, ...)
fig.suptitle(f'Flight {metadata.flight} ({metadata.flight_date}_A) [Level 2]', fontsize=16, fontweight='bold', y=0.91)

### SUBPLOT 1: Altitude vs. Time
def _flagged_times(frame, column, flagged_value=1):
    """Return the index entries where *column* equals *flagged_value* (empty if the column is missing)."""
    if column not in frame.columns:
        print(f"Column '{column}' not found - nothing shaded for it.")
        return frame.index[:0]
    return frame[frame[column] == flagged_value].index


def _shade_runs(axis, times, max_gap, **span_kwargs):
    """Shade each run of consecutive timestamps in *times* on *axis*.

    A new run starts whenever two neighbouring timestamps are more than
    *max_gap* apart. Every span receives the same keyword arguments.
    """
    if times.empty:
        return
    run_start = times[0]
    for previous, current in zip(times[:-1], times[1:]):
        if current - previous > max_gap:
            axis.axvspan(run_start, previous, **span_kwargs)
            run_start = current
    axis.axvspan(run_start, times[-1], **span_kwargs)


ax1 = axes[0]
ax1.plot(df_level1_5.index, df_level1_5['Altitude'], color='black', linewidth=2, label='Altitude')

ax1.set_ylabel('Altitude (m)', fontsize=12, fontweight='bold')
ax1.tick_params(axis='y', labelsize=11)
ax1.grid(True, linestyle='--', linewidth=0.5)
ax1.tick_params(axis='x', labelbottom=False)
ax1.set_ylim(-40, df_level1_5['Altitude'].max() * 1.04)

# Samples further apart than this are treated as separate flagged periods.
max_gap = pd.Timedelta(seconds=1)

# Shade areas for flag_pollution == 1
pollution_times = _flagged_times(df_level1_5, 'flag_pollution')
_shade_runs(ax1, pollution_times, max_gap, color='lightcoral', alpha=0.8, label='Pollution')

# Shade areas for flag_hovering == 1
hovering_times = _flagged_times(df_level1_5, 'flag_hovering')
_shade_runs(ax1, hovering_times, max_gap, color='beige', alpha=1, label='Hovering')

# Shade areas for flag_cloud == 1
cloud_times = _flagged_times(df_level1_5, 'flag_cloud')
_shade_runs(ax1, cloud_times, max_gap, color='lightblue', alpha=0.5, label='Cloud')

# Shade areas for Filter_position != 1.0
filter_mask = df_level1_5['Filter_position'] != 1.0
filter_times = df_level1_5[filter_mask].index
_shade_runs(ax1, filter_times, max_gap, facecolor='none', edgecolor='gray', hatch='////', alpha=0.8, label='Filter')


# Every span of one kind carries the same label, so keep one legend entry per label.
handles, labels = ax1.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
ax1.legend(by_label.values(), by_label.keys(), fontsize=10)


### SUBPLOT 2: mSEMS heatmap & total concentration
ax2 = axes[1]

# Get diameter bin averages
#start_dia = 'msems_inverted_Bin_Dia1'
#end_dia = 'msems_inverted_Bin_Dia60'
#bin_diameter_averages = df_level1_5.loc[:, start_dia:end_dia].mean()

# Size-resolved concentrations: rows with any missing bin are dropped and values are
# floored at 1, the lower bound of the logarithmic colour scale used below.
start_conc, end_conc = 'mSEMS_Bin_Conc1', 'mSEMS_Bin_Conc60'
counts = df_level1_5.loc[:, start_conc:end_conc]
counts.index = df_level1_5.index
counts = counts.astype(float).dropna(how='any').clip(lower=1)

# Time x diameter grid matching the transposed concentration matrix
xx, yy = np.meshgrid(counts.index.values, bin_diameter_averages)

# Heat map on a logarithmic colour scale
norm = mcolors.LogNorm(vmin=1, vmax=1000)
mesh = ax2.pcolormesh(xx, yy, counts.values.T, cmap='viridis', norm=norm, shading="gouraud")

# Colorbar in a narrow inset axes to the right of the panel
divider = make_axes_locatable(ax2)
cax = inset_axes(
    ax2, width="1.5%", height="100%", loc='lower left',
    bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax2.transAxes,
)
cb = fig.colorbar(mesh, cax=cax, orientation='vertical')
cb.ax.tick_params(labelsize=11)
cb.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=13, fontweight='bold')

# Total concentration as red dots on a secondary y-axis
ax2_right = ax2.twinx()
total_conc = df_level1_5['mSEMS_total_N']
ax2_right.scatter(df_level1_5.index, total_conc, color='red', marker='.')
ax2_right.tick_params(axis='y', labelsize=11, colors='red')
ax2_right.set_ylabel('mSEMS conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=8)
#ax2_right.set_ylim(0, total_conc.max() * 1.1)

# Diameter axis: logarithmic, fixed range, vertical grid lines only
ax2.set_yscale('log')
ax2.set_ylim(8, 236)
ax2.set_ylabel('Part. Diameter (nm)', fontsize=12, fontweight='bold')
ax2.grid(True, linestyle='--', alpha=0.6, axis='x')


### SUBPLOT 3: POPS heatmap & total concentration
ax3 = axes[2]

# POPS bin diameters and bin widths (dlogDp), taken from the Handix documentation
pops_dia = [
    149.0801282, 162.7094017, 178.3613191, 195.2873341,
    212.890625, 234.121875, 272.2136986, 322.6106374,
    422.0817873, 561.8906456, 748.8896681, 1054.138693,
    1358.502538, 1802.347716, 2440.99162, 3061.590212
]

pops_dlogDp = [
    0.036454582, 0.039402553, 0.040330922, 0.038498955,
    0.036550107, 0.045593506, 0.082615487, 0.066315868,
    0.15575785, 0.100807113, 0.142865049, 0.152476328,
    0.077693935, 0.157186601, 0.113075192, 0.086705426
]

# Only the columns POPS_b3 ... POPS_b15 are plotted
start_conc, end_conc = 'POPS_b3', 'POPS_b15'

# Get POPS concentration data
pops_counts = df_level1_5.loc[:, start_conc:end_conc].set_index(df_level1_5.index).astype(float)

# Time x diameter grid; the diameters are taken from position 3 of the list onwards
#pops_dia = np.logspace(np.log10(180), np.log10(3370), num=pops_counts.shape[1])
bin_diameters = pops_dia[3:16]
xx, yy = np.meshgrid(pops_counts.index.values, bin_diameters)

# Heat map on a logarithmic colour scale
norm = mcolors.LogNorm(vmin=1, vmax=300)
mesh = ax3.pcolormesh(xx, yy, pops_counts.values.T, cmap='viridis', norm=norm, shading="gouraud")

# Colorbar in a narrow inset axes to the right of the panel
divider = make_axes_locatable(ax3)
cax = inset_axes(
    ax3, width="1.5%", height="100%", loc='lower left',
    bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax3.transAxes,
)
cb = fig.colorbar(mesh, cax=cax, orientation='vertical')
cb.ax.tick_params(labelsize=11)
cb.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=12, fontweight='bold')

# Diameter axis: logarithmic, fixed range, vertical grid lines only
ax3.set_yscale('log')
ax3.set_ylabel('Part. Diameter (nm)', fontsize=12, fontweight='bold')
ax3.tick_params(axis='y', labelsize=11)
ax3.grid(True, linestyle='--', linewidth=0.5, axis='x')
ax3.grid(False, axis='y')
ax3.set_ylim(180, 3370)

# Total POPS concentration as a red line on a secondary y-axis
pops_total = df_level1_5['POPS_total_N']
ax3_right = ax3.twinx()
ax3_right.plot(df_level1_5.index, pops_total, color='red', linewidth=2, label='Total POPS Conc.')
ax3_right.set_ylabel('POPS conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=8)
ax3_right.tick_params(axis='y', labelsize=11, colors='red')
ax3_right.spines['right'].set_color('red')
ax3_right.set_ylim(-20, pops_total.max() * 1.1)


### Subplot 4: mCDA heatmap & total concentration
ax4 = axes[3]

# Midpoint diameters
Midpoint_diameter_list = np.array([
    0.244381, 0.246646, 0.248908, 0.251144, 0.253398, 0.255593, 0.257846, 0.260141, 0.262561, 0.265062, 0.267712, 0.270370, 0.273159, 0.275904, 0.278724, 0.281554, 0.284585, 0.287661, 0.290892, 0.294127, 0.297512, 0.300813, 0.304101, 0.307439,
    0.310919, 0.314493, 0.318336, 0.322265, 0.326283, 0.330307, 0.334409, 0.338478, 0.342743, 0.347102, 0.351648, 0.356225, 0.360972, 0.365856, 0.371028, 0.376344, 0.382058, 0.387995, 0.394223, 0.400632, 0.407341, 0.414345, 0.421740, 0.429371,
    0.437556, 0.446036, 0.454738, 0.463515, 0.472572, 0.481728, 0.491201, 0.500739, 0.510645, 0.520720, 0.530938, 0.541128, 0.551563, 0.562058, 0.572951, 0.583736, 0.594907, 0.606101, 0.617542, 0.628738, 0.640375, 0.652197, 0.664789, 0.677657,
    0.691517, 0.705944, 0.721263, 0.736906, 0.753552, 0.770735, 0.789397, 0.808690, 0.829510, 0.851216, 0.874296, 0.897757, 0.922457, 0.948074, 0.975372, 1.003264, 1.033206, 1.064365, 1.097090, 1.130405, 1.165455, 1.201346, 1.239589, 1.278023,
    1.318937, 1.360743, 1.403723, 1.446000, 1.489565, 1.532676, 1.577436, 1.621533, 1.667088, 1.712520, 1.758571, 1.802912, 1.847836, 1.891948, 1.937088, 1.981087, 2.027604, 2.074306, 2.121821, 2.168489, 2.216644, 2.263724, 2.312591, 2.361099,
    2.412220, 2.464198, 2.518098, 2.571786, 2.628213, 2.685162, 2.745035, 2.805450, 2.869842, 2.935997, 3.005175, 3.074905, 3.148598, 3.224051, 3.305016, 3.387588, 3.476382, 3.568195, 3.664863, 3.761628, 3.863183, 3.965651, 4.072830, 4.179050,
    4.289743, 4.400463, 4.512449, 4.621025, 4.731530, 4.839920, 4.949855, 5.057777, 5.169742, 5.281416, 5.395039, 5.506828, 5.621488, 5.734391, 5.849553, 5.962881, 6.081516, 6.200801, 6.322133, 6.441786, 6.565130, 6.686935, 6.813017, 6.938981,
    7.071558, 7.205968, 7.345185, 7.483423, 7.628105, 7.774385, 7.926945, 8.080500, 8.247832, 8.419585, 8.598929, 8.780634, 8.973158, 9.167022, 9.372760, 9.582145, 9.808045, 10.041607, 10.287848, 10.537226, 10.801172, 11.068405, 11.345135,
    11.621413, 11.910639, 12.200227, 12.492929, 12.780176, 13.072476, 13.359067, 13.651163, 13.937329, 14.232032, 14.523919, 14.819204, 15.106612, 15.402110, 15.695489, 15.998035, 16.297519, 16.610927, 16.926800, 17.250511,
    17.570901, 17.904338, 18.239874, 18.588605, 18.938763, 19.311505, 19.693678, 20.093464, 20.498208, 20.927653, 21.366609, 21.827923, 22.297936, 22.802929, 23.325426, 23.872344, 24.428708, 25.016547, 25.616663, 26.249815,
    26.888493, 27.563838, 28.246317, 28.944507, 29.626186, 30.323440, 31.005915, 31.691752, 32.353900, 33.030123, 33.692286, 34.350532, 34.984611, 35.626553, 36.250913, 36.878655, 37.489663, 38.121550, 38.748073, 39.384594,
    40.008540, 40.654627, 41.292757, 41.937789, 42.578436
])

# Prepare data: all mCDA bin columns as floats; zeros become NaN
counts = df_level1_5.loc[:, 'mCDA_dataB1':'mCDA_dataB256'].set_index(df_level1_5.index).astype(float)
counts = counts.mask(counts == 0)

# Time x diameter grid matching the transposed concentration matrix
bin_diameters = Midpoint_diameter_list
xx, yy = np.meshgrid(counts.index.values, bin_diameters)
Z = counts.values.T

# Plot heatmap on a logarithmic colour scale
norm = mcolors.LogNorm(vmin=1, vmax=50)
mesh = ax4.pcolormesh(xx, yy, Z, cmap='viridis', norm=norm, shading="gouraud")

# Colorbar in a narrow inset axes to the right of the panel
divider = make_axes_locatable(ax4)
cax = inset_axes(
    ax4, width="1.5%", height="100%", loc='lower left',
    bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax4.transAxes,
)
cb = fig.colorbar(mesh, cax=cax, orientation='vertical')
cb.ax.tick_params(labelsize=11)
cb.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=12, fontweight='bold')

# Total concentration as a red line on a secondary y-axis
ax4_right = ax4.twinx()
total_conc = df_level1_5['mCDA_total_N']
ax4_right.plot(df_level1_5.index, total_conc, color='red', linewidth=2)
ax4_right.tick_params(axis='y', labelsize=11, colors='red')
ax4_right.set_ylabel('mCDA conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=15)
#ax4_right.set_ylim(0, total_conc.max() * 2)
#ax4_right.set_xlim(ax4.get_xlim())

# Axis styling: logarithmic diameter axis, fixed range, vertical grid lines only
ax4.set_yscale('log')
ax4.set_ylim(0.4, 20)
ax4.set_ylabel('Part. Diameter (μm)', fontsize=12, fontweight='bold')
ax4.grid(True, linestyle='--', linewidth=0.5, axis='x')
ax4.grid(False, axis='y')

# Legend for secondary y-axis
#ax2_right.legend(['mSEMS total conc.'], loc='upper right', fontsize=11, frameon=False)
#ax3_right.legend(['POPS total conc.'], loc='upper right', fontsize=11, frameon=False)
#ax4_right.legend(['mCDA total conc.'], loc='upper right', fontsize=11, frameon=False)

# Time axis (shared by all panels, labelled on the bottom one): HH:MM every 10 minutes
ax4.set_xlabel('Time', fontsize=13, fontweight='bold', labelpad=10)
bottom_time_axis = ax4.xaxis
bottom_time_axis.set_major_locator(mdates.MinuteLocator(interval=10))
bottom_time_axis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax4.tick_params(axis='x', rotation=90, labelsize=11)

""" SET TIME RANGE (DATE + TIME) """
#ax3.set_xlim(pd.Timestamp('2025-02-12T07:55:00'), pd.Timestamp('2025-02-12T10:20:00'))

""" SAVE PLOT """
filename = f'Level2_{metadata.flight_date}_A_Flight_{metadata.flight}_SizeDistr.png'
save_path = DATA_LEVEL2_DIRPATH.joinpath(filename)
print("Saving figure to:", save_path)
fig.savefig(save_path, dpi=300, bbox_inches='tight')

#plt.tight_layout()
plt.show()

## Final file
### 1 second resolution
**Save final processed file.**

In [None]:
""" CHANGE NAME OF OUTPUT FILE """

# Write the flagged 1-second dataset (timestamps included as the index column) to the Level 2 folder.
level2_csv_path = DATA_LEVEL2_DIRPATH / f"level2_{DATA_FLIGHT_DIR_BASENAME}.csv"
df_level1_5.to_csv(level2_csv_path, index=True)
print(f"Saved Level2 file of Flight {metadata.flight}.")

### 10 second resolution
**Averaging over 10 seconds**

In [None]:
""" CHANGE NAME OF INPUT FILE """

# Reload the saved Level 2 file, indexed by its 'datetime' column.
level2_csv_path = DATA_LEVEL2_DIRPATH / f"level2_{DATA_FLIGHT_DIR_BASENAME}.csv"
df_level2 = pd.read_csv(level2_csv_path).set_index("datetime")

# Remove text columns (they cannot be averaged); names that are absent are ignored
columns_to_remove = ['tapir_GL', 'tapir_Le', 'tapir_Lm', 'tapir_TP', 'tapir_TH', 'tapir_TB', 'campaign']
df_level2 = df_level2.drop(columns=columns_to_remove, errors='ignore')
df_level2

In [None]:
# AVERAGING
from scipy.stats import circmean

df_level2.index = pd.to_datetime(df_level2.index)

# Plain mean over 10-second windows for every column ...
df_level2_average = df_level2.resample('10s').mean()
# ... except wind direction, which is an angle and is averaged with a circular mean (0-360)
wind_dir_windows = df_level2['WindDir'].resample('10s')
df_level2_average['WindDir'] = wind_dir_windows.apply(lambda x: circmean(x.dropna(), high=360, low=0))

# Averaged flags are fractions of the window; flag the window when at least half of it was flagged
flag_cols = ['flag_hovering', 'flag_cloud']
df_level2_average[flag_cols] = df_level2_average[flag_cols].ge(0.5).astype(int)

# Round: coordinates to 4 decimals, flight number to an integer, all other numeric columns to 2 decimals
for coord_col in ('Lat', 'Long'):
    df_level2_average[coord_col] = df_level2_average[coord_col].round(4)
df_level2_average['flight_nr'] = df_level2_average['flight_nr'].round(0).astype(int)
cols_to_round_2 = df_level2_average.select_dtypes(include='number').columns.difference(['Lat', 'Long', 'flight_nr'])
df_level2_average[cols_to_round_2] = df_level2_average[cols_to_round_2].round(2)

# Reduce the averaged (and rounded) filter position to 0 / 1 around the 1.5 threshold
filter_position = df_level2_average['Filter_position']
at_or_below_threshold = filter_position <= 1.5
above_threshold = filter_position > 1.5
df_level2_average.loc[at_or_below_threshold, 'Filter_position'] = 0
df_level2_average.loc[above_threshold, 'Filter_position'] = 1
#df_level2_average['flag_pollution'] = np.nan

df_level2_average['campaign'] = 'ORACLES'
df_level2_average

In [None]:
from helikite.processing.post.level1 import flight_profiles_2

# Limits for x-axis (T, RH, mSEMS, CPC, POPS, mCDA, WS, WD)
# One (lower limit, upper limit, tick step) entry per panel key.
_panel_ranges = {
    'ax1': (-6, 2, 2),
    'ax2': (60, 100, 10),
    'ax3': (0, 1200, 200),
    'ax4': (0, 1200, 200),
    'ax5': (0, 60, 10),
    'ax6': (0, 60, 10),
    'ax7': (0, 12, 3),
}

custom_xlim = {panel: (lower, upper) for panel, (lower, upper, _) in _panel_ranges.items()}

# Ticks run from the lower limit up to and including the upper limit (hence the +1 on the stop).
custom_xticks = {
    panel: np.arange(lower, upper + 1, step)
    for panel, (lower, upper, step) in _panel_ranges.items()
}

# Plot title
# The flight-letter suffix is typed by hand and must match the one in the file name below;
# the title previously said '_B' while the file name said '_A'.
custom_title = f'Flight {metadata.flight} ({metadata.flight_date}_A) [Level 2 - 10s]'
#custom_title = f'Flight 51 (2025-02-06_B) [Level 2]'

# Profile quicklook of the 10-second averaged data
fig = flight_profiles_2(df_level2_average, metadata, xlims=custom_xlim, xticks=custom_xticks, fig_title=custom_title)

# Save the figure after plotting
filename = f'Level2_{metadata.flight_date}_A_10s_Flight_{metadata.flight}.png'
#filename = f'Level2_2025-02-06_B_Flight_51.png'
save_path = DATA_LEVEL2_DIRPATH / filename
print("Saving figure to:", save_path)
fig.savefig(save_path, dpi=300, bbox_inches='tight')

In [None]:
# TEMPORAL PLOT OF FLIGHT with POPS and mSEMS HEAT MAPS

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.colors as mcols
import matplotlib.colors as mcolors
import matplotlib.lines as mlines
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

# Fixed list of 60 bin diameters (one decimal) used as the y-coordinates of the mSEMS
# heat map in this cell; that panel's axis is labelled in nm.
bin_diameter_averages = np.array([
    8.2, 8.7, 9.1, 9.6, 10.1, 10.7, 11.3,
    11.9, 12.5, 13.2, 13.9, 14.7, 15.5,
    16.3, 17.2, 18.2, 19.2, 20.2, 21.3,
    22.5, 23.7, 25.0, 26.4, 27.9, 29.5,
    31.1, 32.8, 34.7, 36.6, 38.7, 40.9,
    43.2, 45.7, 48.3, 51.1, 54.1, 57.2,
    60.5, 64.1, 67.9, 71.9, 76.2, 80.8,
    85.8, 91.0, 96.7, 102.7, 109.2, 116.1,
    123.6, 131.6, 140.3, 149.6, 159.7, 170.7,
    182.5, 195.4, 209.4, 224.6, 241.2
])


%matplotlib inline
plt.close('all')

# Four equally tall, stacked panels that share one time axis
fig, axes = plt.subplots(
    nrows=4, ncols=1, figsize=(16, 12), sharex=True,
    gridspec_kw={'height_ratios': [1] * 4},
)
plt.subplots_adjust(hspace=0.1)

""" SET THE TITLE OF THE PLOT (FLIGHT N° with DATE_X) """
# Flight number and date are filled in from `metadata`.
# The '_X' suffix has to be changed manually according to the flight index of the day (A, B, ...)
fig.suptitle(f'Flight {metadata.flight} ({metadata.flight_date}_A) [Level 2 - 10s]', fontsize=16, fontweight='bold', y=0.91)

### SUBPLOT 1: Altitude vs. Time
def _flagged_times(frame, column, flagged_value=1):
    """Return the index entries where *column* equals *flagged_value* (empty if the column is missing)."""
    if column not in frame.columns:
        print(f"Column '{column}' not found - nothing shaded for it.")
        return frame.index[:0]
    return frame[frame[column] == flagged_value].index


def _shade_runs(axis, times, max_gap, **span_kwargs):
    """Shade each run of consecutive timestamps in *times* on *axis*.

    A new run starts whenever two neighbouring timestamps are more than
    *max_gap* apart. Every span receives the same keyword arguments.
    """
    if times.empty:
        return
    run_start = times[0]
    for previous, current in zip(times[:-1], times[1:]):
        if current - previous > max_gap:
            axis.axvspan(run_start, previous, **span_kwargs)
            run_start = current
    axis.axvspan(run_start, times[-1], **span_kwargs)


ax1 = axes[0]
ax1.plot(df_level2_average.index, df_level2_average['Altitude'], color='black', linewidth=2, label='Altitude')

ax1.set_ylabel('Altitude (m)', fontsize=12, fontweight='bold')
ax1.tick_params(axis='y', labelsize=11)
ax1.grid(True, linestyle='--', linewidth=0.5)
ax1.tick_params(axis='x', labelbottom=False)
ax1.set_ylim(-40, df_level2_average['Altitude'].max() * 1.04)

# The data are 10-second averages, so neighbouring samples of one period are 10 s apart.
# The pollution flag used a 1 s gap here before, which split every run into zero-width spans.
max_gap = pd.Timedelta(seconds=10)

# Shade areas for flag_pollution == 1
pollution_times = _flagged_times(df_level2_average, 'flag_pollution')
_shade_runs(ax1, pollution_times, max_gap, color='lightcoral', alpha=0.8, label='Pollution')

# Shade areas for flag_hovering == 1
hovering_times = _flagged_times(df_level2_average, 'flag_hovering')
_shade_runs(ax1, hovering_times, max_gap, color='beige', alpha=1, label='Hovering')

# Shade areas for flag_cloud == 1
cloud_times = _flagged_times(df_level2_average, 'flag_cloud')
_shade_runs(ax1, cloud_times, max_gap, color='lightblue', alpha=0.5, label='Cloud')

# Shade areas for Filter_position == 1 (already reduced to 0 / 1 in the averaged data)
filter_times = df_level2_average[df_level2_average['Filter_position'] == 1].index
_shade_runs(ax1, filter_times, max_gap, facecolor='none', edgecolor='gray', hatch='////', alpha=0.8, label='Filter')


# Every span of one kind carries the same label, so keep one legend entry per label.
handles, labels = ax1.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
ax1.legend(by_label.values(), by_label.keys(), fontsize=10)


### SUBPLOT 2: mSEMS heatmap & total concentration
ax2 = axes[1]

# Get diameter bin averages
#start_dia = 'msems_inverted_Bin_Dia1'
#end_dia = 'msems_inverted_Bin_Dia60'
#bin_diameter_averages = df_level2_average.loc[:, start_dia:end_dia].mean()

# Size-resolved concentrations: rows with any missing bin are dropped and values are
# floored at 1, the lower bound of the logarithmic colour scale used below.
start_conc, end_conc = 'mSEMS_Bin_Conc1', 'mSEMS_Bin_Conc60'
counts = df_level2_average.loc[:, start_conc:end_conc]
counts.index = df_level2_average.index
counts = counts.astype(float).dropna(how='any').clip(lower=1)

# Time x diameter grid matching the transposed concentration matrix
xx, yy = np.meshgrid(counts.index.values, bin_diameter_averages)

# Heat map on a logarithmic colour scale
norm = mcolors.LogNorm(vmin=1, vmax=1000)
mesh = ax2.pcolormesh(xx, yy, counts.values.T, cmap='viridis', norm=norm, shading="gouraud")

# Colorbar in a narrow inset axes to the right of the panel
divider = make_axes_locatable(ax2)
cax = inset_axes(
    ax2, width="1.5%", height="100%", loc='lower left',
    bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax2.transAxes,
)
cb = fig.colorbar(mesh, cax=cax, orientation='vertical')
cb.ax.tick_params(labelsize=11)
cb.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=13, fontweight='bold')

# Total concentration as red dots on a secondary y-axis
ax2_right = ax2.twinx()
total_conc = df_level2_average['mSEMS_total_N']
ax2_right.scatter(df_level2_average.index, total_conc, color='red', marker='.')
ax2_right.tick_params(axis='y', labelsize=11, colors='red')
ax2_right.set_ylabel('mSEMS conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=8)
ax2_right.set_ylim(0, total_conc.max() * 1.1)

# Diameter axis: logarithmic, fixed range, vertical grid lines only
ax2.set_yscale('log')
ax2.set_ylim(8, 236)
ax2.set_ylabel('Part. Diameter (nm)', fontsize=12, fontweight='bold')
ax2.grid(True, linestyle='--', alpha=0.6, axis='x')


### SUBPLOT 3: POPS heatmap & total concentration
ax3 = axes[2]

# Define pops_dia and pops_dlogDp variables from Handix documentation
pops_dia = [
    149.0801282, 162.7094017, 178.3613191, 195.2873341,
    212.890625, 234.121875, 272.2136986, 322.6106374,
    422.0817873, 561.8906456, 748.8896681, 1054.138693,
    1358.502538, 1802.347716, 2440.99162, 3061.590212
]

pops_dlogDp = [
    0.036454582, 0.039402553, 0.040330922, 0.038498955,
    0.036550107, 0.045593506, 0.082615487, 0.066315868,
    0.15575785, 0.100807113, 0.142865049, 0.152476328,
    0.077693935, 0.157186601, 0.113075192, 0.086705426
]

# Define the range of columns for POPS concentration
start_conc = 'POPS_b3'
end_conc = 'POPS_b15'

# Get POPS concentration data
pops_counts = df_level2_average.loc[:, start_conc:end_conc]
pops_counts = pops_counts.set_index(df_level2_average.index).astype(float)

# Create 2D grid
#pops_dia = np.logspace(np.log10(180), np.log10(3370), num=pops_counts.shape[1])
bin_diameters = pops_dia[3:16]
xx, yy = np.meshgrid(pops_counts.index.values, bin_diameters)

# Heatmap
norm = mcolors.LogNorm(vmin=1, vmax=300)
mesh = ax3.pcolormesh(xx, yy, pops_counts.values.T, cmap='viridis', norm=norm, shading="gouraud")

# Colorbar
divider = make_axes_locatable(ax3)
cax = inset_axes(ax3, width="1.5%", height="100%", loc='lower left',
                 bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax3.transAxes)
cb = fig.colorbar(mesh, cax=cax, orientation='vertical')
cb.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=12, fontweight='bold')
cb.ax.tick_params(labelsize=11)

# Labels and grid
ax3.set_yscale('log')
ax3.set_ylabel('Part. Diameter (nm)', fontsize=12, fontweight='bold')
ax3.tick_params(axis='y', labelsize=11)
ax3.grid(True, linestyle='--', linewidth=0.5, axis='x')
ax3.grid(False, axis='y')
ax3.set_ylim(180, 3370)

# Add Secondary Y-axis for Total POPS Concentration
ax3_right = ax3.twinx()
ax3_right.plot(df_level2_average.index, df_level2_average['POPS_total_N'], color='red', linewidth=2, label='Total POPS Conc.')
ax3_right.set_ylabel('POPS conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=8)
ax3_right.tick_params(axis='y', labelsize=11, colors='red')
ax3_right.spines['right'].set_color('red')
ax3_right.set_ylim(-20, df_level2_average['POPS_total_N'].max() * 1.1)


### Subplot 4: mCDA heatmap & total concentration
ax4 = axes[3]

# Midpoint diameters
Midpoint_diameter_list = np.array([
    0.244381, 0.246646, 0.248908, 0.251144, 0.253398, 0.255593, 0.257846, 0.260141, 0.262561, 0.265062, 0.267712, 0.270370, 0.273159, 0.275904, 0.278724, 0.281554, 0.284585, 0.287661, 0.290892, 0.294127, 0.297512, 0.300813, 0.304101, 0.307439,
    0.310919, 0.314493, 0.318336, 0.322265, 0.326283, 0.330307, 0.334409, 0.338478, 0.342743, 0.347102, 0.351648, 0.356225, 0.360972, 0.365856, 0.371028, 0.376344, 0.382058, 0.387995, 0.394223, 0.400632, 0.407341, 0.414345, 0.421740, 0.429371,
    0.437556, 0.446036, 0.454738, 0.463515, 0.472572, 0.481728, 0.491201, 0.500739, 0.510645, 0.520720, 0.530938, 0.541128, 0.551563, 0.562058, 0.572951, 0.583736, 0.594907, 0.606101, 0.617542, 0.628738, 0.640375, 0.652197, 0.664789, 0.677657,
    0.691517, 0.705944, 0.721263, 0.736906, 0.753552, 0.770735, 0.789397, 0.808690, 0.829510, 0.851216, 0.874296, 0.897757, 0.922457, 0.948074, 0.975372, 1.003264, 1.033206, 1.064365, 1.097090, 1.130405, 1.165455, 1.201346, 1.239589, 1.278023,
    1.318937, 1.360743, 1.403723, 1.446000, 1.489565, 1.532676, 1.577436, 1.621533, 1.667088, 1.712520, 1.758571, 1.802912, 1.847836, 1.891948, 1.937088, 1.981087, 2.027604, 2.074306, 2.121821, 2.168489, 2.216644, 2.263724, 2.312591, 2.361099,
    2.412220, 2.464198, 2.518098, 2.571786, 2.628213, 2.685162, 2.745035, 2.805450, 2.869842, 2.935997, 3.005175, 3.074905, 3.148598, 3.224051, 3.305016, 3.387588, 3.476382, 3.568195, 3.664863, 3.761628, 3.863183, 3.965651, 4.072830, 4.179050,
    4.289743, 4.400463, 4.512449, 4.621025, 4.731530, 4.839920, 4.949855, 5.057777, 5.169742, 5.281416, 5.395039, 5.506828, 5.621488, 5.734391, 5.849553, 5.962881, 6.081516, 6.200801, 6.322133, 6.441786, 6.565130, 6.686935, 6.813017, 6.938981,
    7.071558, 7.205968, 7.345185, 7.483423, 7.628105, 7.774385, 7.926945, 8.080500, 8.247832, 8.419585, 8.598929, 8.780634, 8.973158, 9.167022, 9.372760, 9.582145, 9.808045, 10.041607, 10.287848, 10.537226, 10.801172, 11.068405, 11.345135,
    11.621413, 11.910639, 12.200227, 12.492929, 12.780176, 13.072476, 13.359067, 13.651163, 13.937329, 14.232032, 14.523919, 14.819204, 15.106612, 15.402110, 15.695489, 15.998035, 16.297519, 16.610927, 16.926800, 17.250511,
    17.570901, 17.904338, 18.239874, 18.588605, 18.938763, 19.311505, 19.693678, 20.093464, 20.498208, 20.927653, 21.366609, 21.827923, 22.297936, 22.802929, 23.325426, 23.872344, 24.428708, 25.016547, 25.616663, 26.249815,
    26.888493, 27.563838, 28.246317, 28.944507, 29.626186, 30.323440, 31.005915, 31.691752, 32.353900, 33.030123, 33.692286, 34.350532, 34.984611, 35.626553, 36.250913, 36.878655, 37.489663, 38.121550, 38.748073, 39.384594,
    40.008540, 40.654627, 41.292757, 41.937789, 42.578436
])

# Prepare data
counts = df_level2_average.loc[:, 'mCDA_dataB1':'mCDA_dataB256']
counts = counts.set_index(df_level2_average.index)
counts = counts.astype(float)
counts[counts == 0] = np.nan

bin_diameters = Midpoint_diameter_list
xx, yy = np.meshgrid(counts.index.values, bin_diameters)
Z = counts.values.T

# Plot heatmap
norm = mcolors.LogNorm(vmin=1, vmax=50)
mesh = ax4.pcolormesh(xx, yy, Z, cmap='viridis', norm=norm, shading="gouraud")

# Colorbar
divider = make_axes_locatable(ax4)
cax = inset_axes(ax4, width="1.5%", height="100%", loc='lower left',
                 bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax4.transAxes)
cb = fig.colorbar(mesh, cax=cax, orientation='vertical')
cb.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=12, fontweight='bold')
cb.ax.tick_params(labelsize=11)

# Total concentration
ax4_right = ax4.twinx()
total_conc = df_level2_average['mCDA_total_N']
ax4_right.plot(df_level2_average.index, total_conc, color='red', linewidth=2)
ax4_right.set_ylabel('mCDA conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=15)
ax4_right.tick_params(axis='y', labelsize=11, colors='red')
ax4_right.set_ylim(0, total_conc.max() * 2)
ax4_right.set_xlim(ax4.get_xlim())

# Axis styling
ax4.set_yscale('log')
ax4.set_ylabel('Part. Diameter (μm)', fontsize=12, fontweight='bold')
ax4.set_ylim(0.4, 20)
ax4.grid(True, linestyle='--', linewidth=0.5, axis='x')
ax4.grid(False, axis='y')

# Legend for secondary y-axis
#ax2_right.legend(['mSEMS total conc.'], loc='upper right', fontsize=11, frameon=False)
#ax3_right.legend(['POPS total conc.'], loc='upper right', fontsize=11, frameon=False)
#ax4_right.legend(['mCDA total conc.'], loc='upper right', fontsize=11, frameon=False)

# X-axis formatting for all subplots
ax4.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax4.xaxis.set_major_locator(mdates.MinuteLocator(interval=10))
ax4.set_xlabel('Time', fontsize=13, fontweight='bold', labelpad=10)
ax4.tick_params(axis='x', rotation=90, labelsize=11)

""" SET TIME RANGE (DATE + TIME) """
#ax3.set_xlim(pd.Timestamp('2025-02-12T07:55:00'), pd.Timestamp('2025-02-12T10:20:00'))

""" SAVE PLOT """
filename = f'Level2_{metadata.flight_date}_A_10s_Flight_{metadata.flight}_SizeDistr.png'
save_path = DATA_LEVEL2_DIRPATH / filename
print("Saving figure to:", save_path)
fig.savefig(save_path, dpi=300, bbox_inches='tight')

#plt.tight_layout()
plt.show()

In [None]:
""" CHANGE NAME OF OUTPUT FILE """

# The output name is derived from DATA_FLIGHT_DIR_BASENAME, so selecting another
# flight at the top of the notebook automatically changes the output file.
averaged_dirpath = DATA_LEVEL2_DIRPATH / "Averaged"

# to_csv does not create missing folders, so make sure the target exists
averaged_dirpath.mkdir(parents=True, exist_ok=True)

# index=True keeps the DataFrame index as the first column of the CSV
df_level2_average.to_csv(averaged_dirpath / f"level2_{DATA_FLIGHT_DIR_BASENAME}_10s.csv", index=True)
print(f"Saved Level2 (averaged) file of Flight {metadata.flight}.")

**FINISHED**

# Final file
Combine all averaged Level 2 CSV files into a single comma-separated text file.

In [None]:
import pandas as pd
import glob
import os

# Folder containing the averaged Level 2 CSV files (one per flight)
folder = DATA_LEVEL2_DIRPATH / "Averaged"

# Find all CSV files in the folder; sorted so the read order is reproducible
csv_files = sorted(glob.glob(os.path.join(folder, "*.csv")))
if not csv_files:
    # pd.concat on an empty list fails with a much less helpful message
    raise FileNotFoundError(f"No CSV files found in {folder}")

# Read all files the same way; the datetime column is parsed after concatenation
df_list = [pd.read_csv(file) for file in csv_files]

# Combine all files
combined_df = pd.concat(df_list, ignore_index=True)

# Parse the first column as datetime
# NOTE(review): presumably this is the datetime index written by to_csv(index=True) -- confirm
datetime_col = combined_df.columns[0]
combined_df[datetime_col] = pd.to_datetime(combined_df[datetime_col], errors='coerce')

# errors='coerce' silently turns unparsable timestamps into NaT, so report them
n_invalid = int(combined_df[datetime_col].isna().sum())
if n_invalid:
    print(f"Warning: {n_invalid} rows have an unparsable timestamp (NaT).")

# Sort chronologically and use the parsed column as index. The same column
# reference is used throughout instead of a hard-coded column name.
combined_df = combined_df.sort_values(by=datetime_col)
combined_df = combined_df.set_index(datetime_col)

# Save as a single comma-separated text file
output_path = folder / "ORACLES_64Helikite_2024.txt"
combined_df.to_csv(output_path, index=True)
combined_df

In [None]:
# TEMPORAL PLOT OF ALL FLIGHTS with mSEMS, POPS and mCDA HEAT MAPS

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
from matplotlib.lines import Line2D
import matplotlib.ticker as ticker
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.colors as mcols
import matplotlib.colors as mcolors
import matplotlib.lines as mlines
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes

# Hard-coded mSEMS bin diameters (60 values, one per mSEMS_Bin_Conc column);
# used as the y-coordinates of the mSEMS heatmap in this cell.
# NOTE(review): presumably in nm, matching the 'Part. Diameter (nm)' axis label,
# and apparently replacing the commented-out mean of the msems_inverted_Bin_Dia
# columns -- confirm.
bin_diameter_averages = np.array([
    8.2, 8.7, 9.1, 9.6, 10.1, 10.7, 11.3,
    11.9, 12.5, 13.2, 13.9, 14.7, 15.5,
    16.3, 17.2, 18.2, 19.2, 20.2, 21.3,
    22.5, 23.7, 25.0, 26.4, 27.9, 29.5,
    31.1, 32.8, 34.7, 36.6, 38.7, 40.9,
    43.2, 45.7, 48.3, 51.1, 54.1, 57.2,
    60.5, 64.1, 67.9, 71.9, 76.2, 80.8,
    85.8, 91.0, 96.7, 102.7, 109.2, 116.1,
    123.6, 131.6, 140.3, 149.6, 159.7, 170.7,
    182.5, 195.4, 209.4, 224.6, 241.2
])


%matplotlib inline
plt.close('all')

# Create figure with 3 subplots, sharing the same x-axis
fig, axes = plt.subplots(4, 1, figsize=(16, 12), sharex=True, gridspec_kw={'height_ratios': [1, 1, 1, 1]})
plt.subplots_adjust(hspace=0.1)

""" SET THE TITLE OF THE PLOT (FLIGHT N° with DATE_X) """
# 'i' will automatically be replaced by the set flight number
# '_X' has to be changed manually in function of the flight index of the day (A, B, ...)
fig.suptitle(f'Big helikite flights', fontsize=16, fontweight='bold', y=0.91)

### SUBPLOT 1: Altitude vs. Time
ax1 = axes[0]
ax1.plot(combined_df.index, combined_df['Altitude'], color='black', linewidth=2, label='Altitude')

ax1.set_ylabel('Altitude (m)', fontsize=12, fontweight='bold')
ax1.tick_params(axis='y', labelsize=11)
ax1.grid(True, linestyle='--', linewidth=0.5)
ax1.tick_params(axis='x', labelbottom=False)
ax1.set_ylim(-40, combined_df['Altitude'].max() * 1.04)

# Shade areas for flag_pollution == 1
pollution_times = combined_df[combined_df['flag_pollution'] == 1].index
if not pollution_times.empty:
    start = pollution_times[0]
    for i in range(1, len(pollution_times)):
        if (pollution_times[i] - pollution_times[i - 1]) > pd.Timedelta(seconds=1):
            ax1.axvspan(start, pollution_times[i - 1], color='lightcoral', alpha=0.8, label='Pollution')
            start = pollution_times[i]
    ax1.axvspan(start, pollution_times[-1], color='lightcoral', alpha=0.8, label='Pollution')

# Shade areas for flag_hovering == 1
combined_df.index = pd.to_datetime(combined_df.index, errors='coerce')
hovering_times = combined_df[combined_df['flag_hovering'] == 1].index
if not hovering_times.empty:
    start = hovering_times[0]
    for i in range(1, len(hovering_times)):
        if (hovering_times[i] - hovering_times[i - 1]) > pd.Timedelta(seconds=10):
            ax1.axvspan(start, hovering_times[i - 1], color='beige', alpha=1, label='Hovering')
            start = hovering_times[i]
    ax1.axvspan(start, hovering_times[-1], color='beige', alpha=1, label='Hovering')

# Shade areas for flag_cloud == 1
cloud_times = combined_df[combined_df['flag_cloud'] == 1].index
if not cloud_times.empty:
    start = cloud_times[0]
    for i in range(1, len(cloud_times)):
        if (cloud_times[i] - cloud_times[i - 1]) > pd.Timedelta(seconds=10):
            ax1.axvspan(start, cloud_times[i - 1], color='lightblue', alpha=0.5, label='Cloud')
            start = cloud_times[i]
    ax1.axvspan(start, cloud_times[-1], color='lightblue', alpha=0.5, label='Cloud')

# Shade areas for Filter_position !== 0.0
filter_times = combined_df[combined_df['Filter_position'] == 1].index
if not filter_times.empty:
    start = filter_times[0]
    for i in range(1, len(filter_times)):
        if (filter_times[i] - filter_times[i - 1]) > pd.Timedelta(seconds=10):
            ax1.axvspan(start, filter_times[i - 1], facecolor='none', edgecolor='gray', hatch='////', alpha=0.8, label='Filter')
            start = filter_times[i]
    ax1.axvspan(start, filter_times[-1], facecolor='none', edgecolor='gray', hatch='////', alpha=0.8, label='Filter')


# Optional: Clean legend (avoid duplicates)
handles, labels = ax1.get_legend_handles_labels()
by_label = dict(zip(labels, handles))
ax1.legend(by_label.values(), by_label.keys(), fontsize=10)


### SUBPLOT 2: mSEMS heatmap & total concentration
ax2 = axes[1]

# Bin diameters are taken from the bin_diameter_averages array instead of:
#start_dia = 'msems_inverted_Bin_Dia1'
#end_dia = 'msems_inverted_Bin_Dia60'
#bin_diameter_averages = combined_df.loc[:, start_dia:end_dia].mean()

# Size-bin concentrations as floats: rows with any missing bin are dropped and
# values are floored at 1, the lower limit (vmin) of the colour scale below
msems_counts = (
    combined_df.loc[:, 'mSEMS_Bin_Conc1':'mSEMS_Bin_Conc60']
    .astype(float)
    .dropna(how='any')
    .clip(lower=1)
)

# Time (x) by diameter (y) grid matching the transposed count matrix
msems_time, msems_dia = np.meshgrid(msems_counts.index.values, bin_diameter_averages)

# Heatmap on a logarithmic colour scale
msems_norm = mcolors.LogNorm(vmin=1, vmax=1000)
msems_mesh = ax2.pcolormesh(
    msems_time, msems_dia, msems_counts.values.T,
    cmap='viridis', norm=msems_norm, shading="gouraud",
)

# Colorbar in its own inset axes to the right of the panel
# NOTE(review): 'divider' is not used afterwards; the call is kept unchanged
divider = make_axes_locatable(ax2)
msems_cax = inset_axes(
    ax2, width="1.5%", height="100%", loc='lower left',
    bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax2.transAxes,
)
msems_cbar = fig.colorbar(msems_mesh, cax=msems_cax, orientation='vertical')
msems_cbar.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=13, fontweight='bold')
msems_cbar.ax.tick_params(labelsize=11)

# Secondary y-axis: total mSEMS concentration as red dots
ax2_right = ax2.twinx()
msems_total = combined_df['mSEMS_total_N']
ax2_right.scatter(combined_df.index, msems_total, color='red', marker='.')
ax2_right.set_ylabel('mSEMS conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=8)
ax2_right.tick_params(axis='y', labelsize=11, colors='red')
#ax2_right.set_ylim(0, total_conc.max() * 1.1)

# Diameter axis: logarithmic, fixed range, vertical grid lines only
ax2.set_yscale('log')
ax2.set_ylabel('Part. Diameter (nm)', fontsize=12, fontweight='bold')
ax2.set_ylim(8, 250)
ax2.grid(True, linestyle='--', alpha=0.6, axis='x')


### SUBPLOT 3: POPS heatmap & total concentration
ax3 = axes[2]

# POPS bin tables (16 entries each): pops_dia and pops_dlogDp.
# NOTE(review): pops_dlogDp is not used in this panel.
pops_dia = [
    149.0801282, 162.7094017, 178.3613191, 195.2873341,
    212.890625, 234.121875, 272.2136986, 322.6106374,
    422.0817873, 561.8906456, 748.8896681, 1054.138693,
    1358.502538, 1802.347716, 2440.99162, 3061.590212,
]

pops_dlogDp = [
    0.036454582, 0.039402553, 0.040330922, 0.038498955,
    0.036550107, 0.045593506, 0.082615487, 0.066315868,
    0.15575785, 0.100807113, 0.142865049, 0.152476328,
    0.077693935, 0.157186601, 0.113075192, 0.086705426,
]

# POPS concentration columns POPS_b3 .. POPS_b15 as floats
pops_counts = combined_df.loc[:, 'POPS_b3':'POPS_b15'].astype(float)

# Time (x) by diameter (y) grid; the diameters are entries 3..15 of pops_dia
#pops_dia = np.logspace(np.log10(180), np.log10(3370), num=pops_counts.shape[1])
pops_bin_diameters = pops_dia[3:16]
pops_time, pops_dia_grid = np.meshgrid(pops_counts.index.values, pops_bin_diameters)

# Heatmap on a logarithmic colour scale
pops_norm = mcolors.LogNorm(vmin=1, vmax=300)
pops_mesh = ax3.pcolormesh(
    pops_time, pops_dia_grid, pops_counts.values.T,
    cmap='viridis', norm=pops_norm, shading="gouraud",
)

# Colorbar in its own inset axes to the right of the panel
# NOTE(review): 'divider' is not used afterwards; the call is kept unchanged
divider = make_axes_locatable(ax3)
pops_cax = inset_axes(
    ax3, width="1.5%", height="100%", loc='lower left',
    bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax3.transAxes,
)
pops_cbar = fig.colorbar(pops_mesh, cax=pops_cax, orientation='vertical')
pops_cbar.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=12, fontweight='bold')
pops_cbar.ax.tick_params(labelsize=11)

# Diameter axis: logarithmic, fixed range, vertical grid lines only
ax3.set_yscale('log')
ax3.set_ylabel('Part. Diameter (nm)', fontsize=12, fontweight='bold')
ax3.tick_params(axis='y', labelsize=11)
ax3.grid(True, linestyle='--', linewidth=0.5, axis='x')
ax3.grid(False, axis='y')
ax3.set_ylim(180, 3370)

# Secondary y-axis: total POPS concentration as a red line
ax3_right = ax3.twinx()
pops_total = combined_df['POPS_total_N']
ax3_right.plot(combined_df.index, pops_total, color='red', linewidth=2, label='Total POPS Conc.')
ax3_right.set_ylabel('POPS conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=8)
ax3_right.tick_params(axis='y', labelsize=11, colors='red')
ax3_right.spines['right'].set_color('red')
ax3_right.set_ylim(-20, pops_total.max() * 1.1)


### Subplot 4: mCDA heatmap & total concentration
ax4 = axes[3]

# Midpoint diameters
Midpoint_diameter_list = np.array([
    0.244381, 0.246646, 0.248908, 0.251144, 0.253398, 0.255593, 0.257846, 0.260141, 0.262561, 0.265062, 0.267712, 0.270370, 0.273159, 0.275904, 0.278724, 0.281554, 0.284585, 0.287661, 0.290892, 0.294127, 0.297512, 0.300813, 0.304101, 0.307439,
    0.310919, 0.314493, 0.318336, 0.322265, 0.326283, 0.330307, 0.334409, 0.338478, 0.342743, 0.347102, 0.351648, 0.356225, 0.360972, 0.365856, 0.371028, 0.376344, 0.382058, 0.387995, 0.394223, 0.400632, 0.407341, 0.414345, 0.421740, 0.429371,
    0.437556, 0.446036, 0.454738, 0.463515, 0.472572, 0.481728, 0.491201, 0.500739, 0.510645, 0.520720, 0.530938, 0.541128, 0.551563, 0.562058, 0.572951, 0.583736, 0.594907, 0.606101, 0.617542, 0.628738, 0.640375, 0.652197, 0.664789, 0.677657,
    0.691517, 0.705944, 0.721263, 0.736906, 0.753552, 0.770735, 0.789397, 0.808690, 0.829510, 0.851216, 0.874296, 0.897757, 0.922457, 0.948074, 0.975372, 1.003264, 1.033206, 1.064365, 1.097090, 1.130405, 1.165455, 1.201346, 1.239589, 1.278023,
    1.318937, 1.360743, 1.403723, 1.446000, 1.489565, 1.532676, 1.577436, 1.621533, 1.667088, 1.712520, 1.758571, 1.802912, 1.847836, 1.891948, 1.937088, 1.981087, 2.027604, 2.074306, 2.121821, 2.168489, 2.216644, 2.263724, 2.312591, 2.361099,
    2.412220, 2.464198, 2.518098, 2.571786, 2.628213, 2.685162, 2.745035, 2.805450, 2.869842, 2.935997, 3.005175, 3.074905, 3.148598, 3.224051, 3.305016, 3.387588, 3.476382, 3.568195, 3.664863, 3.761628, 3.863183, 3.965651, 4.072830, 4.179050,
    4.289743, 4.400463, 4.512449, 4.621025, 4.731530, 4.839920, 4.949855, 5.057777, 5.169742, 5.281416, 5.395039, 5.506828, 5.621488, 5.734391, 5.849553, 5.962881, 6.081516, 6.200801, 6.322133, 6.441786, 6.565130, 6.686935, 6.813017, 6.938981,
    7.071558, 7.205968, 7.345185, 7.483423, 7.628105, 7.774385, 7.926945, 8.080500, 8.247832, 8.419585, 8.598929, 8.780634, 8.973158, 9.167022, 9.372760, 9.582145, 9.808045, 10.041607, 10.287848, 10.537226, 10.801172, 11.068405, 11.345135,
    11.621413, 11.910639, 12.200227, 12.492929, 12.780176, 13.072476, 13.359067, 13.651163, 13.937329, 14.232032, 14.523919, 14.819204, 15.106612, 15.402110, 15.695489, 15.998035, 16.297519, 16.610927, 16.926800, 17.250511,
    17.570901, 17.904338, 18.239874, 18.588605, 18.938763, 19.311505, 19.693678, 20.093464, 20.498208, 20.927653, 21.366609, 21.827923, 22.297936, 22.802929, 23.325426, 23.872344, 24.428708, 25.016547, 25.616663, 26.249815,
    26.888493, 27.563838, 28.246317, 28.944507, 29.626186, 30.323440, 31.005915, 31.691752, 32.353900, 33.030123, 33.692286, 34.350532, 34.984611, 35.626553, 36.250913, 36.878655, 37.489663, 38.121550, 38.748073, 39.384594,
    40.008540, 40.654627, 41.292757, 41.937789, 42.578436
])

# Prepare data: mCDA bin columns as floats, with zeros replaced by NaN
mcda_counts = combined_df.loc[:, 'mCDA_dataB1':'mCDA_dataB256'].astype(float)
mcda_counts = mcda_counts.mask(mcda_counts == 0)

# Time (x) by midpoint-diameter (y) grid; Z holds one row per diameter bin
bin_diameters = Midpoint_diameter_list
mcda_time, mcda_dia = np.meshgrid(mcda_counts.index.values, bin_diameters)
Z = mcda_counts.to_numpy().T

# Heatmap on a logarithmic colour scale
mcda_norm = mcolors.LogNorm(vmin=1, vmax=50)
mcda_mesh = ax4.pcolormesh(
    mcda_time, mcda_dia, Z,
    cmap='viridis', norm=mcda_norm, shading="gouraud",
)

# Colorbar in its own inset axes to the right of the panel
# NOTE(review): 'divider' is not used afterwards; the call is kept unchanged
divider = make_axes_locatable(ax4)
mcda_cax = inset_axes(
    ax4, width="1.5%", height="100%", loc='lower left',
    bbox_to_anchor=(1.08, -0.025, 1, 1), bbox_transform=ax4.transAxes,
)
mcda_cbar = fig.colorbar(mcda_mesh, cax=mcda_cax, orientation='vertical')
mcda_cbar.set_label('dN/dlogD$_p$ (cm$^{-3}$)', fontsize=12, fontweight='bold')
mcda_cbar.ax.tick_params(labelsize=11)

# Secondary y-axis: total mCDA concentration as a red line; the upper limit is
# twice the maximum and the x-range is copied from the heatmap axes
ax4_right = ax4.twinx()
mcda_total = combined_df['mCDA_total_N']
ax4_right.plot(combined_df.index, mcda_total, color='red', linewidth=2)
ax4_right.set_ylabel('mCDA conc (cm$^{-3}$)', fontsize=12, fontweight='bold', color='red', labelpad=15)
ax4_right.tick_params(axis='y', labelsize=11, colors='red')
ax4_right.set_ylim(0, mcda_total.max() * 2)
ax4_right.set_xlim(ax4.get_xlim())

# Diameter axis: logarithmic, fixed range, vertical grid lines only
ax4.set_yscale('log')
ax4.set_ylabel('Part. Diameter (μm)', fontsize=12, fontweight='bold')
ax4.set_ylim(0.4, 20)
ax4.grid(True, linestyle='--', linewidth=0.5, axis='x')
ax4.grid(False, axis='y')

# Legend for secondary y-axis
#ax2_right.legend(['mSEMS total conc.'], loc='upper right', fontsize=11, frameon=False)
#ax3_right.legend(['POPS total conc.'], loc='upper right', fontsize=11, frameon=False)
#ax4_right.legend(['mCDA total conc.'], loc='upper right', fontsize=11, frameon=False)

# X-axis formatting on the bottom panel (the panels are created with sharex=True).
# The combined plot has one tick per day, so the label must carry the date:
# an hour:minute label would read the same on every tick.
ax4.xaxis.set_major_locator(mdates.DayLocator(interval=1))
ax4.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax4.set_xlabel('Time', fontsize=13, fontweight='bold', labelpad=10)
ax4.tick_params(axis='x', rotation=90, labelsize=11)

# SET TIME RANGE (DATE + TIME): uncomment and adapt to zoom in on a period
#ax3.set_xlim(pd.Timestamp('2025-02-12T07:55:00'), pd.Timestamp('2025-02-12T10:20:00'))

# SAVE PLOT
filename = 'BigHelikiteFlights_SizeDistr.png'
save_path = DATA_LEVEL2_DIRPATH / filename
print("Saving figure to:", save_path)
fig.savefig(save_path, dpi=300, bbox_inches='tight')

#plt.tight_layout()
plt.show()

# Random code bits

### Remove wind speed (WS) and wind direction (WD) outliers after data processing

In [None]:
# Load the Level 2 CSV whose name is built from the flight date and the "_B" suffix
level2_filename = f"level2_{metadata.flight_date}_B.csv"
file_path = DATA_LEVEL2_DIRPATH.joinpath(level2_filename)

flight51 = pd.read_csv(file_path)
flight51

In [None]:
# Wind speeds outside [WIND_SPEED_MIN, WIND_SPEED_MAX] are treated as outliers.
# NOTE(review): the comment and plot title used to say "> 15" while the filter
# used "> 5". The filter value is kept and the title is now built from these
# limits so the two cannot diverge -- confirm which upper limit is intended.
WIND_SPEED_MIN = 0.1
WIND_SPEED_MAX = 5

# Replace out-of-range WindSpeed, and the corresponding WindDir, with NaN
mask = (flight51['WindSpeed'] > WIND_SPEED_MAX) | (flight51['WindSpeed'] < WIND_SPEED_MIN)
flight51.loc[mask, ['WindSpeed', 'WindDir']] = np.nan

# Plot WindSpeed vs Altitude
plt.figure(figsize=(6,8))
plt.plot(flight51['WindSpeed'], flight51['Altitude'], color='blue', linewidth=2)

plt.xlabel("Wind Speed [m/s]", fontsize=12, fontweight='bold')
plt.ylabel("Altitude [m]", fontsize=12, fontweight='bold')
plt.title(f"Wind Speed Profile (NaN > {WIND_SPEED_MAX} or < {WIND_SPEED_MIN}) - Flight 51", fontsize=14, fontweight='bold')
plt.grid(True, linestyle="--", alpha=0.5)
plt.show()

In [None]:
# Display flight51 in the notebook for a visual check
flight51

In [None]:
# Write flight51 to its Level 2 CSV. The path is built exactly like the one the
# file was loaded from, so an existing file at that path is overwritten.
level2_filename = f"level2_{metadata.flight_date}_B.csv"
file_path = DATA_LEVEL2_DIRPATH.joinpath(level2_filename)

# index=False: the row index is not written to the file
flight51.to_csv(file_path, index=False)
print(f"Flight 51 saved to {file_path}")