# Data processing - Level 0

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import datetime
from pathlib import Path

# Flight identifier and flight date; both are handed to the Cleaner further down.
FLIGHT = "6"
FLIGHT_DATE = datetime.date(year=2025, month=2, day=12)

# Root of the campaign data, and the sub-folder holding this flight's files.
# The flight folder is named "<ISO date>_A" inside "2024-2025_Sorted".
DATA_DIRPATH = Path(r"C:\Users\temel\Desktop\EERL\Campaigns\03_ORACLES\Neumayer_2024\Data")
DATA_FLIGHT_DIRPATH = DATA_DIRPATH.joinpath("2024-2025_Sorted", f"{FLIGHT_DATE.isoformat()}_A")

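If needed, reorder the columns of the flight computer (FC) CSV file: `Time` stays first and the `MS…` columns are moved to the end. The result is written to a new `…_reordered.csv` file.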

In [None]:
import pandas as pd

# Flight-computer CSV to reorder. The output path is derived from this one
# below, so this is the only filename that has to be edited.
fc_csv_path = DATA_FLIGHT_DIRPATH / "HFC_250215_1.csv"  # replace with your actual filename
df1 = pd.read_csv(fc_csv_path)

# "MS"-prefixed columns that are pushed to the end of the table.
cols_to_move = [
    "MSsheath_rh", "MSsheath_temp", "MSpressure", "MSlfe_temp", "MSsheath_flow",
    "MSsheath_pwr", "MSimpct_prs", "MShv_volts", "MShv_dac", "MSsd_install",
    "MSext_volts", "MSmsems_errs", "MSmcpc_hrtb", "MSmcpc_smpf", "MSmcpc_satf",
    "MSmcpc_cndt", "MSmcpc_satt", "MSmcpc_sn", "MSmcpc_errs", "MSmcpcpwr",
    "MSmcpcpmp", "MSsd_save", "MSsave_flag"
]

# Only move columns that actually exist in this file.
cols_to_move = [col for col in cols_to_move if col in df1.columns]
# Keep 'Time' as the first column when it is present.
first_col = ['Time'] if 'Time' in df1.columns else []
# Build the exclusion set once instead of concatenating two lists per column.
pinned_cols = {*first_col, *cols_to_move}
other_cols = [col for col in df1.columns if col not in pinned_cols]
# Final layout: Time, every untouched column in its original order, then the MS columns.
new_order = first_col + other_cols + cols_to_move
df2 = df1[new_order]

# Write next to the input as "<input stem>_reordered<input suffix>" so the
# output name always follows the input name.
reordered_csv_path = fc_csv_path.with_name(f"{fc_csv_path.stem}_reordered{fc_csv_path.suffix}")
df2.to_csv(reordered_csv_path, index=False)
df2.head()

### Load flight data

In [None]:
from helikite import Cleaner
from helikite.instruments import flight_computer_v1, flight_computer_v2, smart_tether, pops

# Ask helikite which instruments it detects in the flight folder.
detected_instruments = Cleaner.detect_instruments(DATA_FLIGHT_DIRPATH)
# Candidates for the reference instrument, passed in this order
# (presumably an order of preference — confirm against helikite).
reference_instrument_candidates = [flight_computer_v1, flight_computer_v2, smart_tether, pops]
# Pick the reference instrument from the candidates, given what was detected.
reference_instrument = Cleaner.choose_reference_instrument(detected_instruments, reference_instrument_candidates)

In [None]:
from helikite import Cleaner, instruments

import datetime

# Build the Cleaner for this flight from the detected instruments, the chosen
# reference instrument, the flight folder and the flight date/identifier.
cleaner = Cleaner(
    instruments=detected_instruments,
    reference_instrument=reference_instrument,
    input_folder=DATA_FLIGHT_DIRPATH,
    flight_date=FLIGHT_DATE,
    # Explicit take-off/landing times are left disabled here. The example
    # values below are dated 2024-04-02, not FLIGHT_DATE.
    # time_takeoff=datetime.datetime(2024,4,2,10,0,35),
    # time_landing=datetime.datetime(2024,4,2,13,4,4),
    flight=FLIGHT,
    time_offset=datetime.time(0),  # 00:00:00 — presumably "no offset"; confirm against helikite
)

# Report the cleaner's current state (output format is defined by helikite).
cleaner.state()
#print(cleaner.flight_computer.df.head())

In [None]:
# Preparation steps, run in this order before time synchronisation.
# Presumably makes the timestamp the index of every instrument DataFrame — confirm against helikite.
cleaner.set_time_as_index()
# Fill missing flight-computer timestamps at a 1 s frequency using forward-fill.
cleaner.flight_computer.df = cleaner.fill_missing_timestamps(cleaner.flight_computer.df, freq="1s", fill_method="ffill")
#cleaner.pops.df = cleaner.fill_missing_timestamps(cleaner.pops.df, freq="1s", fill_method="ffill")
# NOTE(review): pandas aligns a Series on the index when it is assigned as a
# column, so rows of msems_inverted without a matching msems_scan timestamp
# would get NaN — confirm both frames share the same index.
cleaner.msems_inverted.df['scan_direction'] = cleaner.msems_scan.df['scan_direction']  # To have 0 / 1 values when changing scans --> for removal of duplicates
# Apply helikite's data corrections, then set the pressure column
# (what each step changes is defined in helikite, not visible here).
cleaner.data_corrections()
cleaner.set_pressure_column()

**If needed, manually shift an instrument by X seconds**

In [None]:
# Manual per-instrument time shifts applied before the flight-time definition.
# DataFrame.shift(freq=...) moves the index (the timestamps) and leaves the
# values untouched; a negative offset moves the data earlier in time.
# Only the POPS shift is active; uncomment and adjust the others as needed.
#cleaner.smart_tether.df = cleaner.smart_tether.df.shift(freq="-2s")       # ST
cleaner.pops.df = cleaner.pops.df.shift(freq="-50s")                       # POPS
#cleaner.msems_readings.df = cleaner.msems_readings.df.shift(freq="-7s")   # mSEMS readings
#cleaner.msems_inverted.df = cleaner.msems_inverted.df.shift(freq="-14s")  # mSEMS inverted
#cleaner.msems_scan.df = cleaner.msems_scan.df.shift(freq="-14s")          # mSEMS scan
#cleaner.mcda.df = cleaner.mcda.df.shift(freq="-1s")                       # mCDA

### Flight time definition
Choose the start and end of the flight.  
Two cases for ORACLES:  
- Start and stop at sledge height
- Start and stop at balloon height (3.5 m is added later in the processing)

In [None]:
# Define the start and end of the flight (presumably an interactive selection — confirm against helikite).
cleaner.define_flight_times()

In [None]:
# Print the dtype of each instrument's DataFrame index, one line per
# instrument. Instruments that are commented out are skipped.
for instrument_name in (
    "flight_computer",
    # "smart_tether",
    "pops",
    "msems_readings",
    "msems_inverted",
    "msems_scan",
    # "stap",
    # "mcda",
):
    print(getattr(cleaner, instrument_name).df.index.dtype)

### Cross correlation
Time synchronisation based on the pressure measured by the different instruments.

In [None]:
# Pressure-based time synchronisation between instruments.
# max_lag=180 is presumably the largest lag searched, in seconds — confirm against helikite.
cleaner.correct_time_and_pressure(max_lag=180)

In [None]:
# Plot the instruments' pressure to check the synchronisation visually.
cleaner.plot_pressure()

In [None]:
# MANUAL TIME CORRECTION
# Residual per-instrument shifts applied after the cross-correlation step.
# DataFrame.shift(freq=...) moves the timestamps in the index, not the values:
# a positive offset moves the data later, a negative offset earlier.
# NOTE(review): the smart tether is shifted here although its index-dtype
# print in an earlier cell is commented out — confirm it is loaded for this flight.

cleaner.flight_computer.df = cleaner.flight_computer.df.shift(freq="2s")  # FC
cleaner.smart_tether.df = cleaner.smart_tether.df.shift(freq="-6s")       # ST
#cleaner.pops.df = cleaner.pops.df.shift(freq="64s")                       # POPS
#cleaner.msems_readings.df = cleaner.msems_readings.df.shift(freq="-7s")   # mSEMS readings
#cleaner.msems_inverted.df = cleaner.msems_inverted.df.shift(freq="-14s")  # mSEMS inverted
#cleaner.msems_scan.df = cleaner.msems_scan.df.shift(freq="-14s")          # mSEMS scan
#cleaner.mcda.df = cleaner.mcda.df.shift(freq="-1s")                       # mCDA

In [None]:
# Show the first rows of cleaner.df_corr (presumably the cross-correlation result — confirm against helikite).
print(cleaner.df_corr.head())

### Dataframe creation
Removal of duplicates and merging instruments into one dataframe.

In [None]:
# Remove duplicates first, then merge the instruments into one DataFrame.
cleaner.remove_duplicates()
cleaner.merge_instruments()
# Export is left disabled here; it is done at the end of the notebook.
# cleaner.export_data()

### Overview plot of time synchronisation

In [None]:
import matplotlib
import numpy as np
import matplotlib.colors as mcols
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib.dates as mdates

%matplotlib ipympl
plt.close('all')
fig, ax = plt.subplots(1, 1, figsize=(10, 8))

# One row per instrument, in drawing (and therefore legend) order:
#   (cleaner attribute, colour, label before time shift, label after time shift,
#    forward-fill the shifted pressure before plotting)
# A label of None means that curve is not drawn.
pressure_traces = [
    ("flight_computer", 'C0', 'FC init', 'FC', False),
    ("msems_readings", 'C3', 'mSEMS init', 'mSEMS corr', False),
    ("smart_tether", 'C1', 'ST init', None, False),  # 'ST corr' curve is disabled
    ("pops", 'C2', 'POPS init', 'POPS corr', False),
    ("msems_inverted", 'C6', 'mSEMS_inv init', 'mSEMS_inv corr', True),
    ("mcda", 'C4', 'mCDA init', 'mCDA corr', True),
]

for attr_name, colour, init_label, corr_label, fill_gaps in pressure_traces:
    instrument = getattr(cleaner, attr_name)

    # Dashed line: pressure stored in df_before_timeshift.
    before = instrument.df_before_timeshift
    ax.plot(before.index, before['pressure'], linestyle='dashed', color=colour, label=init_label)

    if corr_label is None:
        continue

    # Solid line: pressure in the current (shifted) DataFrame, forward-filled
    # first for the instruments flagged above.
    after = instrument.df
    pressure = after['pressure'].ffill() if fill_gaps else after['pressure']
    ax.plot(after.index, pressure, color=colour, label=corr_label)

# Axis labels share the same text styling.
axis_label_style = dict(fontsize=10, labelpad=15, fontweight='bold')
ax.set_xlabel("Time", **axis_label_style)
ax.set_ylabel("Pressure (hPa)", **axis_label_style)
# NOTE(review): the title and the file name below use a "_B" suffix while the
# flight folder is named "<date>_A" — confirm which one is intended.
ax.set_title(f'Flight {cleaner.flight} ({cleaner.flight_date}_B) [Level 0]', fontsize=12, fontweight='bold', pad=15)
ax.grid(ls='--')
ax.legend(ncols=2)

# Show the plot
plt.show()

# Save the plot
# NOTE(review): this folder is under the 01_Turtmann campaign while the data
# root is under 03_ORACLES — confirm the figure should be written here.
folder_path = r"C:\Users\temel\Desktop\EERL\Campaigns\01_Turtmann\data\Helikite\Processing\Level0"
filename = f'Level0_{cleaner.flight_date}_B_Flight_{cleaner.flight}_TimeSync.png'
save_path = f'{folder_path}\\{filename}'
print("Saving figure to:", save_path)
fig.savefig(save_path, dpi=300, bbox_inches='tight')

## Level 0

In [None]:
# Run helikite's shift_msems_columns_by_90s on cleaner.master_df and store the
# result back on the cleaner (which columns are shifted is defined in helikite).
cleaner.master_df = cleaner.shift_msems_columns_by_90s(cleaner.master_df)
# Alternative flow using a separately joined DataFrame, currently disabled.
#master_df_joined = cleaner.shift_msems_columns_by_90s(master_df_joined)
#cleaner.master_df = master_df_joined
# cleaner.export_data()

In [None]:
# Export the Level 0 data to <DATA_DIRPATH>/Processing/Level0/level0_<date>_B
# (no file extension is given here; presumably helikite appends it — confirm).
# NOTE(review): "_B" suffix here vs. the "_A" flight folder — confirm which is intended.
cleaner.export_data(filename=DATA_DIRPATH / "Processing" / "Level0" / f"level0_{FLIGHT_DATE}_B")