# Data processing - Level 1.5

- Remove data before takeoff and after landing – the data processor needs to make an expert judgement (first couple of meters)
- Definition of columns, order them specifically for final output, 1 file per campaign
- Only report instruments that were flown in that campaign

Timestamp (native), altitude, lat, lon, pressure, Temp, RH, wind speed, wind direction
Total conc: POPS, mSEMS, miniCDA, CPC, ‘other’ (e.g., partector, LOAC)
Other total variables: absorption coefficients, eBC concentration, CO2, CO, O3
Size distributions: usable POPS bins; mSEMS bins, mCDA bins, other (partector bins, LOAC bins)
Pollution flag*, Flight number, campaign name

*: Flag pollution only if it was not targeted in the science and is not representative of the local environment (e.g., pollution in ALPACA = not to be flagged as pollution; during ArtofMelt = pollution has to be flagged)

In [None]:
# Enable the autoreload extension in mode 2: imported modules are reloaded
# before each cell is executed, so edits to source files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2
# Select the ipympl Matplotlib backend for this notebook.
# NOTE(review): presumably required by the interactive flag selection cells below — confirm.
%matplotlib ipympl

## Load level 1 dataset

In [None]:
from helikite.classes.data_processing_level1 import DataProcessorLevel1
from helikite.constants import constants
from helikite.metadata.utils import load_parquet

# Locations of the level 1 CSV for this flight and of the level 0 parquet file.
level1_csv_path = constants.LEVEL1_DIRPATH / f"level1_{constants.FLIGHT_BASENAME}.csv"
level0_parquet_path = constants.LEVEL0_DIRPATH / f"{constants.LEVEL0_FILE_BASENAME}.parquet"

# Level 1 data for the flight.
df_level1 = DataProcessorLevel1.read_data(level1_csv_path)

# Only the second item returned by load_parquet (the metadata) is kept.
_, metadata = load_parquet(level0_parquet_path)

In [None]:
from helikite.classes.data_processing_level1_5 import DataProcessorLevel1_5
from helikite.classes.base import OutputSchemas

# The output schema is looked up on OutputSchemas by the name configured in
# constants.OUTPUT_SCHEMA.
output_schema = getattr(OutputSchemas, constants.OUTPUT_SCHEMA)

# Level 1.5 processor built from the schema, the level 1 data and the metadata.
data_processor = DataProcessorLevel1_5(output_schema, df_level1, metadata)

# Folder used below for the per-flag "_auto" / "_corr" CSV files.
output_dir = constants.OUTPUTS_FOLDER / "Processing" / "Level1.5"

## Fill in mSEMS values at takeoff and landing times

'takeoff_time' and 'landing_time' are selected in level 0 and stored in the metadata.

mSEMS scan and inverted data are available every 3 minutes. To avoid losing data points at the beginning and end of the flight (due to cutting the DataFrame at takeoff and landing), the empty timestamps at takeoff and landing are filled with the closest available mSEMS data within a 90-second window.

In [None]:
# Fill the empty mSEMS values at the takeoff and landing timestamps from the
# closest available mSEMS data within a 90-second window, so that cutting the
# data at takeoff/landing does not lose those points.
data_processor.fill_msems_takeoff_landing(time_window_seconds=90)

## Remove data from before takeoff and after landing time

'takeoff_time' and 'landing_time' are selected in level 0 and stored in the metadata.

In [None]:
# Cut the data to the flight itself (takeoff/landing times come from the
# metadata, as described in the text above this cell).
data_processor.remove_before_takeoff_and_after_landing()
# Display the first and last remaining rows as a quick check of the cut.
data_processor.df.iloc[[0, -1]]

## Columns - rename and select defined columns
**Column list and names for final data file**

datetime,Altitude,Lat,Long,P,TEMP,RH,WindSpeed,WindDir,
POPS_total_N,mSEMS_total_N, mCDA_total_N, CPC_total_N,
Filter_position,Filter_flow,
POPS_b3,POPS_b4,POPS_b5,POPS_b6,POPS_b7,POPS_b8,POPS_b9,POPS_b10,POPS_b11,POPS_b12,POPS_b13,POPS_b14,POPS_b15,
mSEMS_Bin_Conc1,mSEMS_Bin_Conc2,mSEMS_Bin_Conc3,mSEMS_Bin_Conc4,mSEMS_Bin_Conc5,mSEMS_Bin_Conc6,mSEMS_Bin_Conc7,mSEMS_Bin_Conc8,mSEMS_Bin_Conc9,mSEMS_Bin_Conc10,
mSEMS_Bin_Conc11,mSEMS_Bin_Conc12,mSEMS_Bin_Conc13,mSEMS_Bin_Conc14,mSEMS_Bin_Conc15,mSEMS_Bin_Conc16,mSEMS_Bin_Conc17,mSEMS_Bin_Conc18,mSEMS_Bin_Conc19,mSEMS_Bin_Conc20,
mSEMS_Bin_Conc21,mSEMS_Bin_Conc22,mSEMS_Bin_Conc23,mSEMS_Bin_Conc24,mSEMS_Bin_Conc25,mSEMS_Bin_Conc26,mSEMS_Bin_Conc27,mSEMS_Bin_Conc28,mSEMS_Bin_Conc29,mSEMS_Bin_Conc30,
mSEMS_Bin_Conc31,mSEMS_Bin_Conc32,mSEMS_Bin_Conc33,mSEMS_Bin_Conc34,mSEMS_Bin_Conc35,mSEMS_Bin_Conc36,mSEMS_Bin_Conc37,mSEMS_Bin_Conc38,mSEMS_Bin_Conc39,mSEMS_Bin_Conc40,
mSEMS_Bin_Conc41,mSEMS_Bin_Conc42,mSEMS_Bin_Conc43,mSEMS_Bin_Conc44,mSEMS_Bin_Conc45,mSEMS_Bin_Conc46,mSEMS_Bin_Conc47,mSEMS_Bin_Conc48,mSEMS_Bin_Conc49,mSEMS_Bin_Conc50,
mSEMS_Bin_Conc51,mSEMS_Bin_Conc52,mSEMS_Bin_Conc53,mSEMS_Bin_Conc54,mSEMS_Bin_Conc55,mSEMS_Bin_Conc56,mSEMS_Bin_Conc57,mSEMS_Bin_Conc58,mSEMS_Bin_Conc59,mSEMS_Bin_Conc60,
mCDA_dataB1, ... , mCDA_dataB256,
tapir_GL,tapir_Lat,tapir_Le,tapir_Lon,tapir_Lm,tapir_speed,tapir_route,tapir_TP,tapir_Tproc1,tapir_Tproc2,tapir_Tproc3,tapir_Tproc4,tapir_TH,tapir_Thead1,tapir_Thead2,tapir_Thead3,tapir_Thead4,tapir_TB,tapir_Tbox,
flag_pollution,flag_hovering,flag_cloud,flight_nr,campaign

In [None]:
# Select the columns for the final data file and rename them (see the column
# list above this cell).
# NOTE(review): the exact selection and renaming rules are defined in
# DataProcessorLevel1_5 / the output schema — confirm there.
data_processor.filter_columns()
data_processor.rename_columns()
# NOTE(review): presumably rounds the flight-number and campaign columns to
# 2 decimals — confirm against the implementation.
data_processor.round_flightnbr_campaign(decimals=2)

## Flag detection
- Pollution flag
- Cloud (1 = in; 0 = out)
- Hovering flag (0 = moving; 1 = hovering for more than 2 min)

In [None]:
from helikite.processing.post.fda import FDAParameters
import numpy as np

### Pollution flag

In [None]:
# Name of the flag to produce and of the column it is detected on.
flag_name_pollution = "flag_pollution"
column_name_pollution = "CPC_total_N"

# Detection settings for the pollution flag, collected in one mapping and
# unpacked into FDAParameters as keyword arguments.
pollution_settings = {
    "inverse": False,
    "avg_time": '1s',
    "main_filter": 'power_law',
    "use_neighbor_filter": True,
    "use_median_filter": True,
    "use_sparse_filter": True,
    "pl_a": 120,
    "pl_m": 0,
    "iqr_window": None,
    "iqr_factor": None,
    "lower_thr": -np.inf,
    "upper_thr": 4000,
    "median_window": '1min',
    "median_factor": 2.0,
    "sparse_window": 30,
    "sparse_thr": 26,
}
params_pollution = FDAParameters(**pollution_settings)

# Paths of the "_auto" and "_corr" CSV files handed to the detect / choose /
# set steps in the following cells.
flag_pollution_auto_file = output_dir / f"level1.5_{constants.FLIGHT_BASENAME}_{flag_name_pollution}_auto.csv"
flag_pollution_corr_file = output_dir / f"level1.5_{constants.FLIGHT_BASENAME}_{flag_name_pollution}_corr.csv"

In [None]:
# Run the flag detection for the pollution flag on CPC_total_N with
# params_pollution, without the detection plot.
# NOTE(review): presumably writes the automatic result to flag_pollution_auto_file — confirm.
data_processor.detect_flag(flag_name_pollution, column_name_pollution, params_pollution, flag_pollution_auto_file, plot_detection=False)

In [None]:
# Review step for the pollution flag, using a logarithmic y-scale.
# NOTE(review): presumably loads the "_auto" file, lets the user correct the
# flag interactively and stores the result in the "_corr" file — confirm.
data_processor.choose_flag(flag_name_pollution, column_name_pollution, flag_pollution_auto_file, flag_pollution_corr_file, yscale="log")

In [None]:
# Set the pollution flag from the "_corr" file.
# NOTE(review): presumably adds/overwrites the flag_pollution column in data_processor.df — confirm.
data_processor.set_flag(flag_name_pollution, column_name_pollution, flag_pollution_corr_file)

### Hovering flag
(Code from Joanna)

In [None]:
# Name of the flag to produce and of the column it is detected on.
flag_name_hovering = "flag_hovering"
column_name_hovering = "Altitude"

# Detection settings for the hovering flag, collected in one mapping and
# unpacked into FDAParameters as keyword arguments.
hovering_settings = {
    "inverse": True,
    "avg_time": '10s',
    "main_filter": 'power_law',
    "use_neighbor_filter": True,
    "use_median_filter": True,
    "use_sparse_filter": True,
    "pl_a": 0.8,
    "pl_m": 0,
    "iqr_window": None,
    "iqr_factor": None,
    "lower_thr": -np.inf,
    "upper_thr": -np.inf,
    "median_window": '1min',
    "median_factor": 4.0,
    "sparse_window": 30,
    "sparse_thr": 28,
}
params_hovering = FDAParameters(**hovering_settings)

# Paths of the "_auto" and "_corr" CSV files handed to the detect / choose /
# set steps in the following cells.
flag_hovering_auto_file = output_dir / f"level1.5_{constants.FLIGHT_BASENAME}_{flag_name_hovering}_auto.csv"
flag_hovering_corr_file = output_dir / f"level1.5_{constants.FLIGHT_BASENAME}_{flag_name_hovering}_corr.csv"

In [None]:
# Run the flag detection for the hovering flag on Altitude with
# params_hovering, without the detection plot.
# NOTE(review): presumably writes the automatic result to flag_hovering_auto_file — confirm.
data_processor.detect_flag(flag_name_hovering, column_name_hovering, params_hovering, flag_hovering_auto_file, plot_detection=False)

In [None]:
# Review step for the hovering flag (default y-scale).
# NOTE(review): presumably loads the "_auto" file, lets the user correct the
# flag interactively and stores the result in the "_corr" file — confirm.
data_processor.choose_flag(flag_name_hovering, column_name_hovering, flag_hovering_auto_file, flag_hovering_corr_file)

In [None]:
# Set the hovering flag from the "_corr" file.
# NOTE(review): presumably adds/overwrites the flag_hovering column in data_processor.df — confirm.
data_processor.set_flag(flag_name_hovering, column_name_hovering, flag_hovering_corr_file)

### Cloud flag

In [None]:
# Name of the flag to produce and of the column it is detected on.
flag_name_cloud = "flag_cloud"
column_name_cloud = "mCDA_total_N"

# Detection settings for the cloud flag, collected in one mapping and
# unpacked into FDAParameters as keyword arguments. The median filter is
# switched off for this flag.
cloud_settings = {
    "inverse": False,
    "avg_time": '1s',
    "main_filter": 'power_law',
    "use_neighbor_filter": True,
    "use_median_filter": False,
    "use_sparse_filter": True,
    "pl_a": 1.8,
    "pl_m": 0,
    "iqr_window": None,
    "iqr_factor": None,
    "lower_thr": -np.inf,
    "upper_thr": 1.2,
    "median_window": '1min',
    "median_factor": np.inf,
    "sparse_window": 30,
    "sparse_thr": 26,
}
params_cloud = FDAParameters(**cloud_settings)

# Paths of the "_auto" and "_corr" CSV files handed to the detect / choose /
# set steps in the following cells.
flag_cloud_auto_file = output_dir / f"level1.5_{constants.FLIGHT_BASENAME}_{flag_name_cloud}_auto.csv"
flag_cloud_corr_file = output_dir / f"level1.5_{constants.FLIGHT_BASENAME}_{flag_name_cloud}_corr.csv"

In [None]:
# Run the flag detection for the cloud flag on mCDA_total_N with params_cloud,
# without the detection plot.
# NOTE(review): presumably writes the automatic result to flag_cloud_auto_file — confirm.
data_processor.detect_flag(flag_name_cloud, column_name_cloud, params_cloud, flag_cloud_auto_file, plot_detection=False)

In [None]:
# Review step for the cloud flag (default y-scale).
# NOTE(review): presumably loads the "_auto" file, lets the user correct the
# flag interactively and stores the result in the "_corr" file — confirm.
data_processor.choose_flag(flag_name_cloud, column_name_cloud, flag_cloud_auto_file, flag_cloud_corr_file)

In [None]:
# Set the cloud flag from the "_corr" file.
# NOTE(review): presumably adds/overwrites the flag_cloud column in data_processor.df — confirm.
data_processor.set_flag(flag_name_cloud, column_name_cloud, flag_cloud_corr_file)

## Level 1.5
**Save file with columns to keep and cut to takeoff and landing.**

In [None]:
# Output image for the flight-profile figure.
save_path = constants.LEVEL1_5_DIRPATH / f'Level1.5_{constants.FLIGHT_BASENAME}_Flight_{constants.flight}.png'
# NOTE(review): custom_title is built here but not passed to any call visible in this notebook.
custom_title = f'Flight {constants.flight} ({constants.FLIGHT_BASENAME}) [Level 1.5]'

# Set to True to use the custom x-axis limits and ticks defined below;
# otherwise plot_flight_profiles is called without them.
use_custom_xlim = False

# Optional keyword arguments for the plot; stays empty unless custom limits are requested.
profile_kwargs = {}
if use_custom_xlim:
    # x-axis limits per axis (T, RH, mSEMS, CPC, POPS, mCDA, WS, WD).
    # NOTE(review): 8 variables are listed for 7 axes — confirm the mapping.
    custom_xlims = {
        'ax1': (-6, 2),
        'ax2': (60, 100),
        'ax3': (0, 1200),
        'ax4': (0, 1200),
        'ax5': (0, 60),
        'ax6': (0, 60),
        'ax7': (0, 12)
    }

    # Tick positions matching the limits above.
    custom_xticks = {
        'ax1': np.arange(-6, 3, 2),
        'ax2': np.arange(60, 101, 10),
        'ax3': np.arange(0, 1201, 200),
        'ax4': np.arange(0, 1201, 200),
        'ax5': np.arange(0, 61, 10),
        'ax6': np.arange(0, 61, 10),
        'ax7': np.arange(0, 13, 3)
    }
    profile_kwargs = {'xlims': custom_xlims, 'xticks': custom_xticks}

data_processor.plot_flight_profiles(constants.FLIGHT_BASENAME, save_path, **profile_kwargs)

In [None]:
# Output image for the size-distribution figure; this reassigns save_path,
# which the flight-profile cell above also sets.
save_path = constants.LEVEL1_5_DIRPATH / f'Level1.5_{constants.FLIGHT_BASENAME}_SizeDistr_Flight_{constants.flight}.png'
data_processor.plot_size_distr(constants.FLIGHT_BASENAME, save_path)

In [None]:
# Export the processed data to the level 1.5 CSV path for this flight.
data_processor.export_data(constants.LEVEL1_5_DIRPATH / f"level1.5_{constants.FLIGHT_BASENAME}.csv")

In [None]:
# NOTE(review): this cell is identical to the previous export cell and targets
# the same path — likely a redundant leftover; confirm and remove if so.
data_processor.export_data(constants.LEVEL1_5_DIRPATH / f"level1.5_{constants.FLIGHT_BASENAME}.csv")