In [None]:
import pandas as pd
from pathlib import Path
import numpy as np

In [None]:
# --- Folder layout, given relative to the current working directory ---------
input_data_folder_base = Path("../data/preproc_data/")
output_data_folder_base = Path("../results/")
ardupilot_sub_folder = Path("./ardupilot")
measurement_sub_folder = Path("./measurements")

# --- Base names (without the ".p" extension) of the result pickles ----------
overall_collection_file_name = "overall_collection_2023"
key_parameter_file_name = "key_parameter_2023"

# --- Input folders, anchored at the current working directory ---------------
ardupilot_input_data_folder = Path.cwd().joinpath(
    input_data_folder_base, ardupilot_sub_folder
)
measurement_input_data_folder = Path.cwd().joinpath(
    input_data_folder_base, measurement_sub_folder
)
print(f"Reading raw pandas files from \n    {ardupilot_input_data_folder}")
print(f"Reading raw pandas files from \n    {measurement_input_data_folder}")

# --- Output folder, anchored at the current working directory ---------------
output_data_folder = Path.cwd().joinpath(output_data_folder_base)
print(f"Writing merged pandas file to \n    {output_data_folder}")

In [None]:
# Sensor channels that are copied from the measurement ("*ADC.p") frames into
# the ArduPilot frames.
columns = ['CO2(ppm)', 'airtemp in(degreeC)', 'humidity in(rH)', 'pressure in(mbar)',
           'airtemp out(degreeC)', 'humidity out(rH)', 'pressure out(mbar)',
           'PAR(umol m-2s-1)', 'H2O temp(degreeC)']

# Load every measurement frame exactly once instead of re-reading all pickles
# for each ArduPilot log. Paths are sorted so the processing order (and thus
# which file wins when measurement windows overlap) is reproducible.
# NOTE(review): unpickling can execute arbitrary code - only read trusted files.
measurement_frames = [
    (meas_path.name, pd.read_pickle(meas_path))
    for meas_path in sorted(measurement_input_data_folder.rglob("*ADC.p"))
]

# Collect the enriched ArduPilot frames and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop copies all data each time.
merged_frames = []

for ap_path in sorted(ardupilot_input_data_folder.rglob("*.p")):
    df_ap = pd.read_pickle(ap_path)
    ap_start, ap_end = df_ap.index.min(), df_ap.index.max()
    print(f"{ap_path.name} - duration {ap_start} - {ap_end}")

    # Add state information and the name of the matching measurement file,
    # plus empty (NaN) columns that receive the merged measurement values.
    df_ap['meas_running'] = False
    df_ap['corresponding_meas_file'] = ""
    df_ap[columns] = np.nan

    # Positional column indices are loop-invariant; look them up only once.
    column_positions = [df_ap.columns.get_loc(entry) for entry in columns]

    for meas_name, df_meas in measurement_frames:
        meas_start, meas_end = df_meas.index.min(), df_meas.index.max()

        # Only measurements lying strictly inside this ArduPilot log are merged.
        if not (meas_end < ap_end and meas_start > ap_start):
            continue
        print(f"     {meas_name} - duration {meas_start} - {meas_end}")

        # Flag the ArduPilot rows strictly between first and last measurement sample.
        in_measurement_window = (df_ap.index < meas_end) & (df_ap.index > meas_start)
        df_ap.loc[in_measurement_window, "meas_running"] = True
        df_ap.loc[in_measurement_window, "corresponding_meas_file"] = meas_name

        # Copy each measurement sample into the ArduPilot row at its insertion
        # position (first row whose index is >= the sample's index).
        # Assumes df_ap.index is sorted ascending - TODO confirm upstream.
        for timestamp, row in df_meas.iterrows():
            row_position = df_ap.index.searchsorted(timestamp)
            for entry, column_position in zip(columns, column_positions):
                df_ap.iloc[row_position, column_position] = row[entry]

    merged_frames.append(df_ap)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# ArduPilot pickles were found.
df_summary = pd.concat(merged_frames) if merged_frames else pd.DataFrame()

# Make sure the results folder exists before writing into it.
output_data_folder.mkdir(parents=True, exist_ok=True)
result_file = output_data_folder / Path(overall_collection_file_name + ".p")
print(f"... saving to {result_file.name}")
df_summary.to_pickle(result_file)


In [None]:
# Named aggregations for the per-measurement summary table:
# output column -> (source column, aggregation function).
key_param_aggregations = {
    "start": ("datetime_AMT", "min"),
    "end": ("datetime_AMT", "max"),
    "experiment_location": ("experiment_location", "first"),
    "lat_est_mean": ("lat_est", "mean"),
    "lon_est_mean": ("lon_est", "mean"),
    "CO2_min": ("CO2(ppm)", "min"),
    "CO2_max": ("CO2(ppm)", "max"),
    "humidity_in_mean": ("humidity in(rH)", "mean"),
    "humidity_out_mean": ("humidity out(rH)", "mean"),
    "airtemp_in_mean": ("airtemp in(degreeC)", "mean"),
    "PAR_in_mean": ("PAR(umol m-2s-1)", "mean"),
    "airtemp_out_mean": ("airtemp out(degreeC)", "mean"),
}

# Keep only the rows flagged as recorded while a measurement was running, then
# build one summary row per measurement file.
# reset_index() turns the index into a regular column; presumably this is the
# 'datetime_AMT' column used by the aggregations - confirm against the input data.
samples_during_measurement = df_summary[df_summary.meas_running].reset_index()
df_key_param = (
    samples_during_measurement
    .groupby("corresponding_meas_file")
    .agg(**key_param_aggregations)
)

# Persist the summary table next to the overall collection.
result_file = output_data_folder / Path(f"{key_parameter_file_name}.p")
print(f"    ... saving to \n        {result_file}")
df_key_param.to_pickle(result_file)