# Transforming ArduPilot log files to pandas DataFrames

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell execution, so edits to the project sources take effect immediately.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
from pathlib import Path
import pandas as pd
from multiprocessing import Process

# Make the "src" folder next to this notebook's working directory importable,
# registering it on sys.path only once.
module_path = str(Path.cwd().parent / "src")
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from parser.DataflashParser import read_from_log, write_csv, interpolate_data
from data_aggregation.ardupilot_to_pandas import run_aggregation

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [3]:
# Base folders, given relative to the notebook's working directory.
input_data_folder_base = Path("../data/raw_data")
output_data_folder_base = Path("../data/preproc_data/")
ardupilot_sub_folder = Path("./ardupilot")

# Folder structure containing all ardupilot data files.
ardupilot_input_data_folder = Path.cwd().joinpath(input_data_folder_base, ardupilot_sub_folder)
# Folder that receives the preprocessed pandas files.
ardupilot_output_data_folder = Path.cwd().joinpath(output_data_folder_base, ardupilot_sub_folder)

print(f"Reading raw data files from \n    {ardupilot_input_data_folder}")
print(f"Writing preprocessed pandas files to \n    {ardupilot_output_data_folder}")


Reading raw data files from 
    /home/sebastian/Nextcloud/Projekte/2021_DAAD_Brasilien/2023/RoBiMo_Trop_DataSet/notebooks/../data/raw_data/ardupilot
Writing preprocessed pandas files to 
    /home/sebastian/Nextcloud/Projekte/2021_DAAD_Brasilien/2023/RoBiMo_Trop_DataSet/notebooks/../data/preproc_data/ardupilot


In [4]:
# Maps each experiment location to the names of the ArduPilot log files assigned
# to it; used to tag the resulting DataFrames with their location.
# Every log file name must appear only once across all locations.
locations = {
    'Balbina': ["00000038.log", "00000039.log", "00000040.log", "00000041.log", "00000042.log"],
    'Jandira': ["00000043.log", "00000044.log", "00000045.log"],
    'Caldeirao': ["00000046.log", "00000047.log", "00000048.log"],
    'Iranduba': [
        "00000049.log", "00000050.log", "00000051.log", "00000052.log",
        "00000053.log", "00000054.log", "00000055.log",
    ],
}

def assign_location(file_name):
    """Return the location whose log-file list contains ``file_name``.

    The name is looked up in the module-level ``locations`` mapping; the key of
    the first list containing it is returned. If no list contains the name, the
    string "not known" is returned.
    """
    matches = (place for place, log_names in locations.items() if file_name in log_names)
    return next(matches, "not known")

In [5]:
# Recursively collect every "*.log" file below the raw-data folder. The glob
# yields paths in no guaranteed order, so sort them to keep the processing
# order and the progress output reproducible between runs.
file_list = sorted(ardupilot_input_data_folder.rglob("*.log"))
print(f"Found {len(file_list)} raw files.")

# Handles of the worker processes, one per log file.
procs = []

def control_processing(file, place):
    """Aggregate one ArduPilot log file and store the result as a pickled DataFrame.

    The frame returned by ``run_aggregation(file)`` is resampled to one row per
    second (first value of each 1 s bin), gets ``place`` written to the
    ``experiment_location`` column and loses the columns ``index`` and
    ``datetime_UTC``. It is then pickled into ``ardupilot_output_data_folder``
    as ``<ddmmyy-HHMMSS>-AP.p``, named after the earliest timestamp in the index.

    Args:
        file: Path of the raw log file, passed on to ``run_aggregation``.
        place: Location label stored in the ``experiment_location`` column.

    Raises:
        KeyError: If the frame has no ``index`` or ``datetime_UTC`` column.
    """
    # NOTE(review): resample() needs a datetime-like index - assumes
    # run_aggregation() provides one; confirm against its implementation.
    df = run_aggregation(file).resample("1s").first()
    df['experiment_location'] = place
    df = df.drop(columns=['index', 'datetime_UTC'])

    filename = df.index.min().strftime('%d%m%y-%H%M%S-AP') + ".p"
    # to_pickle() does not create missing directories, so make sure the target
    # folder exists (safe when several workers do this concurrently).
    ardupilot_output_data_folder.mkdir(parents=True, exist_ok=True)
    print(f"    ... saving to {filename}")
    df.to_pickle(ardupilot_output_data_folder / filename)

# Start one worker process per log file so the files are processed in parallel.
for index, file in enumerate(file_list):
    place = assign_location(file.name)
    print(f"({index:2d}/{len(file_list)-1}) - {file.name} / {place}")
    proc = Process(target=control_processing, args=(file, place))
    procs.append(proc)
    proc.start()

# Wait for all workers. An exception inside a worker only shows up as a
# non-zero exit code, so collect the affected files and report them instead of
# letting them fail silently.
failed_files = []
for file, proc in zip(file_list, procs):
    proc.join()
    if proc.exitcode != 0:
        failed_files.append(file.name)

if failed_files:
    print(f"Processing failed for {len(failed_files)} file(s): {', '.join(failed_files)}")

Found 0 raw files.
