In [None]:
# This notebook contains the code to process the raw drone flight logs and wind data to
# generate the clean files to be processed and analyzed.

In [2]:
# load required packages
import os
import pandas as pd
import glob
from data_cleaning_functions import unit_conversion, haversine
# Working directory: shown so the reader can confirm where the notebook is
# running; the relative data paths used in this notebook resolve against it.
os.getcwd()

'/Users/blaircostelloe/Dropbox/Publications/DroneDisturbance/drone-disturbance/supplement'

In [3]:
# Directory configuration (all paths are relative and end with a slash,
# because file names are appended to them by plain string concatenation)

# Inputs: raw drone flight logs and raw wind data CSV files
raw_log_directory = 'raw-data/drone-flight-files/'
raw_wind_directory = 'raw-data/wind-data-csv/'

# Outputs: where the cleaned drone logs and cleaned wind data are written
clean_log_directory = 'clean-data/drone-logs/'
clean_wind_directory = 'clean-data/wind-data/'

In [4]:
# 1: CONVERT RAW DRONE LOGS TO METRIC UNITS, DISCARD UNNECESSARY DATA COLUMNS
# The work is delegated to unit_conversion() from data_cleaning_functions.
# NOTE(review): presumably it reads every log in `directory` and writes the
# converted files to `new_directory` -- confirm against the helper's source.
unit_conversion(
    directory=raw_log_directory,
    new_directory=clean_log_directory,
)

In [5]:
# 2: CLEAN WIND DATA CSV FILES
# The wind tables were scraped per flight from Airdata.com. The scrape was
# imperfect: the files carry columns we do not need and stray characters
# left over from the HTML. This cell strips those out, converts the flight
# time to milliseconds and writes one cleaned csv per raw file.

# Collect the raw wind files and make sure the output directory exists
wind_files = glob.glob(raw_wind_directory + '*.csv')
if not os.path.exists(clean_wind_directory):
    os.makedirs(clean_wind_directory)

for wind_path in wind_files:
    # Load the scraped table and discard rows with any missing value
    raw_wind = pd.read_csv(wind_path).dropna()

    # Flight identifier = file name with up to two extensions stripped
    base_name = os.path.basename(wind_path)
    flight = os.path.splitext(os.path.splitext(base_name)[0])[0]

    # flightTime is parsed as '<minutes>m <seconds>s'; convert to milliseconds
    elapsed_ms = []
    for stamp in raw_wind['flightTime']:
        minute_part = int(stamp.split('m')[0])
        second_part = int(stamp.split(' ')[1].split('s')[0])
        elapsed_ms.append(((minute_part * 60) + second_part) * 1000)

    # windSpeed starts with two garbage characters; the rest is the number
    speeds = [float(reading[2:]) for reading in raw_wind['windSpeed']]

    # Build the output table: only the needed columns, consistently named
    # and in a fixed order
    wind_clean = raw_wind.assign(
        milliseconds=elapsed_ms,
        wind_direction=raw_wind['windDirection'],
        windspeed=speeds,
    )[['milliseconds', 'wind_direction', 'windspeed']]

    # Write the cleaned table to the clean wind directory
    wind_clean.to_csv(clean_wind_directory + flight + '_wind.csv', index=False)

In [6]:
# 3: CORRECT LOG AND WIND DATA FOR FLIGHT 071-01
# The drone log for flight ob071-01 is anomalous. The video feed malfunctioned
# during flight, so we briefly disconnected the iPad from the DJI controller.
# This interrupted the drone log recording and split the log into two parts.
# When logging resumed, the drone measured its distance from the location
# where the iPad was plugged back in rather than from the launch point, and
# it restarted its running maxima (altitude, ascent, speed, distance) from
# that moment. This cell corrects the affected columns in the second part,
# appends it to the first part, and saves the result as a single log. The two
# wind files for the flight are combined the same way.
#
# NOTE: the fragment files are deleted at the end, so this cell cannot be
# re-run without first regenerating them with steps 1 and 2.

# Read in log fragments
log1 = pd.read_csv(clean_log_directory + 'flightlog_ob071-01-1_metric.csv')
log2 = pd.read_csv(clean_log_directory + 'flightlog_ob071-01-2_metric.csv')

# Correct distance column in log2: distance from the true launch point, i.e.
# the first position recorded in log1. haversine() returns km; convert to m.
launch_lat = log1['latitude'].iloc[0]
launch_lon = log1['longitude'].iloc[0]
distance_m_new = [
    haversine(launch_lat, launch_lon, lat, lon) * 1000
    for lat, lon in zip(log2['latitude'], log2['longitude'])
]
log2 = log2.assign(distance_m=distance_m_new)

# Correct the running-maximum columns in log2: each value must be at least
# the last value reached in log1. The last row is taken positionally instead
# of through a hard-coded row label, so this keeps working if the length of
# the first fragment changes.
# NOTE(review): max_distance_m in log2 is still derived from the drone's own
# (re-based) distance, not from the corrected distance_m above -- confirm
# that this is intended.
running_max_columns = ['max_altitude_m', 'max_ascent_m', 'max_speed_kph', 'max_distance_m']
corrected_maxima = {
    column: [max(log1[column].iloc[-1], value) for value in log2[column]]
    for column in running_max_columns
}
log2 = log2.assign(**corrected_maxima)

# Combine the two log fragments. The result must be captured: pd.concat
# returns a new frame and leaves its inputs untouched.
combined_log = pd.concat([log1, log2], ignore_index=True)

# Save the new drone log and remove the fragment files
combined_log.to_csv(clean_log_directory + 'flightlog_ob071-01_metric.csv', index=False)
os.remove(clean_log_directory + 'flightlog_ob071-01-1_metric.csv')
os.remove(clean_log_directory + 'flightlog_ob071-01-2_metric.csv')

# Combine wind files for ob071-01
# NOTE(review): the second fragment's rows are appended as-is; confirm that
# its 'milliseconds' values do not need to be offset by the end of part 1.
wind1 = pd.read_csv(clean_wind_directory + 'ob071-01-1_wind.csv')
wind2 = pd.read_csv(clean_wind_directory + 'ob071-01-2_wind.csv')
combined_wind = pd.concat([wind1, wind2], ignore_index=True)
combined_wind.to_csv(clean_wind_directory + 'ob071-01_wind.csv', index=False)
os.remove(clean_wind_directory + 'ob071-01-1_wind.csv')
os.remove(clean_wind_directory + 'ob071-01-2_wind.csv')