# This notebook contains the code required to process the files in the raw-data folder into the dataframes used in the analyses in the paper.

In [None]:
# load packages
import os
import pandas as pd
import numpy as np
import glob
import seaborn as sns
import matplotlib.pyplot as plt
from data_processing_functions import convert_keypoints, extract_keypoints, compare_keypoints, get_offset

In [None]:
# Show the current working directory: the relative paths used in this notebook
# (e.g. 'clean-data/...' and 'processed-data/') are resolved against it
os.getcwd()

## 1. Align drone logs with video keypoints
Here we use keypoints that occur within observation flights to align the drone logs and the video footage. The keypoints have been manually identified in the flight footage and the timestamps of the keypoints are stored in video_keypoints.csv.
This code first converts the video_keypoint timestamps from mm:ss to milliseconds. 
Then it identifies the corresponding keypoints within the drone logs.
Then it generates a graphical means of inspecting the correlation of the time gaps between each pair of video keypoints and the corresponding log keypoints. Points falling far from a 1:1 line are indicative of an erroneously identified video keypoint or a corrupted drone log. We generate a figure to illustrate this process.
Next it calculates the offset between the timestamps of the keypoints from the video and the timestamps in the drone logs. This offset value is used to identify data ranges within the drone logs that correspond to time periods within the flight videos.

It then saves a .csv file containing the video and log keypoints, the offset value, and data regarding
the selection and quality of the offset value for each flight.

In [None]:
# Directories used throughout the notebook
log_directory = 'clean-data/drone-logs/'  # clean drone logs are read from here
processed_data = 'processed-data/'        # generated files are written here

# Read the video keypoint table and turn the text markers
# 'TRUE' / 'FALSE' / 'NAN' into real booleans and missing values
vid_kp = (
    pd.read_csv('clean-data/video_keypoints.csv')
    .replace('TRUE', True)
    .replace('FALSE', False)
    .replace('NAN', np.nan)
)

In [None]:
# Convert video keypoint timestamps to milliseconds.
# convert_keypoints is imported from data_processing_functions; it receives the
# cleaned keypoint table loaded from clean-data/video_keypoints.csv.
vid_kp_ms = convert_keypoints(vid_kp)

In [None]:
# Identify keypoints in drone logs and extract time value in milliseconds.
# extract_keypoints (from data_processing_functions) is given the drone-log
# directory and the converted video keypoints. Later cells read a 'flight'
# column plus '<keypoint>_vid' and '<keypoint>_log' columns from the result.
all_kp_ms = extract_keypoints(log_directory, vid_kp_ms)

### Visual inspection of correlation of time gaps between keypoints

In order to clean my data and verify that I had correctly identified the four keypoints when watching the videos,
I compared the time intervals between each pair of keypoints in the log and video data. If, e.g., the interval between the launch and gimbal_down keypoints was roughly equivalent (within ~2 seconds) between the log and video sources, this indicated that I had correctly identified the keypoints and simply needed to calculate an "offset constant" in order to align the drone log and video timestamps. In some cases, I had misidentified or mistyped the timestamp for a keypoint. In such cases, most intervals agreed between the video and logs, but intervals starting or ending with an erroneous keypoint would be off by tens of seconds. I inspected each flight and reviewed videos as necessary to correct misidentified or mistyped keypoints.

**Step 1:** Create a new dataframe containing the intervals between each keypoint from both the video and the drone log.

In [None]:
# Flights that have keypoints, in order of first appearance
flight_list = all_kp_ms['flight'].unique()

# Each interval is named '<start>-<end>' and is computed as the end keypoint
# minus the start keypoint. The dict order fixes the column order of the result.
KEYPOINT_INTERVALS = {
    'launch-gimbal': ('launchtime', 'gimbaldown'),
    'launch-home': ('launchtime', 'gohome'),
    'launch-land': ('launchtime', 'landed'),
    'gimbal-home': ('gimbaldown', 'gohome'),
    'gimbal-land': ('gimbaldown', 'landed'),
    'home-land': ('gohome', 'landed'),
}


def keypoint_intervals(kp_ms, source, suffix):
    """Return one row per flight holding the intervals between its keypoints.

    Parameters
    ----------
    kp_ms : pandas.DataFrame
        Keypoint table with a 'flight' column and '<keypoint>_<suffix>'
        timestamp columns. Only the first row of each flight is used.
    source : str
        Label written to the 'source' column (e.g. 'video' or 'log').
    suffix : str
        Column suffix selecting which timestamps to use ('vid' or 'log').

    Returns
    -------
    pandas.DataFrame
        Columns 'flight', 'source' and one float column per entry of
        KEYPOINT_INTERVALS; rows follow the first appearance of each flight.
    """
    # Keep the first row of every flight, preserving the order of first appearance
    first_rows = kp_ms.drop_duplicates(subset='flight', keep='first').reset_index(drop=True)
    intervals = first_rows[['flight']].assign(source=source)
    for name, (start, end) in KEYPOINT_INTERVALS.items():
        difference = first_rows[f'{end}_{suffix}'] - first_rows[f'{start}_{suffix}']
        # Stored as float so missing keypoints can be represented as NaN
        intervals[name] = difference.astype(float)
    return intervals


# calculate intervals between video keypoints and store in a dataframe
keypoint_diffs = keypoint_intervals(all_kp_ms, source='video', suffix='vid')
# calculate intervals between drone log keypoints and store them in a dataframe
keypoint_diffs2 = keypoint_intervals(all_kp_ms, source='log', suffix='log')

# append the two dataframes & reshape
# (DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent)
keypoint_differences = pd.concat([keypoint_diffs, keypoint_diffs2], ignore_index=True)
# long format: one row per (flight, source, interval)
test_data = pd.melt(keypoint_differences, id_vars=['flight', 'source'],
                    var_name="keypoints", value_name="difference(milliseconds)")
# wide format: one row per (flight, interval), one column per source
plot_data = pd.pivot_table(test_data, values='difference(milliseconds)',
                           index=['flight', 'keypoints'], columns='source')

**Step 2:** Visual comparison of keypoint gaps for each flight

Blue dots should fall approximately along the red line. Dots falling far from the line, and large values in the keypoints table, suggest that a keypoint has been misidentified in the video footage or that the drone log is corrupted.

In [None]:
# Inspect the keypoint intervals of a single flight; change the flight ID to review another flight.
# compare_keypoints is imported from data_processing_functions.
compare_keypoints('025-02', plot_data)

In [None]:
# Generate figure for supplement explaining this visual method of drone log alignment

AXIS_MAX_MS = 1300000  # upper limit of both axes, in milliseconds
flight = '025-02'

# Create a misalignment on a COPY, so the deliberately wrong values never leak
# into test_data / plot_data, which hold the real intervals.
# NOTE(review): the row labels below are hard-coded and depend on the row order
# of test_data; presumably they belong to the flight plotted here. Re-check them
# if the input data changes.
demo_data = test_data.copy()
demo_data.at[25, 'difference(milliseconds)'] = 46800
demo_data.at[1375, 'difference(milliseconds)'] = 1093000
demo_data.at[1825, 'difference(milliseconds)'] = 1218000

# create dataframe for plot: one row per (flight, interval), one column per source
demo_plot_data = pd.pivot_table(demo_data, values='difference(milliseconds)',
                                index=['flight', 'keypoints'], columns='source')
flight_intervals = demo_plot_data.loc[flight]
labelled_intervals = flight_intervals.reset_index()

# create figure
sns.set_context('talk')
newfig, ax = plt.subplots(figsize=(12, 12))
# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally
sns.lineplot(x=[0, AXIS_MAX_MS], y=[0, AXIS_MAX_MS], color='red', alpha=0.6, ax=ax)  # 1:1 reference line
sns.scatterplot(data=labelled_intervals, x='log', y='video', ax=ax)
sns.despine(ax=ax)

# Data are in milliseconds; tick labels show the same positions divided by 1000
yticks = ax.get_yticks()
ax.set_yticks(yticks)
ax.set_yticklabels(yticks / 1000)
xticks = ax.get_xticks()
ax.set_xticks(xticks)
ax.set_xticklabels(xticks / 1000)

ax.set_xlabel('Interval from flight log')
ax.set_ylabel('Interval from video footage')
ax.set_xlim(0, AXIS_MAX_MS)
ax.set_ylim(0, AXIS_MAX_MS)

# Label every point with its interval name, shifted slightly right of and below the marker
for row in labelled_intervals.itertuples(index=False):
    ax.text(row.log + 30000, row.video - 12000, row.keypoints,
            horizontalalignment='left', size='medium', color='black', weight='normal')
newfig.tight_layout()

# Absolute log/video disagreement per interval (values divided by 1000)
print((flight_intervals['log'] - flight_intervals['video']).abs() / 1000)
# Uncomment to write the figure to disk
#newfig.savefig('figures/alignment_error.png')

In [None]:
# Calculate offset constant for each flight and save alignment data for further analyses

# Now that all keypoints are clean, choose the most parsimonious offset to align drone log and video timestamps
# for each flight. This value can be used to align times by SUBTRACTING it from the video time or ADDING it to
# the log time.
# get_offset is imported from data_processing_functions.
alignment_df = get_offset(all_kp_ms)

# exist_ok=True creates the output folder only when it is missing, without a
# separate (race-prone) existence check
os.makedirs(processed_data, exist_ok=True)
alignment_df.to_csv(os.path.join(processed_data, 'video_log_alignment.csv'), index=False)