In [1]:
import numpy as np
import pandas as pd
from google.colab import files

In [2]:
# constants
# Sphere radius in meters (6371 km; presumably the mean Earth radius - confirm).
# Multiplied by the haversine central angle to produce 'rtk_delta_meters'.
earth_radius = 6371e3

In [3]:
def find_launch_index(df: pd.DataFrame, cutoff = 5000):
    '''Helper function to find the point where the craft takes off.

    Uses the motor current as an indicator - take off is marked as the
    first moment that the current spikes above the cutoff.

    Parameters:
        df: dataframe containing a 'pow_motorCurrentTotal' column
        cutoff: motor current threshold; a row matches when its current
                is strictly greater than this value

    Returns:
        The index label of the first row whose motor current exceeds the
        cutoff, or None when no row does (including an empty dataframe).
    '''
    # Only the motor current column matters, so build the boolean mask on
    # that single column rather than masking (and copying) every column
    # of the dataframe just to locate one row.
    over_cutoff = df['pow_motorCurrentTotal'].gt(cutoff)
    if not over_cutoff.any():
        return None
    # idxmax on a boolean series yields the label of the first True
    return over_cutoff.idxmax()

In [4]:
def create_gps_rads_columns(df):
    '''Helper function to convert the gps lat/lon columns to radians.

    The numpy trig functions use radians, so the raw 'gps_gps2LatR' and
    'gps_gps2LonR' columns are scaled by 1e-7 and converted from degrees
    to radians. The results are written in place to the 'rtk_lat_rads'
    and 'rtk_lon_rads' columns; nothing is returned.
    '''
    # NOTE(review): the 1e-7 factor suggests the raw columns hold
    # degrees * 1e7 - confirm against the logger format.
    raw_to_degrees = 1e-7
    column_pairs = (
        ('gps_gps2LatR', 'rtk_lat_rads'),
        ('gps_gps2LonR', 'rtk_lon_rads'),
    )
    for raw_column, rads_column in column_pairs:
        degrees = df[raw_column] * raw_to_degrees
        df[rads_column] = np.deg2rad(degrees, dtype=np.double)

In [5]:
def get_lat_lon(df, index):
    '''Helper function that gets a (lat, lon) tuple for a given row.

    The values are read from the 'rtk_lat_rads' and 'rtk_lon_rads'
    columns. `index` is treated as a row position (it is passed to iloc),
    not as an index label.
    '''
    lat = df['rtk_lat_rads'].iloc[index]
    lon = df['rtk_lon_rads'].iloc[index]
    return (lat, lon)

In [6]:
def get_gps_meter_delta(df, tk_lat, tk_lon):
    '''Function to calculate the straight line distance between
    the launch point of the craft and its current location.

    tk_lat / tk_lon are the launch coordinates in radians. The haversine
    formula is applied to the 'rtk_lat_rads' / 'rtk_lon_rads' columns and
    the following columns are written to the dataframe in place:
        delta_lat_rad, delta_lon_rad - offsets from the launch point
        rtk_a, rtk_c                 - haversine intermediate terms
        rtk_delta_meters             - distance from launch in meters
    '''
    current_lat = df['rtk_lat_rads']
    lat_offset = current_lat - tk_lat
    lon_offset = df['rtk_lon_rads'] - tk_lon
    df['delta_lat_rad'] = lat_offset
    df['delta_lon_rad'] = lon_offset

    # squared half-angle sines, written as explicit products
    sin_half_lat = np.sin(lat_offset / 2.0)
    sin_half_lon = np.sin(lon_offset / 2.0)
    lat_term = sin_half_lat * sin_half_lat
    lon_term = np.cos(tk_lat) * np.cos(current_lat) * (sin_half_lon * sin_half_lon)

    haversine = lat_term + lon_term
    df['rtk_a'] = haversine

    # central angle between launch point and current position
    df['rtk_c'] = 2 * np.arctan2(np.sqrt(haversine), np.sqrt(1 - haversine))

    # scale the angle by the sphere radius to get a distance
    df['rtk_delta_meters'] = earth_radius * df['rtk_c']

In [7]:
def get_gps_angle_delta(df, tk_lat, tk_lon):
    '''Function to calculate the bearing of the craft's current location
    relative to the launch location.

    Parameters:
        df: dataframe with a 'rtk_lat_rads' column and either a
            'delta_lon_rad' column (longitude offset from launch) or a
            'rtk_lon_rads' column from which that offset can be derived
        tk_lat: launch latitude in radians
        tk_lon: launch longitude in radians; only used when the
                dataframe has no 'delta_lon_rad' column

    Creates new columns in the dataframe, in place:
        rtk_theta_y, rtk_theta_x - the two arguments handed to arctan2
        rtk_theta                - the bearing in radians
    '''
    current_lat = df['rtk_lat_rads']
    # Reuse the longitude offset when it has already been computed,
    # otherwise derive it from the launch longitude.
    if 'delta_lon_rad' in df.columns:
        delta_lon = df['delta_lon_rad']
    else:
        delta_lon = df['rtk_lon_rads'] - tk_lon

    df['rtk_theta_y'] = np.sin(delta_lon) * np.cos(current_lat)
    # x = cos(lat1)*sin(lat2) - sin(lat1)*cos(lat2)*cos(dlon)
    # cos(lat2) and cos(dlon) are separate factors; the longitude cosine
    # must not end up inside the latitude cosine's argument.
    df['rtk_theta_x'] = (
        (np.cos(tk_lat) * np.sin(current_lat)) -
        (np.sin(tk_lat) * np.cos(current_lat) * np.cos(delta_lon))
    )
    df['rtk_theta'] = np.arctan2(df['rtk_theta_y'], df['rtk_theta_x'])

In [8]:
def get_gps_xy_delta(df):
    '''Function to calculate the x/y distance of the craft from the
    launch location.

    Assumes the existence of the 'rtk_delta_meters' and 'rtk_theta'
    columns, and creates two new columns in place:
        rtk_x_m - cos(rtk_theta) * rtk_delta_meters
        rtk_y_m - sin(rtk_theta) * rtk_delta_meters
    which are the x and y distances from launch in meters.
    '''
    bearing = df['rtk_theta']
    distance = df['rtk_delta_meters']
    df['rtk_x_m'] = np.cos(bearing) * distance
    df['rtk_y_m'] = np.sin(bearing) * distance

In [9]:
def make_clean_dataframe(df: pd.DataFrame, motor_cutoff: int, data_collection_id: str):
    '''Helper function to create a cleaned up dataframe from the original
    bgu dataframe after it has been run through the process of finding
    distance from launch.

    The new dataframe truncates the data to only moments while the craft
    is flying (as indicated by the motor current) and drops all columns
    not used by the neural network.

    Parameters:
        df: processed bgu dataframe; must contain 'pow_motorCurrentTotal'
        motor_cutoff: rows are kept only when their motor current is
                      strictly greater than this value
        data_collection_id: label written to every row of the 'Dataset'
                            column of the result

    Returns:
        A new dataframe with the renamed feature columns plus 'Dataset'.
        The input dataframe is not modified.
    '''
    # bgu column name -> column name used for training
    column_names = {
        # Training Features
        'alt_tetherPosition': 'Tether_Length',
        'wnd_windSpeed': 'Wind_Speed',
        'wnd_windBearing': 'Wind_Direction',
        'cft_craftHeading': 'Craft_Direction',
        'cft_craftPitch': 'Craft_Pitch',
        'cft_craftRoll': 'Craft_Roll',
        'ats_imuPitch': 'ATS_Pitch',
        'ats_imuRoll': 'ATS_Roll',
        'alt_lidarAltitude': 'Lidar_Altitude',
        'alt_sonarAltitude': 'Sonar_Altitude',
        'rtk_x_m': 'True_Local_Position_X',
        'rtk_y_m': 'True_Local_Position_Y',
    }

    # Honour the caller's threshold instead of a hard-coded value
    truncated_data = df.loc[df['pow_motorCurrentTotal'] > motor_cutoff]

    clean_data = (
        truncated_data
        .filter(list(column_names), axis=1)
        .rename(columns=column_names)
    )

    # Metadata Label
    clean_data['Dataset'] = [data_collection_id] * len(clean_data)
    return clean_data

In [11]:
def bgu_to_tf(filename):
    '''This function takes in a bgu csv file, and runs it through
    the preprocessing steps to prepare it for input into a neural
    network, then returns a cleaned up dataframe with the
    processed data.

    Parameters:
        filename: path of the bgu csv file to read; also used as the
                  'Dataset' label of the returned rows

    Returns:
        The dataframe produced by make_clean_dataframe.

    Raises:
        ValueError: if the motor current never exceeds the cutoff, so no
                    launch point can be determined.
    '''
    motor_cutoff = 5000

    # read_csv has no `lines` keyword (that option belongs to read_json);
    # passing it raises a TypeError before any data is read.
    df = pd.read_csv(filename)
    # strip spaces from the column headers
    df.columns = df.columns.str.replace(' ', '')

    launch_idx = find_launch_index(df, motor_cutoff)
    if launch_idx is None:
        raise ValueError(
            "No launch point found in {}: motor current never exceeds {}".format(
                filename, motor_cutoff))

    create_gps_rads_columns(df)
    # launch_idx is an index label while get_lat_lon looks rows up by
    # position; the two coincide for the default index read_csv assigns.
    tk_lat, tk_lon = get_lat_lon(df, launch_idx)

    get_gps_meter_delta(df, tk_lat, tk_lon)
    get_gps_angle_delta(df, tk_lat, tk_lon)
    get_gps_xy_delta(df)

    return make_clean_dataframe(df, motor_cutoff, data_collection_id=filename)

In [12]:
def make_dataset_from_upload(logging: bool):
    '''This function allows you to upload a collection of bgu csv files to
    google colab, then runs the preprocessing function over them, returning
    a single combined dataset at the end.

    Parameters:
        logging: when True, print a progress line before each file

    Returns:
        One dataframe containing the processed rows of every uploaded
        file (re-indexed from 0), or an empty dataframe when nothing
        was uploaded.
    '''
    csv_files = files.upload()
    total_files = len(csv_files)

    # Collect the per-file frames and concatenate once at the end;
    # concatenating inside the loop re-copies every earlier row each time.
    frames = []
    for indx, file in enumerate(csv_files.keys()):
        if logging:
            print("Starting File {} of {}".format(indx+1, total_files))
        frames.append(bgu_to_tf(file))

    # pd.concat raises on an empty list, so keep the empty-upload result
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [13]:
def serialize_dataframe(df: pd.DataFrame, output_file) -> None:
    '''Writes the dataframe to `output_file` as a csv file (without the
    index column), and then downloads that csv file through
    files.download.'''
    df.to_csv(path_or_buf=output_file, index=False)
    files.download(output_file)

In [None]:
# Run the collection code, uploading files and downloading to the specified file
output_file = 'NN_Dataset.csv'

print("Starting Dataset Collection")
data_set = make_dataset_from_upload(logging=True)
serialize_dataframe(data_set, output_file)