# GPS Coordinate Conversion and Cleaning

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
from keplergl import KeplerGl


### Coordinate conversion from DMS to DD

In [14]:
# Function for converting coordinate arrays from DMS to DD

def dms2dd(lat, dir_1, lon, dir_2):
    """
    Convert packed degree/decimal-minute coordinates to Decimal Degrees (DD).

    Each coordinate is read as text: the last two digits before the decimal
    point (together with any fractional part) are minutes, and whatever digits
    precede them are whole degrees. For example '4030.0' is 40 degrees
    30.0 minutes, i.e. 40.5 decimal degrees. Because the split is made relative
    to the decimal point, values whose leading zeros were lost (e.g. a
    longitude read as 545.0) and three-digit-degree longitudes (e.g. 12230.0)
    are handled uniformly.

    Parameters:
    lat (array-like): Latitude values in packed degrees+minutes form (strings
        or numbers). Values are expected to be unsigned; the hemisphere comes
        from dir_1.
    dir_1 (array-like): Latitude directions ('N' or 'S'). Surrounding
        whitespace and letter case are ignored, so ' S' and 'S' both work.
    lon (array-like): Longitude values in packed degrees+minutes form.
    dir_2 (array-like): Longitude directions ('E' or 'W'), same tolerance as
        dir_1.

    Returns:
    tuple: (dd_lat, dd_lon) as float data. When the corresponding input is a
        pandas Series the result is a Series with the same index and name;
        otherwise it is a numpy.ndarray. Missing values ('nan', 'none', empty)
        come back as NaN. The inputs are not modified.

    Raises:
    ValueError: If the four inputs do not all have the same length, or a
        coordinate cannot be parsed as a number.

    Example:
    lat = np.array(['4030.0', '4115.0'])
    dir_1 = np.array(['N', 'N'])
    lon = np.array(['7345.0', '12230.0'])
    dir_2 = np.array(['W', 'W'])
    dms2dd(lat, dir_1, lon, dir_2)  # Output: (array([40.5, 41.25]), array([-73.75, -122.5]))
    """
    if not (len(lat) == len(dir_1) == len(lon) == len(dir_2)):
        raise ValueError('lat, dir_1, lon and dir_2 must all have the same length')

    # np.asarray gives positional iteration, so a Series with a non-default
    # index is processed row by row instead of by label lookup.
    dd_lat = np.array(
        [_packed_dm_to_dd(value, direction, 'S')
         for value, direction in zip(np.asarray(lat), np.asarray(dir_1))],
        dtype=float,
    )
    dd_lon = np.array(
        [_packed_dm_to_dd(value, direction, 'W')
         for value, direction in zip(np.asarray(lon), np.asarray(dir_2))],
        dtype=float,
    )

    # Hand Series back to Series callers so column assignment keeps aligning
    # on the original index.
    if isinstance(lat, pd.Series):
        dd_lat = pd.Series(dd_lat, index=lat.index, name=lat.name)
    if isinstance(lon, pd.Series):
        dd_lon = pd.Series(dd_lon, index=lon.index, name=lon.name)

    return dd_lat, dd_lon


def _packed_dm_to_dd(value, direction, negative_direction):
    """Convert one packed degrees+minutes value to signed decimal degrees."""
    text = str(value).strip()
    if not text or text.lower() in ('nan', 'none'):
        return float('nan')

    # Split on the decimal point: the two digits just before it belong to the
    # minutes, everything further left is whole degrees (possibly nothing).
    whole, _, fraction = text.partition('.')
    degrees = whole[:-2] or '0'
    minutes = whole[-2:] or '0'
    if fraction:
        minutes = minutes + '.' + fraction

    decimal_degrees = float(degrees) + float(minutes) / 60

    # Southern / western hemispheres are negative in decimal degrees.
    if str(direction).strip().upper() == negative_direction:
        decimal_degrees = decimal_degrees * -1
    return decimal_degrees


# Clean unreliable GPS coordinates

In [15]:
def get_low_satellites(sats_arr, min_satellites=3):
    """
    Return the positions of entries whose satellite value is below a threshold.

    Parameters:
    sats_arr (array-like): One-dimensional satellite values, one per GPS row
        (numpy array, pandas Series or plain list).
    min_satellites (int or float): Entries strictly below this value are
        reported. Defaults to 3.

    Returns:
    numpy.ndarray: Zero-based positions (not index labels) of the entries
        that are below min_satellites. NaN entries never compare as below the
        threshold, so they are not reported.
    """
    # Coerce first so plain Python lists support the element-wise comparison.
    satellite_values = np.asarray(sats_arr)

    return np.where(satellite_values < min_satellites)[0]

# Main Processing Script

In [18]:
def clean_gps_data(file_name):
    """
    Load a raw GPS log, convert its coordinates to decimal degrees, drop rows
    with too few satellites and save the result as CSV.

    Reads 'sample_data/<file_name>.txt' and writes
    'sample_data/processed_data/<file_name>_dd.csv' (the output directory is
    created when missing). Prints 'CSV file saved' when done.

    Parameters:
    file_name (str): Base name of the raw log, without directory or extension.

    Returns:
    None
    """
    # Load the raw log into a DataFrame.
    # NOTE(review): read_csv treats the first line as a header by default, so
    # if the raw log has no header row its first record is consumed here and
    # replaced by the column names below - confirm against the file format.
    gps_data = pd.read_csv('sample_data/' + file_name + '.txt')

    # Add keys to the data
    gps_data.columns = ['datetime','millis','lat','dir1','lon','dir2','altitude','speed','angle','satellites']

    # Convert coordinates to decimal degrees
    dd_lat, dd_lon = dms2dd(gps_data['lat'], gps_data['dir1'], gps_data['lon'], gps_data['dir2'])

    # Replace the raw coordinate columns with the converted values
    gps_data['lat'] = dd_lat
    gps_data['lon'] = dd_lon

    # Delete rows with low number of satellites (< 3). get_low_satellites
    # returns positions while DataFrame.drop expects index labels, so map the
    # positions through the index before dropping.
    low_sat_positions = get_low_satellites(gps_data['satellites'])
    gps_data = gps_data.drop(gps_data.index[low_sat_positions])

    # Save the cleaned data, creating the output directory on first use
    output_path = 'sample_data/processed_data/' + file_name + '_dd.csv'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    gps_data.to_csv(output_path)
    print('CSV file saved')

# Clean File

In [19]:
# Input file name: base name of the raw log, without directory or extension.
# This call reads sample_data/738.txt and writes sample_data/processed_data/738_dd.csv.
clean_gps_data('738')

CSV file saved


# Kepler Visualization

In [None]:
# TODO: the Kepler map is still not working, so the code below stays commented out.
#map_1 = KeplerGl(height=500)
#map_1



User Guide: https://docs.kepler.gl/docs/keplergl-jupyter




KeplerGl(height=500)