In [159]:
import csv
import math

import numpy as np
import pandas as pd

In [160]:
# Change the values that need to be changed manually here

# Calendar date (YYYY-MM-DD) of the log being processed; it is written into
# the date part of every timestamp when the 'Time' column is rebuilt.
log_date = '2023-11-09'

# Bounding rectangle of every station, keyed by station name.
# Each entry holds a 'Latitude' and a 'Longitude' range with 'lower' / 'higher'
# bounds; coord_to_station treats both bounds as inclusive and compares them
# with the decimal-degree Latitude / Longitude columns.
station_coord_rects = {
    'Xavier': { 
        'Latitude': { 
            'lower': 14.640004,
            'higher': 14.640371
        },
        'Longitude': { 
            'lower': 121.078251,
            'higher': 121.078789
        }
    },
    'Old Comm': { 
        'Latitude': { 
            'lower': 14.636606,
            'higher': 14.636749
        },
        'Longitude': { 
            'lower': 121.078116,
            'higher': 121.078324
        }
    },
    'LST': { 
        'Latitude': { 
            'lower': 14.636115,
            'higher': 14.636302
        },
        'Longitude': { 
            'lower': 121.080644,
            'higher': 121.080738
        }
    },
    'Grade School': { 
        # NOTE(review): 'lower' and 'higher' are the same value here, so this
        # latitude range has zero height and only an exact match can fall
        # inside it. Looks like a copy/paste slip - confirm the intended bounds.
        'Latitude': { 
            'lower': 14.634715,
            'higher': 14.634715
        },
        'Longitude': { 
            'lower': 121.076171,
            'higher': 121.076326
        }
    },
    'JSEC': { 
        'Latitude': { 
            'lower': 14.637686,
            'higher': 14.637804
        },
        'Longitude': { 
            'lower': 121.076260,
            'higher': 121.076370
        }
    },
    'Gate 2.5': { 
        'Latitude': { 
            'lower': 14.637874,
            'higher': 14.637927
        },
        'Longitude': { 
            'lower': 121.074848,
            'higher': 121.075020
        }
    },
    'Leong': { 
        'Latitude': { 
            'lower': 14.640724,
            'higher': 14.640864
        },
        'Longitude': { 
            'lower': 121.076177,
            'higher': 121.076395
        }
    },
    
}

In [161]:
# Get raw csv.
# The file name is derived from `log_date` so that changing the date in the
# config cell is enough (assumes logs are named data_<log_date>.csv, which is
# the pattern of the previously hard-coded name).
df = pd.read_csv(f'data_{log_date}.csv')

df.columns = ['Type', 'Time', 'Lat', 'Long', 'Altitude', 'Humidity', 'Temperature']
df = df.drop(columns='Type')

# Drop rows with null at start and end: keep everything between the first and
# the last row in which every column has a value. Labels (idxmax) are used
# rather than positions because .loc slices by label.
is_complete = df.notna().all(axis=1)
if not is_complete.any():
    raise ValueError(f"data_{log_date}.csv has no row with all columns filled in")
first_id = is_complete.idxmax()
last_id = is_complete[::-1].idxmax()

# .copy() makes the trimmed frame independent, so the column assignments
# below do not write into a slice of the raw frame.
df = df.loc[first_id:last_id].copy()

# Fix time to PHT (UTC+8). Only the time of day is kept from the converted
# timestamps; the date part is replaced by `log_date` and the result is
# timezone-naive.
local_time = pd.to_datetime(df['Time']).dt.tz_convert('+08:00')
df['Time'] = pd.to_datetime(local_time.dt.strftime(f"{log_date} %H:%M:%S"))

# Add day of week
df['Day_of_Week'] = df['Time'].dt.day_name()

# Interpolate values for NaN GPS values (linear, filling in both directions)
gps_columns = ['Lat', 'Long', 'Altitude']
df[gps_columns] = df[gps_columns].interpolate(method='linear', limit_direction='both')

df

Unnamed: 0,Time,Lat,Long,Altitude,Humidity,Temperature,Day_of_Week
178,2023-11-09 05:55:33,1438.135370,12104.47970,51.50,70.900002,30.799999,Thursday
179,2023-11-09 05:55:34,1438.135170,12104.47929,50.90,70.900002,30.799999,Thursday
180,2023-11-09 05:55:35,1438.133900,12104.47805,50.40,70.900002,30.799999,Thursday
181,2023-11-09 05:55:36,1438.133620,12104.47788,50.00,70.900002,30.799999,Thursday
182,2023-11-09 05:55:37,1438.133290,12104.47742,48.60,70.900002,30.799999,Thursday
...,...,...,...,...,...,...,...
45042,2023-11-09 18:23:14,1438.012388,12104.50543,69.78,68.099998,32.500000,Thursday
45043,2023-11-09 18:23:15,1438.013184,12104.50721,69.74,68.099998,32.500000,Thursday
45044,2023-11-09 18:23:16,1438.013980,12104.50899,69.70,68.099998,32.500000,Thursday
45045,2023-11-09 18:23:17,1438.012000,12104.51368,69.70,68.099998,32.500000,Thursday


In [162]:
# Reformat Lat and Long to be decimal coordinates
def ddmm_mmmm_to_decimal(coord):
    """Convert a coordinate in ``ddmm.mmmm`` form to decimal degrees.

    The hundreds and above are whole degrees and the remainder is decimal
    minutes, e.g. ``1430.0`` means 14 degrees 30.0 minutes, i.e. ``14.5``.

    Parameters
    ----------
    coord : float, numpy array or pandas Series
        Coordinate(s) in ``ddmm.mmmm`` form. Negative values are read as the
        same magnitude on the opposite side of the equator / prime meridian.

    Returns
    -------
    Same shape as ``coord``
        The coordinate(s) in decimal degrees, keeping the sign of the input.
        NaN stays NaN.
    """
    # Split the magnitude, not the signed value: floor division and modulo
    # round towards minus infinity, which would turn -1430.0 into
    # -15 degrees + 70 minutes instead of -(14 degrees + 30 minutes).
    magnitude = abs(coord)
    degrees = magnitude // 100
    decimal_minutes = magnitude % 100

    # Convert decimal minutes to decimal degrees and restore the sign
    return np.sign(coord) * (degrees + decimal_minutes / 60)

# The conversion is plain arithmetic, so it is applied to each column as a
# whole instead of building a row object for every record.
df['Latitude'] = ddmm_mmmm_to_decimal(df['Lat'])
df['Longitude'] = ddmm_mmmm_to_decimal(df['Long'])

# Drop old Lat / Long values
df = df.drop(columns=['Lat', 'Long'])
df

Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude
178,2023-11-09 05:55:33,51.50,70.900002,30.799999,Thursday,14.635590,121.074662
179,2023-11-09 05:55:34,50.90,70.900002,30.799999,Thursday,14.635586,121.074655
180,2023-11-09 05:55:35,50.40,70.900002,30.799999,Thursday,14.635565,121.074634
181,2023-11-09 05:55:36,50.00,70.900002,30.799999,Thursday,14.635560,121.074631
182,2023-11-09 05:55:37,48.60,70.900002,30.799999,Thursday,14.635555,121.074624
...,...,...,...,...,...,...,...
45042,2023-11-09 18:23:14,69.78,68.099998,32.500000,Thursday,14.633540,121.075090
45043,2023-11-09 18:23:15,69.74,68.099998,32.500000,Thursday,14.633553,121.075120
45044,2023-11-09 18:23:16,69.70,68.099998,32.500000,Thursday,14.633566,121.075150
45045,2023-11-09 18:23:17,69.70,68.099998,32.500000,Thursday,14.633533,121.075228


In [163]:
# Label stations along the line that the ejeep is in based on coordinates
def coord_to_station(lat, long, rects=None) -> str:
    """Return the name of the station whose rectangle contains a point.

    Parameters
    ----------
    lat, long : float
        Position in decimal degrees.
    rects : dict, optional
        Mapping of station name to
        ``{'Latitude': {'lower', 'higher'}, 'Longitude': {'lower', 'higher'}}``.
        Defaults to the notebook-level ``station_coord_rects``.

    Returns
    -------
    str
        The first station (in mapping order) whose bounds include the point,
        both bounds inclusive, or ``"En Route"`` when no station matches.
        A NaN coordinate never matches and therefore yields ``"En Route"``.
    """
    if rects is None:
        rects = station_coord_rects

    for station, bounds in rects.items():
        lat_bounds, long_bounds = bounds['Latitude'], bounds['Longitude']

        inside_lat = lat_bounds['lower'] <= lat <= lat_bounds['higher']
        inside_long = long_bounds['lower'] <= long <= long_bounds['higher']
        if inside_lat and inside_long:
            return station

    return "En Route"

# Only the two coordinate columns are needed, so walk them in lockstep
# rather than materialising a full row object per record.
df['Station'] = [
    coord_to_station(lat, long)
    for lat, long in zip(df['Latitude'], df['Longitude'])
]
df


Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude,Station
178,2023-11-09 05:55:33,51.50,70.900002,30.799999,Thursday,14.635590,121.074662,En Route
179,2023-11-09 05:55:34,50.90,70.900002,30.799999,Thursday,14.635586,121.074655,En Route
180,2023-11-09 05:55:35,50.40,70.900002,30.799999,Thursday,14.635565,121.074634,En Route
181,2023-11-09 05:55:36,50.00,70.900002,30.799999,Thursday,14.635560,121.074631,En Route
182,2023-11-09 05:55:37,48.60,70.900002,30.799999,Thursday,14.635555,121.074624,En Route
...,...,...,...,...,...,...,...,...
45042,2023-11-09 18:23:14,69.78,68.099998,32.500000,Thursday,14.633540,121.075090,En Route
45043,2023-11-09 18:23:15,69.74,68.099998,32.500000,Thursday,14.633553,121.075120,En Route
45044,2023-11-09 18:23:16,69.70,68.099998,32.500000,Thursday,14.633566,121.075150,En Route
45045,2023-11-09 18:23:17,69.70,68.099998,32.500000,Thursday,14.633533,121.075228,En Route


In [164]:
# Function to calculate distance using Haversine formula
def haversine_distance(lat_diff, lon_diff, ref_lat=0.0):
    """Distance in metres spanned by a latitude / longitude difference.

    Parameters
    ----------
    lat_diff, lon_diff : float
        Difference in latitude and longitude, in degrees.
    ref_lat : float, optional
        Latitude (degrees) at which the displacement is measured; it is used
        for both cosine factors of the haversine formula. The default of 0.0
        treats the displacement as if it happened on the equator, which is
        what this function always did. Away from the equator that overstates
        the east-west component, so pass the real latitude when that matters.

    Returns
    -------
    float
        Distance in metres on a sphere of radius 6371 km. NaN input gives NaN.
    """
    # Radius of the Earth in kilometers
    R = 6371.0

    # Half-angles of the differences, in radians
    half_lat = math.radians(lat_diff) / 2
    half_lon = math.radians(lon_diff) / 2
    cos_ref = math.cos(math.radians(ref_lat))

    # Haversine formula
    a = math.sin(half_lat) ** 2 + cos_ref * cos_ref * math.sin(half_lon) ** 2

    # Using one latitude for both points can push `a` above 1 for very large
    # differences, which would make sqrt(1 - a) fail. A comparison is used
    # instead of min() so that NaN is left alone and propagates to the result.
    if a > 1.0:
        a = 1.0

    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # Kilometres to metres
    return R * c * 1000

In [165]:
# Calculate speed (noted as distance since it's by second anyway) based on change in coordinates from the previous
# Helps establish if ejeep is truly within station

# A sample that moved at least this far since the previous one is treated as
# still travelling, even if its position falls inside a station rectangle.
# Same unit as 'Distance' (metres per sample).
MOVING_THRESHOLD_M = 0.5

df['Lat Diff'] = df['Latitude'].diff()
df['Long Diff'] = df['Longitude'].diff()

# The first row has no predecessor, so its differences and distance are NaN.
df['Distance'] = [
    haversine_distance(lat_step, long_step)
    for lat_step, long_step in zip(df['Lat Diff'], df['Long Diff'])
]

# Rows already labelled 'En Route' are unaffected by this assignment, so the
# mask only needs the distance test. NaN never satisfies >=, which leaves the
# first row's label as it was.
df.loc[df['Distance'] >= MOVING_THRESHOLD_M, 'Station'] = 'En Route'

In [166]:
# Add distances (by road) to next station

# Every station row opens a new leg; the En Route rows after it share its id.
at_station = df['Station'] != 'En Route'
leg_id = at_station.cumsum()

# Walking each leg backwards, the running total of 'Distance' is the road
# distance still to be covered before the leg ends.
# NOTE(review): the rows after the last station visit form a leg that ends
# with the log, so their value is the distance to the end of the recording,
# not to a station - confirm that is acceptable.
df['Cum Distance'] = df[::-1].groupby(leg_id)['Distance'].cumsum()

# While standing at a station nothing remains to be covered
df.loc[at_station, 'Cum Distance'] = 0

df.dtypes


Time            datetime64[ns]
Altitude               float64
Humidity               float64
Temperature            float64
Day_of_Week             object
Latitude               float64
Longitude              float64
Station                 object
Lat Diff               float64
Long Diff              float64
Distance               float64
Cum Distance           float64
dtype: object

In [167]:
# Add distances (as the crow flies) to next station

def station_centroids(rects=None) -> dict:
    """Compute the centre point of every station rectangle.

    Parameters
    ----------
    rects : dict, optional
        Mapping of station name to
        ``{'Latitude': {'lower', 'higher'}, 'Longitude': {'lower', 'higher'}}``.
        Defaults to the notebook-level ``station_coord_rects``.

    Returns
    -------
    dict
        ``{name: {'Latitude': float, 'Longitude': float}}`` holding the
        midpoint of each range, preceded by an ``'En Route'`` entry whose
        coordinates are both ``None`` (there is no centre while travelling).
    """
    if rects is None:
        rects = station_coord_rects

    centroids = {'En Route': {'Latitude': None, 'Longitude': None}}

    for station, bounds in rects.items():
        centroids[station] = {
            axis: (bounds[axis]['lower'] + bounds[axis]['higher']) / 2
            for axis in ('Latitude', 'Longitude')
        }

    return centroids

centers = station_centroids()

# Plain name -> coordinate lookups. 'En Route' has no centroid and is left
# out, so Series.map turns those rows into NaN.
station_lat = {name: c['Latitude'] for name, c in centers.items() if c['Latitude'] is not None}
station_long = {name: c['Longitude'] for name, c in centers.items() if c['Longitude'] is not None}

# Back-fill: every En Route row takes the centroid of the next station that
# appears below it. Rows after the last station visit stay NaN.
# (bfill() replaces interpolate(method='bfill'), which pandas has deprecated.)
df['Next Station Lat'] = df['Station'].map(station_lat).bfill()
df['Next Station Long'] = df['Station'].map(station_long).bfill()

# Straight-line distance from the current position to that centroid
df['Abs Distance'] = [
    haversine_distance(target_lat - lat, target_long - long)
    for target_lat, lat, target_long, long in zip(
        df['Next Station Lat'], df['Latitude'],
        df['Next Station Long'], df['Longitude'],
    )
]

df

Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude,Station,Lat Diff,Long Diff,Distance,Cum Distance,Next Station Lat,Next Station Long,Abs Distance
178,2023-11-09 05:55:33,51.50,70.900002,30.799999,Thursday,14.635590,121.074662,En Route,,,,,14.640187,121.07852,667.432115
179,2023-11-09 05:55:34,50.90,70.900002,30.799999,Thursday,14.635586,121.074655,En Route,-0.000003,-0.000007,0.845415,2103.025599,14.640187,121.07852,668.204555
180,2023-11-09 05:55:35,50.40,70.900002,30.799999,Thursday,14.635565,121.074634,En Route,-0.000021,-0.000021,3.289451,2102.180185,14.640187,121.07852,671.484858
181,2023-11-09 05:55:36,50.00,70.900002,30.799999,Thursday,14.635560,121.074631,En Route,-0.000005,-0.000003,0.607063,2098.890733,14.640187,121.07852,672.084800
182,2023-11-09 05:55:37,48.60,70.900002,30.799999,Thursday,14.635555,121.074624,En Route,-0.000006,-0.000008,1.049175,2098.283670,14.640187,121.07852,673.101512
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45042,2023-11-09 18:23:14,69.78,68.099998,32.500000,Thursday,14.633540,121.075090,En Route,0.000013,0.000030,3.613605,24.011596,,,
45043,2023-11-09 18:23:15,69.74,68.099998,32.500000,Thursday,14.633553,121.075120,En Route,0.000013,0.000030,3.613605,20.397991,,,
45044,2023-11-09 18:23:16,69.70,68.099998,32.500000,Thursday,14.633566,121.075150,En Route,0.000013,0.000030,3.613605,16.784386,,,
45045,2023-11-09 18:23:17,69.70,68.099998,32.500000,Thursday,14.633533,121.075228,En Route,-0.000033,0.000078,9.434565,13.170781,,,


In [178]:
# Do percentage of distance remaining to next station

# Every station row opens a new leg; the En Route rows after it share its id.
at_station = df['Station'] != 'En Route'
leg_id = at_station.cumsum()

# Largest 'Cum Distance' on each leg, looked at over the En Route rows only
# and broadcast back to those rows.
en_route_rows = df[df['Station'] == 'En Route']
max_distance_per_group = en_route_rows.groupby(leg_id)['Cum Distance'].transform('max')

# 'Cum Distance' as a percentage of its leg's maximum. Station rows have no
# entry in max_distance_per_group, so index alignment leaves them as NaN.
df['Percent Distance'] = df['Cum Distance'] / max_distance_per_group * 100

df.to_csv('Out.csv')

df.dtypes


Time                 datetime64[ns]
Altitude                    float64
Humidity                    float64
Temperature                 float64
Day_of_Week                  object
Latitude                    float64
Longitude                   float64
Station                      object
Lat Diff                    float64
Long Diff                   float64
Distance                    float64
Cum Distance                float64
Next Station Lat            float64
Next Station Long           float64
Abs Distance                float64
Percent Distance            float64
dtype: object

In [None]:
# Add distance (by road) to the previous station

# Every station row opens a new leg; the En Route rows after it share its id.
at_station = df['Station'] != 'En Route'
leg_id = at_station.cumsum()

# Running total of 'Distance' from the start of each leg, i.e. the road
# distance covered since the station that opened it ...
distance_since_station = df.groupby(leg_id)['Distance'].cumsum()

# ... and zero on the station rows themselves.
df['Prev Cum Distance'] = distance_since_station.mask(at_station, 0)

df

In [None]:
# Add distance (as the crow flies) to the previous station

# Plain name -> coordinate lookups built from `centers`. 'En Route' has no
# centroid and is left out, so Series.map turns those rows into NaN.
prev_station_lat = {name: c['Latitude'] for name, c in centers.items() if c['Latitude'] is not None}
prev_station_long = {name: c['Longitude'] for name, c in centers.items() if c['Longitude'] is not None}

# Forward-fill: every En Route row takes the centroid of the last station
# seen above it. Rows before the first station visit stay NaN.
# (ffill() replaces interpolate(method='ffill'), which pandas has deprecated.)
df['Prev Station Lat'] = df['Station'].map(prev_station_lat).ffill()
df['Prev Station Long'] = df['Station'].map(prev_station_long).ffill()

# Straight-line distance from the current position back to that centroid
df['Prev Abs Distance'] = [
    haversine_distance(origin_lat - lat, origin_long - long)
    for origin_lat, lat, origin_long, long in zip(
        df['Prev Station Lat'], df['Latitude'],
        df['Prev Station Long'], df['Longitude'],
    )
]