In [258]:
import pandas as pd
import csv
import numpy as np
import os

In [259]:
# Change the values that need to be changed manually here
station_coord_rects = {
    'Xavier': { 
        'Latitude': { 
            'lower': 14.640004,
            'higher': 14.640371
        },
        'Longitude': { 
            'lower': 121.078251,
            'higher': 121.078789
        }
    },
    'Old Comm': { 
        'Latitude': { 
            'lower': 14.636606,
            'higher': 14.636749
        },
        'Longitude': { 
            'lower': 121.078116,
            'higher': 121.078324
        }
    },
    'LST': { 
        'Latitude': { 
            'lower': 14.636115,
            'higher': 14.636302
        },
        'Longitude': { 
            'lower': 121.080644,
            'higher': 121.080738
        }
    },
    'Grade School': { 
        'Latitude': { 
            'lower': 14.634715,
            'higher': 14.634715
        },
        'Longitude': { 
            'lower': 121.076171,
            'higher': 121.076326
        }
    },
    'JSEC': { 
        'Latitude': { 
            'lower': 14.637686,
            'higher': 14.637804
        },
        'Longitude': { 
            'lower': 121.076260,
            'higher': 121.076370
        }
    },
    'Gate 2.5': { 
        'Latitude': { 
            'lower': 14.637874,
            'higher': 14.637927
        },
        'Longitude': { 
            'lower': 121.074848,
            'higher': 121.075020
        }
    },
    'Leong': { 
        'Latitude': { 
            'lower': 14.640724,
            'higher': 14.640864
        },
        'Longitude': { 
            'lower': 121.076177,
            'higher': 121.076395
        }
    },
    
}

In [260]:
next_station_dict = { 
    'Xavier': 'Old Comm',
    'Old Comm': 'LST',
    'LST': 'Grade School',
    'Grade School': 'Gate 2.5',
    'Gate 2.5': 'Leong',
    'Leong': 'Xavier'
}

# Raw Data
* Combine csv files from each rpi into one data frame
* Pre-process raw data

In [261]:
# Create a list to hold the dataframes
df_list = []

for i in range(3):
    i = i + 1
    folder_path = os.path.join('../E-Jeep Data', f"rpi{i}")
    all_files = os.listdir(folder_path)
    
    # Filter out non-CSV files
    csv_files = [f for f in all_files if f.endswith('.csv')]
    
    for csv in csv_files:
        file_path = os.path.join(folder_path, csv)
        file_path
        try:
            # Try reading the file using default UTF-8 encoding
            df = pd.read_csv(file_path)
            df.tail(10)

            df.columns = ['Type', 'Time', 'Lat', 'Long', 'Altitude', 'Humidity', 'Temperature']
            df.drop(columns='Type', inplace=True)

            # Drop rows with null at start and end
            df = df.loc[df.notnull().all(axis=1).argmax():]

            ids = df[df.notnull().all(axis=1)].index
            last_id = ids[-1]

            df = df.loc[:last_id]
            
            # Fix time to PHT
            log_date = os.path.basename(file_path).split('/')[-1]
            df['Time'] = pd.to_datetime(df['Time'])
            df['Time'] = df['Time'].dt.tz_convert('+08:00')
            df['Time'] = pd.to_datetime(df['Time'].dt.strftime(f"{log_date[5:15]} %H:%M:%S"))

            # Add day of week
            df['Day_of_Week'] = df['Time'].dt.day_name()

            # Iterpolate values for NaN GPS values
            df['Lat'] = df['Lat'].astype(float).interpolate(method='linear', limit_direction='both') 
            df['Long'] = df['Long'].astype(float).interpolate(method='linear', limit_direction='both') 
            df['Altitude'] = df['Altitude'].astype(float).interpolate(method='linear', limit_direction='both') 
            
            df_list.append(df)
        except UnicodeDecodeError:
            try:
                # If UTF-8 fails, try reading the file using UTF-16 encoding with tab separator
                df = pd.read_csv(file_path, sep='\t', encoding='utf-16')
                df_list.append(df)
            except Exception as e:
                print(f"Could not read file {csv} because of error: {e}")
        except Exception as e:
            print(f"Could not read file {csv} because of error: {e}")

# Concatenate all data into one DataFrame
df = pd.concat(df_list, ignore_index=True)
df

  df = pd.read_csv(file_path)


Could not read file data_2023-11-14_1.csv because of error: Length mismatch: Expected axis has 1 elements, new values have 7 elements
Could not read file data_2023-11-14.csv because of error: No columns to parse from file
Could not read file data_2023-11-11.csv because of error: No columns to parse from file
Could not read file data_2023-11-20_1.csv because of error: index -1 is out of bounds for axis 0 with size 0
Could not read file data_2023-11-20_2.csv because of error: index -1 is out of bounds for axis 0 with size 0


Unnamed: 0,Time,Lat,Long,Altitude,Humidity,Temperature,Day_of_Week
0,2023-11-17 05:38:34,1437.98141,12104.47769,84.8,83.199997,29.100000,Friday
1,2023-11-17 05:38:35,1437.98133,12104.47814,84.6,83.199997,29.100000,Friday
2,2023-11-17 05:38:36,1437.98115,12104.47881,84.0,83.199997,29.100000,Friday
3,2023-11-17 05:38:37,1437.98089,12104.47925,83.4,83.199997,29.100000,Friday
4,2023-11-17 05:38:38,1437.98052,12104.47947,83.1,83.199997,29.100000,Friday
...,...,...,...,...,...,...,...
520749,2023-11-14 00:56:36,1437.98425,12104.48160,107.0,76.000000,29.799999,Tuesday
520750,2023-11-14 00:56:37,1437.98409,12104.48159,106.6,76.000000,29.799999,Tuesday
520751,2023-11-14 00:56:38,1437.98388,12104.48164,106.1,76.000000,29.799999,Tuesday
520752,2023-11-14 00:56:39,1437.98349,12104.48167,105.6,76.000000,29.799999,Tuesday


In [262]:
# Reformat Lat and Long to be decimal coordinates
def ddmm_mmmm_to_decimal(coord):
    # Assuming coord is in the format 'ddmm.mmmm'
    
    # Extract degrees and decimal minutes
    degrees = coord // 100
    decimal_minutes = coord % 100
    
    # Convert decimal minutes to decimal degrees
    decimal_degrees = degrees + decimal_minutes / 60
    
    return decimal_degrees

df['Latitude'] = df.apply(lambda row: ddmm_mmmm_to_decimal(row['Lat']), axis=1)
df['Longitude'] = df.apply(lambda row: ddmm_mmmm_to_decimal(row['Long']), axis=1)

# Drop old Lat / Long values
df.drop(columns=['Lat', 'Long'], inplace=True)
df

Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude
0,2023-11-17 05:38:34,84.8,83.199997,29.100000,Friday,14.633024,121.074628
1,2023-11-17 05:38:35,84.6,83.199997,29.100000,Friday,14.633022,121.074636
2,2023-11-17 05:38:36,84.0,83.199997,29.100000,Friday,14.633019,121.074647
3,2023-11-17 05:38:37,83.4,83.199997,29.100000,Friday,14.633015,121.074654
4,2023-11-17 05:38:38,83.1,83.199997,29.100000,Friday,14.633009,121.074658
...,...,...,...,...,...,...,...
520749,2023-11-14 00:56:36,107.0,76.000000,29.799999,Tuesday,14.633071,121.074693
520750,2023-11-14 00:56:37,106.6,76.000000,29.799999,Tuesday,14.633068,121.074693
520751,2023-11-14 00:56:38,106.1,76.000000,29.799999,Tuesday,14.633065,121.074694
520752,2023-11-14 00:56:39,105.6,76.000000,29.799999,Tuesday,14.633058,121.074694


In [263]:
# Label stations along the line that the ejeep is in based on coordinates
def coord_to_station(lat, long) -> str:
    stations = station_coord_rects.keys()

    for station in stations:
        lat_bounds, long_bounds = station_coord_rects[station]['Latitude'], station_coord_rects[station]['Longitude']

        if (lat >= lat_bounds['lower'] - 0.0003 and lat <= lat_bounds['higher'] + 0.0003) and (long >= long_bounds['lower'] - 0.0003 and long <= long_bounds['higher'] + 0.0003):
            return station
    else:
        return "En Route"

df['Station'] = df.apply(lambda row: coord_to_station(row['Latitude'], row['Longitude']), axis=1)
df

# df.to_csv('Out.csv')


In [264]:
import math

# Function to calculate distance using Haversine formula
def haversine_distance(lat_diff, lon_diff):
    # Radius of the Earth in kilometers
    R = 6371.0

    # Convert latitude and longitude differences from degrees to radians
    lat_diff_rad = math.radians(lat_diff)
    lon_diff_rad = math.radians(lon_diff)

    # Haversine formula
    a = math.sin(lat_diff_rad / 2)**2 + math.cos(0) * math.cos(0) * math.sin(lon_diff_rad / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    # Calculate distance
    distance = R * c

    return distance * 1000

In [265]:
# Calculate speed (noted as distance since it's by second anyway) based on change in coordinates from the previous
# Helps establish if ejeep is truly within station

df['Lat Diff'] = df['Latitude'].diff()
df['Long Diff'] = df['Longitude'].diff()

df['Distance'] = df.apply(lambda row: haversine_distance(row['Lat Diff'], row['Long Diff']), axis=1)
df['Station'] = df.apply(lambda row: 'En Route' if row['Station'] != 'En Route' and row['Distance'] >= 0.5 else row['Station'], axis=1)

In [266]:
# Add distances (by road) to next station
df['Cum Distance'] = df[::-1].groupby((df['Station'] != 'En Route').cumsum())['Distance'].cumsum()
df.loc[df['Station'] != 'En Route', 'Cum Distance'] = 0

df.dtypes


Time            datetime64[ns]
Altitude               float64
Humidity               float64
Temperature            float64
Day_of_Week             object
Latitude               float64
Longitude              float64
Station                 object
Lat Diff               float64
Long Diff              float64
Distance               float64
Cum Distance           float64
dtype: object

In [267]:
# Add distances (as the bird flies) to next station

def station_centroids() -> dict:
    new_dict = dict()

    new_dict['En Route'] = { 
        'Latitude': None,
        'Longitude': None
    }

    for station in station_coord_rects:
        lat_ave = (station_coord_rects[station]['Latitude']['lower'] + station_coord_rects[station]['Latitude']['higher']) / 2
        long_ave = (station_coord_rects[station]['Longitude']['lower'] + station_coord_rects[station]['Longitude']['higher']) / 2

        new_dict[station] = { 
            'Latitude': lat_ave,
            'Longitude': long_ave
        }
    
    return new_dict

centers = station_centroids()

df['Next Station Lat'] = df.apply(lambda row: centers[row['Station']]['Latitude'], axis=1)
df['Next Station Long'] = df.apply(lambda row: centers[row['Station']]['Longitude'], axis=1)

df['Next Station Lat'] = df['Next Station Lat'].interpolate(method='bfill', limit_direction='backward') 
df['Next Station Long'] = df['Next Station Long'].interpolate(method='bfill', limit_direction='backward') 

df['Abs Distance'] = df.apply(lambda row: haversine_distance(row['Next Station Lat'] - row['Latitude'], row['Next Station Long'] - row['Longitude']), axis=1)

df

Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude,Station,Lat Diff,Long Diff,Distance,Cum Distance,Next Station Lat,Next Station Long,Abs Distance
0,2023-11-17 05:38:34,84.8,83.199997,29.100000,Friday,14.633024,121.074628,En Route,,,,,14.634715,121.076248,260.458596
1,2023-11-17 05:38:35,84.6,83.199997,29.100000,Friday,14.633022,121.074636,En Route,-1.333333e-06,7.500000e-06,0.847038,1410.173729,14.634715,121.076248,259.989719
2,2023-11-17 05:38:36,84.0,83.199997,29.100000,Friday,14.633019,121.074647,En Route,-3.000000e-06,1.116667e-05,1.285706,1409.326691,14.634715,121.076248,259.377196
3,2023-11-17 05:38:37,83.4,83.199997,29.100000,Friday,14.633015,121.074654,En Route,-4.333333e-06,7.333333e-06,0.947153,1408.040985,14.634715,121.076248,259.169243
4,2023-11-17 05:38:38,83.1,83.199997,29.100000,Friday,14.633009,121.074658,En Route,-6.166667e-06,3.666667e-06,0.797758,1407.093832,14.634715,121.076248,259.391665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520749,2023-11-14 00:56:36,107.0,76.000000,29.799999,Tuesday,14.633071,121.074693,En Route,-3.166667e-06,-1.666667e-07,0.352605,2.161636,,,
520750,2023-11-14 00:56:37,106.6,76.000000,29.799999,Tuesday,14.633068,121.074693,En Route,-2.666667e-06,-1.666667e-07,0.297098,1.809032,,,
520751,2023-11-14 00:56:38,106.1,76.000000,29.799999,Tuesday,14.633065,121.074694,En Route,-3.500000e-06,8.333333e-07,0.400061,1.511933,,,
520752,2023-11-14 00:56:39,105.6,76.000000,29.799999,Tuesday,14.633058,121.074694,En Route,-6.500000e-06,5.000000e-07,0.724902,1.111872,,,


In [268]:
# Do percentage of distance remaining to next station

# Calculate the maximum value of C within each group of 'X'
max_distance_per_group = df[df['Station'] == 'En Route'].groupby((df['Station'] != 'En Route').cumsum())['Cum Distance'].transform('max')

# Add column D representing the percentage of each value in column C relative to the maximum value within its group
df['Percent Distance'] = df['Cum Distance'] / max_distance_per_group * 100

df.to_csv('Out.csv')

df.dtypes


Time                 datetime64[ns]
Altitude                    float64
Humidity                    float64
Temperature                 float64
Day_of_Week                  object
Latitude                    float64
Longitude                   float64
Station                      object
Lat Diff                    float64
Long Diff                   float64
Distance                    float64
Cum Distance                float64
Next Station Lat            float64
Next Station Long           float64
Abs Distance                float64
Percent Distance            float64
dtype: object

In [269]:
# Add distance (by road) to the previous station
df['Prev Cum Distance'] = df.groupby((df['Station'] != 'En Route').cumsum())['Distance'].cumsum()
df.loc[df['Station'] != 'En Route', 'Prev Cum Distance'] = 0

df

Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude,Station,Lat Diff,Long Diff,Distance,Cum Distance,Next Station Lat,Next Station Long,Abs Distance,Percent Distance,Prev Cum Distance
0,2023-11-17 05:38:34,84.8,83.199997,29.100000,Friday,14.633024,121.074628,En Route,,,,,14.634715,121.076248,260.458596,,
1,2023-11-17 05:38:35,84.6,83.199997,29.100000,Friday,14.633022,121.074636,En Route,-1.333333e-06,7.500000e-06,0.847038,1410.173729,14.634715,121.076248,259.989719,100.000000,0.847038
2,2023-11-17 05:38:36,84.0,83.199997,29.100000,Friday,14.633019,121.074647,En Route,-3.000000e-06,1.116667e-05,1.285706,1409.326691,14.634715,121.076248,259.377196,99.939934,2.132744
3,2023-11-17 05:38:37,83.4,83.199997,29.100000,Friday,14.633015,121.074654,En Route,-4.333333e-06,7.333333e-06,0.947153,1408.040985,14.634715,121.076248,259.169243,99.848760,3.079897
4,2023-11-17 05:38:38,83.1,83.199997,29.100000,Friday,14.633009,121.074658,En Route,-6.166667e-06,3.666667e-06,0.797758,1407.093832,14.634715,121.076248,259.391665,99.781594,3.877656
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520749,2023-11-14 00:56:36,107.0,76.000000,29.799999,Tuesday,14.633071,121.074693,En Route,-3.166667e-06,-1.666667e-07,0.352605,2.161636,,,,0.203152,1062.558539
520750,2023-11-14 00:56:37,106.6,76.000000,29.799999,Tuesday,14.633068,121.074693,En Route,-2.666667e-06,-1.666667e-07,0.297098,1.809032,,,,0.170014,1062.855637
520751,2023-11-14 00:56:38,106.1,76.000000,29.799999,Tuesday,14.633065,121.074694,En Route,-3.500000e-06,8.333333e-07,0.400061,1.511933,,,,0.142092,1063.255699
520752,2023-11-14 00:56:39,105.6,76.000000,29.799999,Tuesday,14.633058,121.074694,En Route,-6.500000e-06,5.000000e-07,0.724902,1.111872,,,,0.104494,1063.980601


In [270]:
# Add distance (as the crow flies) to the previous station
df['Prev Station Lat'] = df.apply(lambda row: centers[row['Station']]['Latitude'], axis=1)
df['Prev Station Long'] = df.apply(lambda row: centers[row['Station']]['Longitude'], axis=1)

df['Prev Station Lat'] = df['Prev Station Lat'].interpolate(method='ffill', limit_direction='forward') 
df['Prev Station Long'] = df['Prev Station Long'].interpolate(method='ffill', limit_direction='forward') 

df['Prev Abs Distance'] = df.apply(lambda row: haversine_distance(row['Prev Station Lat'] - row['Latitude'], row['Prev Station Long'] - row['Longitude']), axis=1)

df

Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude,Station,Lat Diff,Long Diff,Distance,Cum Distance,Next Station Lat,Next Station Long,Abs Distance,Percent Distance,Prev Cum Distance,Prev Station Lat,Prev Station Long,Prev Abs Distance
0,2023-11-17 05:38:34,84.8,83.199997,29.100000,Friday,14.633024,121.074628,En Route,,,,,14.634715,121.076248,260.458596,,,,,
1,2023-11-17 05:38:35,84.6,83.199997,29.100000,Friday,14.633022,121.074636,En Route,-1.333333e-06,7.500000e-06,0.847038,1410.173729,14.634715,121.076248,259.989719,100.000000,0.847038,,,
2,2023-11-17 05:38:36,84.0,83.199997,29.100000,Friday,14.633019,121.074647,En Route,-3.000000e-06,1.116667e-05,1.285706,1409.326691,14.634715,121.076248,259.377196,99.939934,2.132744,,,
3,2023-11-17 05:38:37,83.4,83.199997,29.100000,Friday,14.633015,121.074654,En Route,-4.333333e-06,7.333333e-06,0.947153,1408.040985,14.634715,121.076248,259.169243,99.848760,3.079897,,,
4,2023-11-17 05:38:38,83.1,83.199997,29.100000,Friday,14.633009,121.074658,En Route,-6.166667e-06,3.666667e-06,0.797758,1407.093832,14.634715,121.076248,259.391665,99.781594,3.877656,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520749,2023-11-14 00:56:36,107.0,76.000000,29.799999,Tuesday,14.633071,121.074693,En Route,-3.166667e-06,-1.666667e-07,0.352605,2.161636,,,,0.203152,1062.558539,14.640187,121.07852,898.481995
520750,2023-11-14 00:56:37,106.6,76.000000,29.799999,Tuesday,14.633068,121.074693,En Route,-2.666667e-06,-1.666667e-07,0.297098,1.809032,,,,0.170014,1062.855637,14.640187,121.07852,898.751940
520751,2023-11-14 00:56:38,106.1,76.000000,29.799999,Tuesday,14.633065,121.074694,En Route,-3.500000e-06,8.333333e-07,0.400061,1.511933,,,,0.142092,1063.255699,14.640187,121.07852,899.050905
520752,2023-11-14 00:56:39,105.6,76.000000,29.799999,Tuesday,14.633058,121.074694,En Route,-6.500000e-06,5.000000e-07,0.724902,1.111872,,,,0.104494,1063.980601,14.640187,121.07852,899.661406


In [271]:
# Add Previous Station, and Next Station (up to 4 ahead)
df['Prev Station'] = df.apply(lambda row: row['Station'] if row['Station'] != 'En Route' else None, axis=1)
# df['Prev Station'] = df['Prev Station'].interpolate(method='ffill', limit_direction='forward') 

df['Next Station'] = df.apply(lambda row: next_station_dict.get(row['Prev Station'], None), axis=1)
df['2Next Station'] = df.apply(lambda row: next_station_dict.get(row['Next Station'], None), axis=1)
df['3Next Station'] = df.apply(lambda row: next_station_dict.get(row['2Next Station'], None), axis=1)
df['4Next Station'] = df.apply(lambda row: next_station_dict.get(row['3Next Station'], None), axis=1)

df

Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude,Station,Lat Diff,Long Diff,...,Percent Distance,Prev Cum Distance,Prev Station Lat,Prev Station Long,Prev Abs Distance,Prev Station,Next Station,2Next Station,3Next Station,4Next Station
0,2023-11-17 05:38:34,84.8,83.199997,29.100000,Friday,14.633024,121.074628,En Route,,,...,,,,,,,,,,
1,2023-11-17 05:38:35,84.6,83.199997,29.100000,Friday,14.633022,121.074636,En Route,-1.333333e-06,7.500000e-06,...,100.000000,0.847038,,,,,,,,
2,2023-11-17 05:38:36,84.0,83.199997,29.100000,Friday,14.633019,121.074647,En Route,-3.000000e-06,1.116667e-05,...,99.939934,2.132744,,,,,,,,
3,2023-11-17 05:38:37,83.4,83.199997,29.100000,Friday,14.633015,121.074654,En Route,-4.333333e-06,7.333333e-06,...,99.848760,3.079897,,,,,,,,
4,2023-11-17 05:38:38,83.1,83.199997,29.100000,Friday,14.633009,121.074658,En Route,-6.166667e-06,3.666667e-06,...,99.781594,3.877656,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520749,2023-11-14 00:56:36,107.0,76.000000,29.799999,Tuesday,14.633071,121.074693,En Route,-3.166667e-06,-1.666667e-07,...,0.203152,1062.558539,14.640187,121.07852,898.481995,,,,,
520750,2023-11-14 00:56:37,106.6,76.000000,29.799999,Tuesday,14.633068,121.074693,En Route,-2.666667e-06,-1.666667e-07,...,0.170014,1062.855637,14.640187,121.07852,898.751940,,,,,
520751,2023-11-14 00:56:38,106.1,76.000000,29.799999,Tuesday,14.633065,121.074694,En Route,-3.500000e-06,8.333333e-07,...,0.142092,1063.255699,14.640187,121.07852,899.050905,,,,,
520752,2023-11-14 00:56:39,105.6,76.000000,29.799999,Tuesday,14.633058,121.074694,En Route,-6.500000e-06,5.000000e-07,...,0.104494,1063.980601,14.640187,121.07852,899.661406,,,,,


In [274]:
df['Next Arrival'] = None
df['2Next Arrival'] = None
df['3Next Arrival'] = None
df['4Next Arrival'] = None

# Get ground truth (time it arrived at the specified next stations)
for index, row in df.iterrows():
    # We didn't ffill the station names, so we can skip over the None ones
    # Less rows to have to compute for, meaning that we save comp time
    if row['Prev Station'] == None:
        continue

    future_rows = df.iloc[index + 1:]
    next_station_index = future_rows.index.where(future_rows['Station'] == row['Next Station']).dropna().min()
    if not pd.isnull(next_station_index):
        next_station_index = int(next_station_index)
        df.at[index, 'Next Arrival'] = df.at[next_station_index, 'Time']

    next2_station_index = future_rows.index.where(future_rows['Station'] == row['2Next Station']).dropna().min()
    if not pd.isnull(next2_station_index):
        next2_station_index = int(next2_station_index)
        df.at[index, '2Next Arrival'] = df.at[next2_station_index, 'Time']

    next3_station_index = future_rows.index.where(future_rows['Station'] == row['3Next Station']).dropna().min()
    if not pd.isnull(next3_station_index):
        next3_station_index = int(next3_station_index)
        df.at[index, '3Next Arrival'] = df.at[next3_station_index, 'Time']
    
    next4_station_index = future_rows.index.where(future_rows['Station'] == row['4Next Station']).dropna().min()
    if not pd.isnull(next4_station_index):
        next4_station_index = int(next4_station_index)
        df.at[index, '4Next Arrival'] = df.at[next4_station_index, 'Time']

    
    

In [276]:
# Fill up all the NA values

df['Prev Station'] = df['Prev Station'].interpolate(method='ffill', limit_direction='forward')

df['Next Station'] = df['Next Station'].interpolate(method='ffill', limit_direction='forward')
df['2Next Station'] = df['2Next Station'].interpolate(method='ffill', limit_direction='forward')
df['3Next Station'] = df['3Next Station'].interpolate(method='ffill', limit_direction='forward')
df['4Next Station'] = df['4Next Station'].interpolate(method='ffill', limit_direction='forward')

df['Next Arrival'] = df['Next Arrival'].interpolate(method='ffill', limit_direction='forward')
df['2Next Arrival'] = df['2Next Arrival'].interpolate(method='ffill', limit_direction='forward')
df['3Next Arrival'] = df['3Next Arrival'].interpolate(method='ffill', limit_direction='forward')
df['4Next Arrival'] = df['4Next Arrival'].interpolate(method='ffill', limit_direction='forward')

df

Unnamed: 0,Time,Altitude,Humidity,Temperature,Day_of_Week,Latitude,Longitude,Station,Lat Diff,Long Diff,...,Prev Abs Distance,Prev Station,Next Station,2Next Station,3Next Station,4Next Station,Next Arrival,2Next Arrival,3Next Arrival,4Next Arrival
0,2023-11-17 05:38:34,84.8,83.199997,29.100000,Friday,14.633024,121.074628,En Route,,,...,,,,,,,NaT,NaT,NaT,NaT
1,2023-11-17 05:38:35,84.6,83.199997,29.100000,Friday,14.633022,121.074636,En Route,-1.333333e-06,7.500000e-06,...,,,,,,,NaT,NaT,NaT,NaT
2,2023-11-17 05:38:36,84.0,83.199997,29.100000,Friday,14.633019,121.074647,En Route,-3.000000e-06,1.116667e-05,...,,,,,,,NaT,NaT,NaT,NaT
3,2023-11-17 05:38:37,83.4,83.199997,29.100000,Friday,14.633015,121.074654,En Route,-4.333333e-06,7.333333e-06,...,,,,,,,NaT,NaT,NaT,NaT
4,2023-11-17 05:38:38,83.1,83.199997,29.100000,Friday,14.633009,121.074658,En Route,-6.166667e-06,3.666667e-06,...,,,,,,,NaT,NaT,NaT,NaT
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
520749,2023-11-14 00:56:36,107.0,76.000000,29.799999,Tuesday,14.633071,121.074693,En Route,-3.166667e-06,-1.666667e-07,...,898.481995,Xavier,Old Comm,LST,Grade School,Gate 2.5,2023-11-12 09:48:32,2023-11-12 09:48:32,2023-11-12 09:48:32,2023-11-12 09:46:42
520750,2023-11-14 00:56:37,106.6,76.000000,29.799999,Tuesday,14.633068,121.074693,En Route,-2.666667e-06,-1.666667e-07,...,898.751940,Xavier,Old Comm,LST,Grade School,Gate 2.5,2023-11-12 09:48:32,2023-11-12 09:48:32,2023-11-12 09:48:32,2023-11-12 09:46:42
520751,2023-11-14 00:56:38,106.1,76.000000,29.799999,Tuesday,14.633065,121.074694,En Route,-3.500000e-06,8.333333e-07,...,899.050905,Xavier,Old Comm,LST,Grade School,Gate 2.5,2023-11-12 09:48:32,2023-11-12 09:48:32,2023-11-12 09:48:32,2023-11-12 09:46:42
520752,2023-11-14 00:56:39,105.6,76.000000,29.799999,Tuesday,14.633058,121.074694,En Route,-6.500000e-06,5.000000e-07,...,899.661406,Xavier,Old Comm,LST,Grade School,Gate 2.5,2023-11-12 09:48:32,2023-11-12 09:48:32,2023-11-12 09:48:32,2023-11-12 09:46:42
