In [None]:
import numpy as np
import pandas as pd
import math
from math import sin, cos, sqrt, atan2, radians
import googlemaps
from datetime import datetime
# Build the Google Maps client. The key is taken from the GOOGLE_MAPS_API_KEY
# environment variable so a real credential never has to be saved in the notebook;
# the placeholder below is only the fallback and must be replaced with a valid key
# when the environment variable is not set.
import os
gmaps = googlemaps.Client(key = os.environ.get('GOOGLE_MAPS_API_KEY', 'Your_API_key'))

In [None]:
# Load the participant geocoordinates and the airport reference table
df, df_airport = (
    pd.read_csv(csv_name)
    for csv_name in ('Geocoordinate.csv', 'World60_US60_airport.csv')
)
# Preview the first rows of both tables, participants first
print(df.head(), df_airport.head(), sep='\n')

In [None]:
# Define a function for calculating the great-circle distance based on geocoordinates
def H_dist(origin_lat,origin_lon,dest_lat,dest_lon):
    """Return the haversine great-circle distance between two points.

    Both latitudes are first converted to reduced (parametric) latitudes using
    the WGS-84 flattening, then the haversine formula is applied and the central
    angle is scaled by the equatorial radius R = 6378.137, so the result is in
    kilometres.

    Parameters
    ----------
    origin_lat, origin_lon : float
        Latitude and longitude of the origin, in degrees.
    dest_lat, dest_lon : float
        Latitude and longitude of the destination, in degrees.

    Returns
    -------
    float
        Distance in kilometres. NaN inputs propagate to a NaN result.
    """
    R = 6378.137
    flattening = 1/298.257223563
    lat1 = atan2((1-flattening)*sin(origin_lat*math.pi/180), cos(origin_lat*math.pi/180))
    lon1 = origin_lon*math.pi/180
    lat2 = atan2((1-flattening)*sin(dest_lat*math.pi/180), cos(dest_lat*math.pi/180))
    lon2 = dest_lon*math.pi/180
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt(1 - a) raise ValueError.
    # Explicit comparisons (rather than min/max) keep a NaN `a` untouched.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    distance = R * c
    return distance

# Define a function for finding the nearest airports to participants
def nearest_airport(origin_lat, origin_lon):
    """Locate the airport in the global `df_airport` table closest to a point.

    Every row of `df_airport` is measured against the origin with `H_dist`,
    reading the airport latitude from column position 3 and the longitude from
    column position 4.

    Parameters
    ----------
    origin_lat, origin_lon : latitude and longitude of the origin, passed
        straight through to `H_dist`.

    Returns
    -------
    tuple
        (min_dist, min_index): the smallest distance found and the positional
        row number in `df_airport` of the first airport at that distance.
    """
    distances = [
        H_dist(origin_lat, origin_lon, df_airport.iloc[k, 3], df_airport.iloc[k, 4])
        for k in range(df_airport.shape[0])
    ]
    closest = min(distances)
    return closest, distances.index(closest)

# Define a function to execute the nearest_airport function in batch
def nearest_airport_batch(row):
    """Row-wise adapter for `DataFrame.apply(..., axis=1)`.

    Reads the 'Source_lat' and 'Source_long' entries of `row` and returns the
    (min_dist, min_index) pair produced by `nearest_airport` for that point.
    """
    return nearest_airport(row['Source_lat'], row['Source_long'])

# Create a list for the nearest airport index and distance:
# nearest_airport_batch is applied to every row of df (axis=1), giving one
# (min_dist, min_index) result per participant row, keyed by df's index.
nearest_airport_list = df.apply(nearest_airport_batch, axis=1)    

In [None]:
# combine the nearest airports, their location, and their geocoordinates into df
# nearest_airport_list holds one (distance, airport row position) pair per row of
# df, so the stacking must run over its own length -- not over the number of
# airports, which truncates the participants or raises IndexError when the two
# tables differ in size.
nearest_airport_df = np.stack([nearest_airport_list[nearest_airport_list.index[i]] for i in range(len(nearest_airport_list))], axis=0)
nearest_airport_dist = nearest_airport_df[:,0].flatten()
# np.stack promotes the mixed (float, int) pairs to a float array; positional
# indexing with .iloc needs integer positions, so cast back explicitly.
nearest_airport_index = nearest_airport_df[:,1].flatten().astype(int)
# NOTE: this cell overwrites df, so it is not idempotent -- re-run the loading
# cell before executing it a second time.
df = pd.concat([df.reset_index(),df_airport.iloc[nearest_airport_index, 0:].reset_index()],axis=1)
# Keep every column except positions 0 and 5 (presumably the two 'index' columns
# added by reset_index -- verify against the input column counts).
df = df.iloc[:,[1,2,3,4,6,7,8,9,10]]
df = df.rename({'Rank': 'Dest_Airport_Rank', 'Country': 'Dest_Country', 'Airport': 'Dest_Airport', 
                'lat': 'Dest_lat', 'long': 'Dest_long'}, axis='columns')

In [None]:
# Define the departure time: 7 Jul 2021 at 09:00
dt = datetime(2021, 7, 7)
newdatetime = datetime(dt.year, dt.month, dt.day, 9, 0)
print(newdatetime)

mode = []
# Computation of travel distances between the participant's locations and their nearest airport
def _first_distance_value(matrix):
    """Return 'distance.value' of the first element in a distance-matrix response."""
    return pd.json_normalize(matrix, ['rows','elements'])['distance.value'].values[0]


def calculate_dist(row):
    """Return the ground-travel distance from a participant to the nearest airport.

    For the listed European country codes both a rail (transit) and a driving
    distance are requested; rail is used unless the rail distance exceeds
    600 km while the driving distance is at most 500 km, in which case driving
    is used. For every other country only the driving distance is requested.

    Parameters
    ----------
    row : mapping with the keys 'Source_lat', 'Source_long', 'Dest_lat',
        'Dest_long' and 'Source_Country' (e.g. a DataFrame row).

    Returns
    -------
    The selected 'distance.value' from the response (the thresholds above treat
    it as metres), or None when the lookup failed.

    Side effects
    ------------
    Appends exactly one entry to the global list `mode` per call -- 'rail',
    'drive', or 'NaN' on any failure -- so that `mode` stays aligned with the
    rows processed. Progress and failure messages are printed.
    """
    european_codes = ['BE','CH','DE','ES','RER / FR','GB','IT','NL','SE']
    rail_limit = 600000    # 600 km
    drive_limit = 500000   # 500 km
    try:
        origins = (row['Source_lat'], row['Source_long'])
        destinations = (row['Dest_lat'], row['Dest_long'])
        if row['Source_Country'] in european_codes:
            # distance for rail transport in Europe
            rail_dist = _first_distance_value(
                gmaps.distance_matrix(origins, destinations, mode = 'transit', transit_mode = 'rail', departure_time = newdatetime))
            # distance for car transportation in Europe
            drive_dist = _first_distance_value(
                gmaps.distance_matrix(origins, destinations, mode = 'driving'))
            # Exception: if rail distance > 600 km and driving distance <= 500 km,
            # choose driving instead of rail; otherwise choose rail.
            if rail_dist > rail_limit and drive_dist <= drive_limit:
                print(rail_dist,'use driving instead of rail',drive_dist)
                chosen_mode, chosen_dist = 'drive', drive_dist
            else:
                print(rail_dist)
                chosen_mode, chosen_dist = 'rail', rail_dist
        else:
            # For regions outside Europe, calculate car transportation distances only
            chosen_dist = _first_distance_value(
                gmaps.distance_matrix(origins, destinations, mode = 'driving'))
            print(chosen_dist)
            chosen_mode = 'drive'
    except IndexError:
        print("Index was wrong...")
    except ValueError:
        print("Unexpected value...")
    except Exception:
        # e.g. no route found: the response element carries no distance field.
        # `Exception` (not a bare except) lets KeyboardInterrupt stop a long run.
        print("zero results")
    else:
        mode.append(chosen_mode)
        return chosen_dist
    # Every failure path records a placeholder so `mode` keeps one entry per row.
    mode.append('NaN')
    return None

In [None]:
# calculate_dist appends to the global `mode` list on every call; empty it first
# so re-running this cell does not leave entries from a previous run behind
# (which would make the length of `mode` differ from the number of rows in df).
mode.clear()
dist = df.apply(calculate_dist, axis = 1)
# Failed lookups come back as None; coerce to numeric so the conversion from
# metres to kilometres also works when every lookup failed (object dtype).
df['Ground_dist'] = pd.to_numeric(dist, errors='coerce')/1000
df['Mode'] = mode
df.to_csv(r'Participant_nearest_airport.csv')