#### Initial code for driving distances

In [15]:
import pandas as pd
import requests
import json


# Method for calling the API
def call_matrix_api(origins, destinations):
    url = 'https://maps.googleapis.com/maps/api/distancematrix/json'
    key = 'AIzaSyD-QE-jWmJ6yhNrIjkhusUwxCaP46JPSww'  # INSERT API KEY !!!
    params = {'key': key, 'origins': origins, 'destinations': destinations}

    req = requests.get(url=url, params=params)
    response = json.loads(req.content)
    return response


# Read the location data
df = pd.read_csv('datasets/train_routes.csv')

# Get the list of destinations
destinations = df['city_2'].value_counts().index

results = []
count_calls = 0
for destination in destinations:
    # Leave only rows with one destination
    df_temp = df[df['city_2'] == destination]
    df_temp = df_temp.reset_index()

    origins = ''
    origins_aray = []
    counter = 0

    # Put origins into right format: single string with | as separator
    for location in df_temp['city_1']:
        # Each string has at most 25 origins, in case of more create a list of strings
        if counter > 24:
            origins_aray.append(origins)
            origins = ''
            counter = 0

        origins += location + '|'
        counter += 1

    origins_aray.append(origins)

    distances = []
    times = []

    # For each string of origins, call the API
    for origins_string in origins_aray:
        api_response = call_matrix_api(origins_string, destination)
        count_calls += 1

        # Extract driving distance and time
        if api_response['status'] == 'OK':
            for row in api_response['rows']:
                for element in row['elements']:
                    if element['status'] == 'OK':
                        distances.append(element['distance']['value'])
                        times.append(element['duration']['value'])
                    else:
                        distances.append(-1)
                        times.append(-1)

    # Create result rows and append to the results list
    for i in range(len(distances)):
        results.append([df_temp['city_1'][i], destination, distances[i], times[i]])

# Create a dataframe out of the results list
results_df = pd.DataFrame(results, columns=['city_1', 'city_2', 'driving distance [m]', 'driving time [s]'])

# Export to CSV / Excel
results_df.to_csv('driving-distances.csv')

ModuleNotFoundError: No module named 'openpyxl'

#### Code for trains 

In [22]:
import pandas as pd
import requests
import json
import random
import time

MAX_RETRIES = 3
DELAY_SECONDS = 5

def call_matrix_api_with_retry(origins, destination, mode='transit'):
    for attempt in range(MAX_RETRIES):
        try:
            return call_matrix_api(origins, destination, mode)
        except ConnectionError as e:
            print(f"ConnectionError: {e}. Retrying in {DELAY_SECONDS} seconds...")
            time.sleep(DELAY_SECONDS)
    
    raise Exception("Maximum number of retries reached. Unable to establish a connection.")

# Read the location data
df = pd.read_csv('datasets/airport_cities_cleaned.csv')

# Randomize the order of rows for destinations
df['randomized_city'] = df['cleaned_city'].sample(frac=1).reset_index(drop=True)

# Get the list of destinations
destinations = df['cleaned_city'].value_counts().index

results = []

for destination in destinations:
    # Leave only rows with one destination
    df_temp = df[df['cleaned_city'] == destination]

    for index, row in df_temp.iterrows():
        origin = row['randomized_city']  # Use the randomized city for API request
        
        # Call the API for transit information (train mode)
        api_response = call_matrix_api(origin, destination, mode='transit')

        # Extract transit information
        if api_response['status'] == 'OK':
            for row in api_response['rows']:
                for element in row['elements']:
                    if element['status'] == 'OK':
                        transit_distance = element.get('distance', {}).get('value', -1)
                        transit_time = element.get('duration', {}).get('value', -1)  # in seconds

                        # Convert transit time from seconds to minutes
                        transit_time_minutes = transit_time / 60

                        results.append([origin, destination, transit_distance, transit_time, transit_time_minutes])
                    else:
                        results.append([origin, destination, -1, -1, -1])
        else:
            results.append([origin, destination, -1, -1, -1])

# Create a dataframe out of the results list
results_df = pd.DataFrame(results, columns=['randomized_city', 'cleaned_city', 'transit_distance [m]', 'transit_time [s]', 'transit_time [min]'])

# Export to CSV
results_df.to_csv('datasets/train-distances2.csv', index=False)

#### Rest

In [7]:
# Method for calling the API
def call_matrix_api(origins, destination, transit_mode=None):
    url = 'https://maps.googleapis.com/maps/api/distancematrix/json'
    key = 'AIzaSyD-QE-jWmJ6yhNrIjkhusUwxCaP46JPSww'  # INSERT API KEY !!!
    
    # Add transit_mode to the parameters if provided
    params = {'key': key, 'origins': origins, 'destinations': destination}
    
    if transit_mode:
        params['transit_mode'] = transit_mode

    req = requests.get(url=url, params=params)
    response = json.loads(req.content)
    return response


In [8]:
# Read the location data
df = pd.read_csv('datasets/match_airport_train.csv')
df

Unnamed: 0,city
0,Innsbruck
1,Salzburg
2,Barcelona
3,Timişoara
4,Bristol
...,...
67,Sevilla
68,Girona
69,Belfast
70,Napoli


In [9]:
# Get the list of destinations
destinations = df['city'].value_counts().index

In [10]:
results = []
count_calls = 0
for destination in destinations:
    # Leave only rows with one destination
    df_temp = df[df['city'] == destination]
    df_temp = df_temp.reset_index()

    origins = ''
    origins_array = []
    counter = 0

    # Put origins into the right format: single string with | as separator
    for location in df_temp['origin']:
        # Each string has at most 25 origins, in case of more create a list of strings
        if counter > 24:
            origins_array.append(origins)
            origins = ''
            counter = 0

        origins += location + '|'
        counter += 1

    origins_array.append(origins)

    distances = []
    times = []

    # For each string of origins, call the API with transit mode
    for origins_string in origins_array:
        api_response = call_matrix_api(origins_string, destination, transit_mode='transit')
        count_calls += 1

        # Extract transit distance and time
        if api_response['status'] == 'OK':
            for row in api_response['rows']:
                for element in row['elements']:
                    if element['status'] == 'OK':
                        distances.append(element.get('distance', {}).get('value', -1))
                        times.append(element.get('duration', {}).get('value', -1))
                    else:
                        distances.append(-1)
                        times.append(-1)

    # Create result rows and append to the results list
    for i in range(len(distances)):
        results.append([df_temp['origin'][i], destination, distances[i], times[i]])

KeyError: 'origin'

In [None]:
# Create a dataframe out of the results list
results_df = pd.DataFrame(results, columns=['origin', 'destination', 'transit distance [m]', 'transit time [s]'])

# Export to CSV / Excel
results_df.to_csv('transit-distances.csv')
#results_df.to_excel('transit-distances.xlsx')