In [23]:
import googlemaps
import pandas as pd
import time
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import re

In [27]:
import sys
sys.path.append('/Users/biryani/Documents/QMUL/Sem C + Thesis/credentials')
import creds

In [7]:
tfl_df = pd.read_csv('../data/station-with-no-coordinates.csv')
tfl_df.head()

Unnamed: 0,Line,Station Name
0,Bakerloo,Baker Street
1,Bakerloo,Charing Cross
2,Bakerloo,Edgware Road
3,Bakerloo,Elephant & Castle
4,Bakerloo,Embankment


### In order to fetch the coordinates for each station we are using Google Maps API

In [26]:
gmaps = googlemaps.Client(key=creds.API_KEY)

In [9]:
def get_coordinates(station, line):
    try:
        # First, try with station name and line
        result = gmaps.geocode(f"{station} {line} Station, London")
        if result:
            location = result[0]['geometry']['location']
            return location['lat'], location['lng']
        
        # If not found, try with just station name
        result = gmaps.geocode(f"{station} Station, London")
        if result:
            location = result[0]['geometry']['location']
            return location['lat'], location['lng']
        
        # If still not found, return None
        return None, None
    except Exception as e:
        print(f"Error fetching coordinates for {station}: {str(e)}")
        return None, None
    
def add_coordinates_to_df(df):
    # Add new columns for latitude and longitude
    df['Latitude'] = None
    df['Longitude'] = None
    
    for index, row in df.iterrows():
        station = row['Station Name']
        line = row['Line']
        
        # Check if we already have coordinates for this station
        if pd.isnull(df.loc[index, 'Latitude']):
            lat, lng = get_coordinates(station, line)
            
            # Update all rows with this station name
            df.loc[df['Station Name'] == station, 'Latitude'] = lat
            df.loc[df['Station Name'] == station, 'Longitude'] = lng
            
            print(f"Added coordinates for {station}: {lat}, {lng}")
            
            # Sleep to avoid hitting API rate limits
            time.sleep(0.1)
    
    return df

In [10]:
tfl_df = add_coordinates_to_df(tfl_df)
tfl_df.head(10)

Added coordinates for Baker Street: 51.5231548, -0.156863
Added coordinates for Charing Cross: 51.50813729999999, -0.1247624
Added coordinates for Edgware Road: 51.5202914, -0.1701944
Added coordinates for Elephant & Castle: 51.5048541, -0.1136621
Added coordinates for Embankment: 51.5032466, -0.112293
Added coordinates for Harlesden: 51.5032466, -0.112293
Added coordinates for Harrow & Wealdstone: 51.59172359999999, -0.3343587
Added coordinates for Kensal Green: 51.5032466, -0.112293
Added coordinates for Kenton: 51.5032466, -0.112293
Added coordinates for Kilburn Park: 51.53504119999999, -0.1938988
Added coordinates for Lambeth North: 51.4989177, -0.1121086
Added coordinates for Maida Vale: 51.5032466, -0.112293
Added coordinates for Marylebone: 51.5032466, -0.112293
Added coordinates for North Wembley: 51.5625616, -0.3175595
Added coordinates for Oxford Circus: 51.51521169999999, -0.1418553
Added coordinates for Paddington: 51.5032466, -0.112293
Added coordinates for Piccadilly Circ

Unnamed: 0,Line,Station Name,Latitude,Longitude
0,Bakerloo,Baker Street,51.523155,-0.156863
1,Bakerloo,Charing Cross,51.508137,-0.124762
2,Bakerloo,Edgware Road,51.520291,-0.170194
3,Bakerloo,Elephant & Castle,51.504854,-0.113662
4,Bakerloo,Embankment,51.503247,-0.112293
5,Bakerloo,Harlesden,51.503247,-0.112293
6,Bakerloo,Harrow & Wealdstone,51.591724,-0.334359
7,Bakerloo,Kensal Green,51.503247,-0.112293
8,Bakerloo,Kenton,51.503247,-0.112293
9,Bakerloo,Kilburn Park,51.535041,-0.193899


In [12]:
def check_coordinates(df):
    missing_coords = df[df['Latitude'].isnull() | df['Longitude'].isnull()]
    
    if missing_coords.empty:
        print("All stations have coordinates.")
    else:
        print(f"{len(missing_coords)} stations are missing coordinates:")
        for _, row in missing_coords.iterrows():
            print(f"- {row['Station Name']} ({row['Line']})")
        
    return missing_coords

missing = check_coordinates(tfl_df)

All stations have coordinates.


In [13]:
tfl_df.to_csv('../data/tfl_stations_with_coordinates.csv', index=False)