In [69]:
import json
from io import StringIO

import numpy as np
import pandas as pd
import pgeocode
import requests
from bs4 import BeautifulSoup

In [46]:
def get_npl_data():
    """Scrape the club list from the Northern Premier League Wikipedia page.

    Downloads the article, parses the first table whose class attribute is
    exactly "wikitable sortable" and keeps the 'Club' and 'Home ground'
    columns. The value "Warrington Rylands 1906" is replaced by
    "Warrington Rylands", which is the spelling used as a key in
    ``npl_teams_postcodes``.

    Returns:
        pandas.DataFrame: columns 'Club' and 'Home ground'.

    Raises:
        requests.HTTPError: if Wikipedia answers with an error status.
        ValueError: if the expected table is not present in the page.
    """
    response = requests.get(
        "https://en.wikipedia.org/wiki/Northern_Premier_League", timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    table_html = soup.find("table", {"class": "wikitable sortable"})
    if table_html is None:
        # Without this guard the literal string "None" would be handed to
        # read_html and produce a confusing parser error.
        raise ValueError(
            "No 'wikitable sortable' table found on the Northern Premier League page")

    # read_html expects a file-like object; passing literal HTML is deprecated.
    df = pd.read_html(StringIO(str(table_html)))[0]
    df = df[['Club', 'Home ground']]
    df = df.replace("Warrington Rylands 1906", "Warrington Rylands")

    return df

In [6]:
# Hand-maintained lookup: club name -> postcode string.
# The keys are matched against the 'Club' column when the postcode table is
# merged in join_postcode_data(), so they must use the same spelling.
# convert_postcodes() iterates the values in insertion order, so the order of
# the entries determines the row order of the coordinate frame it returns.
# NOTE(review): presumably each postcode is the club's home ground - confirm.
npl_teams_postcodes = {
    'Ashton United': 'OL6 8DY',
    'Atherton Collieries': 'M46 9EY',
    'Bamber Bridge': 'PR5 6UX',
    'Belper Town': 'DE56 1BA',
    'FC United of Manchester': 'M40 0FJ',
    'Gainsborough Trinity': 'DN21 2QW',
    'Guiseley': 'LS20 8BT',
    'Hyde United': 'SK14 5PL',
    'Lancaster City': 'LA1 5PE',
    'Liversedge': 'WF15 8DF',
    'Marine': 'L23 3AS',
    'Marske United': 'TS11 7BW',
    'Matlock Town': 'DE4 3AR',
    'Morpeth Town': 'NE61 2YX',
    'Nantwich Town': 'CW5 5UP',
    'Radcliffe': 'M26 3PE',
    'South Shields': 'NE32 3UP',
    'Stafford Rangers': 'ST16 3UF',
    'Stalybridge Celtic': 'SK15 2RT',
    'Warrington Rylands': 'WA2 7RZ',
    'Warrington Town': 'WA4 2RS',
    'Whitby Town': 'YO21 3HZ',
    }

In [63]:
def convert_postcodes():
    """Look up a latitude/longitude pair for every postcode in ``npl_teams_postcodes``.

    Each postcode is resolved exactly once through ``pgeocode.Nominatim('GB')``;
    the single lookup result supplies both coordinates.

    Returns:
        pandas.DataFrame: float columns 'lat' and 'long' with a default
        integer index, one row per dictionary entry in the dictionary's
        iteration order.

    NOTE(review): pgeocode's 'GB' dataset appears to resolve only the outward
    part of a postcode, so coordinates may be approximate - confirm that this
    precision is sufficient.
    """
    nomi = pgeocode.Nominatim('GB')

    coords = []
    for postcode in npl_teams_postcodes.values():
        # One lookup per postcode instead of one per coordinate.
        location = nomi.query_postal_code(postcode)
        coords.append((location.latitude, location.longitude))

    # dtype=float keeps the columns numeric even when there are no rows.
    return pd.DataFrame(coords, columns=['lat', 'long'], dtype=float)

In [55]:
def join_postcode_data(df, lat_long):
    """Attach postcode and coordinates to each club, matching on the club name.

    ``lat_long`` is positional: row k holds the coordinates of the k-th entry
    of ``npl_teams_postcodes``. The coordinates are therefore glued to the
    postcode table first (both are in dictionary order) and only then merged
    onto ``df`` by 'Club'. Concatenating ``lat_long`` directly onto ``df``
    would assign wrong coordinates whenever ``df`` lists the clubs in a
    different order, or contains a different set of clubs, than the dictionary.

    Args:
        df: frame with a 'Club' column (e.g. the scraped league table).
        lat_long: frame with 'lat' and 'long' columns, one row per entry of
            ``npl_teams_postcodes`` in dictionary order.

    Returns:
        pandas.DataFrame: the columns of ``df`` followed by 'postcode', 'lat'
        and 'long'. Clubs without a dictionary entry get NaN in those columns.

    Raises:
        ValueError: if ``lat_long`` does not have one row per dictionary entry.
    """
    team_postcode_df = pd.DataFrame(
        list(npl_teams_postcodes.items()), columns=['Club', 'postcode'])

    if len(lat_long) != len(team_postcode_df):
        raise ValueError(
            f"lat_long has {len(lat_long)} rows but npl_teams_postcodes has "
            f"{len(team_postcode_df)} entries")

    # Positional pairing is only valid against the dictionary order, so drop
    # whatever index lat_long arrived with before concatenating.
    coords = lat_long.reset_index(drop=True)
    team_geo_df = pd.concat([team_postcode_df, coords], axis=1)

    final_df = df.merge(team_geo_df, on='Club', how='left')

    return final_df

In [64]:
# Scrape the club table from Wikipedia and geocode the hard-coded postcodes.
df = get_npl_data()
lat_long = convert_postcodes()

In [65]:
# Combine clubs, postcodes and coordinates; final_df is read as a global by
# create_dist_array() and create_distances_df().
final_df = join_postcode_data(df,lat_long)

In [66]:
def get_distance(point1: dict, point2: dict) -> tuple:
    """Get the driving distance and duration between two points.

    Queries the route service of the public OSRM server, see
    http://project-osrm.org/docs/v5.10.0/api/#route-service

    Args:
        point1: origin, a mapping with the keys "lat" and "long".
        point2: destination, a mapping with the keys "lat" and "long".

    Returns:
        tuple: ``(distance, duration)`` of the first route in the response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        ValueError: if the response reports a code other than "Ok" or
            contains no route.
    """
    # OSRM expects coordinates as longitude,latitude.
    origin = f"""{point1["long"]},{point1["lat"]}"""
    destination = f"""{point2["long"]},{point2["lat"]}"""
    url = f"""http://router.project-osrm.org/route/v1/driving/{origin};{destination}?overview=false&alternatives=false"""

    # A timeout keeps one stalled request from hanging the whole notebook.
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    payload = r.json()
    routes = payload.get("routes")
    if payload.get("code") != "Ok" or not routes:
        raise ValueError(
            f"OSRM returned no route for {url}: "
            f"{payload.get('code')} {payload.get('message', '')}")

    # get the distance from the returned values
    route = routes[0]
    return (route["distance"], route["duration"])

In [67]:
def create_dist_array(df=None):
    """Collect distance and duration for every ordered pair of distinct rows.

    ``get_distance`` is called once per ordered pair, so a frame with n rows
    triggers n * (n - 1) calls.

    Args:
        df: frame with 'lat' and 'long' columns. Defaults to the
            notebook-level ``final_df`` so existing ``create_dist_array()``
            calls keep working.

    Returns:
        list: tuples ``(origin, destination, duration, distance)`` where
        origin and destination are index labels of ``df``, ordered by origin
        and then by destination.
    """
    if df is None:
        df = final_df

    # Extract the coordinates once rather than re-filtering and re-iterating
    # the frame for every origin row.
    stops = [
        (label, {"lat": lat, "long": lon})
        for label, lat, lon in zip(
            df.index.tolist(), df["lat"].tolist(), df["long"].tolist())
    ]

    dist_array = []
    for i, point1 in stops:
        for j, point2 in stops:
            if j == i:
                # A club does not travel to itself.
                continue
            dist, duration = get_distance(point1, point2)
            dist_array.append((i, j, duration, dist))

    return dist_array

In [70]:
# Slow cell: issues one routing request per ordered pair of clubs against the
# public OSRM server.
dist_array = create_dist_array()

In [71]:
def _format_hhmmss(seconds):
    """Render a duration in seconds as 'HH:MM:SS'; fractions are truncated, hours never wrap."""
    if pd.isna(seconds):
        return float("nan")
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"


def create_distances_df(dist_records=None, clubs_df=None):
    """Build the pairwise distance table with club names and readable units.

    Args:
        dist_records: iterable of ``(origin, destination, duration, distance)``
            tuples. Defaults to the notebook-level ``dist_array``.
        clubs_df: frame with a 'Club' column whose index labels match the
            origin/destination values. Defaults to the notebook-level
            ``final_df``.

    Returns:
        pandas.DataFrame: columns 'origin', 'destination', 'duration(s)',
        'distance(m)', 'origin_name', 'destination_name', 'distance(miles)'
        and 'duration(hhmmss)'.
    """
    if dist_records is None:
        dist_records = dist_array
    if clubs_df is None:
        clubs_df = final_df

    metres_to_miles = 0.000621371
    club_names = clubs_df[["Club"]]

    distances_df = pd.DataFrame(dist_records,columns=["origin","destination","duration(s)","distance(m)"])
    distances_df = distances_df.merge(club_names, left_on = "origin", right_index=True).rename(columns={"Club":"origin_name"})
    distances_df = distances_df.merge(club_names, left_on = "destination", right_index=True).rename(columns={"Club":"destination_name"})
    # Vectorised multiplication instead of a per-row lambda.
    distances_df['distance(miles)'] = distances_df['distance(m)'] * metres_to_miles
    # Formatting through a datetime would wrap at 24 hours (90000 s -> "01:00:00").
    distances_df['duration(hhmmss)'] = distances_df['duration(s)'].map(_format_hhmmss)

    return distances_df

In [72]:
# Display the pairwise distance/duration table (the result is shown, not stored).
create_distances_df()

Unnamed: 0,origin,destination,duration(s),distance(m),origin_name,destination_name,distance(miles),duration(hhmmss)
0,0,1,1933.8,36141.0,Ashton United,Atherton Collieries,22.456969,00:32:13
43,2,1,1749.6,32541.6,Bamber Bridge,Atherton Collieries,20.220407,00:29:09
64,3,1,6633.7,109098.5,Belper Town,Atherton Collieries,67.790644,01:50:33
85,4,1,1744.9,31017.4,FC United of Manchester,Atherton Collieries,19.273313,00:29:04
106,5,1,7769.9,165461.1,Gainsborough Trinity,Atherton Collieries,102.812729,02:09:29
...,...,...,...,...,...,...,...,...
357,17,0,4703.0,104238.2,Stafford Rangers,Ashton United,64.770595,01:18:23
378,18,0,349.2,2825.8,Stalybridge Celtic,Ashton United,1.755870,00:05:49
399,19,0,2311.3,47181.8,Warrington Rylands,Ashton United,29.317402,00:38:31
420,20,0,2082.8,45532.3,Warrington Town,Ashton United,28.292451,00:34:42
