In [79]:
import json

import pandas as pd
import requests


def load_club_data(file, rows=None):
    """
    Loads club data from a CSV file.

    Arguments:
    - file (str): The file path of the CSV file (anything `pd.read_csv` accepts).
    - rows (int, optional): The number of rows to load from the file.
    Defaults to None, which loads every row.

    Returns:
    - data (pd.DataFrame): The loaded club data as a pandas DataFrame.
    """
    # nrows=None is read_csv's own default, so omitting `rows` reads the whole file.
    return pd.read_csv(file, nrows=rows)


# Load the club table once at module level; later cells read this `data` frame by default.
data = load_club_data("postcodes_master_2025_26_test_smaller.csv")

In [74]:
def get_distance(point1: dict, point2: dict, timeout: float = 30.0) -> tuple:
    """
    Gets the driving distance and duration between two points using the OSRM route service:
    http://project-osrm.org/docs/v5.10.0/api/#route-service

    Arguments:
    - point1 (dict): A dictionary representing the latitude and longitude of the first point.
    - point2 (dict): A dictionary representing the latitude and longitude of the second point.
    - timeout (float): Seconds to wait for the server before giving up. Defaults to 30.

    Returns:
    - tuple: A tuple containing the distance (in meters) and duration (in seconds) of the route.

    Raises:
    - requests.exceptions.RequestException: If the request fails, times out, or the server
    answers with an HTTP error status.
    - ValueError: If the response contains no route.
    """
    # OSRM expects coordinates as "longitude,latitude" pairs separated by ";".
    url = (
        "http://router.project-osrm.org/route/v1/driving/"
        f"{point1['longitude']},{point1['latitude']};"
        f"{point2['longitude']},{point2['latitude']}"
        "?overview=false&alternatives=false"
    )
    # Without a timeout a stalled connection would block the notebook indefinitely.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # get the distance from the returned values
    routes = response.json().get("routes")
    if not routes:
        raise ValueError(f"OSRM returned no route for request: {url}")
    route = routes[0]
    return (route["distance"], route["duration"])

In [80]:
def create_dist_array(step=None, clubs=None, distance_fn=None):
    """
    Creates an array of distances between all combinations of points *within the same league*.

    Arguments:
    - step (str, optional): The league step to filter the dataset by (e.g. 'Step 3').
    Defaults to None, which keeps every row regardless of step.
    - clubs (pd.DataFrame, optional): Club table with "league", "latitude" and "longitude"
    columns (and "step" when a step filter is given). Defaults to None, which uses the
    module-level `data` frame.
    - distance_fn (callable, optional): Called as distance_fn(point1, point2) and expected
    to return (distance, duration). Defaults to None, which uses `get_distance`.

    Returns:
    - dist_array (list): A list of tuples containing the origin index, destination index,
    duration (in seconds), and distance (in meters) between each pair of points.
    """
    # Resolve the fallbacks at call time so the function also works when the globals
    # are (re)defined after this cell has run.
    if clubs is None:
        clubs = data
    if distance_fn is None:
        distance_fn = get_distance

    # `step` is an optional filter: None means "do not filter by step".
    step_data = clubs if step is None else clubs[clubs["step"] == step]

    dist_array = []
    for i, r in step_data.iterrows():
        point1 = {"latitude": r["latitude"], "longitude": r["longitude"]}
        league = r["league"]

        # Only compare with other teams in the same league
        same_league_df = step_data[(step_data.index != i) & (step_data["league"] == league)]

        # Both directions (i -> j and j -> i) are computed separately.
        for j, o in same_league_df.iterrows():
            point2 = {"latitude": o["latitude"], "longitude": o["longitude"]}
            dist, duration = distance_fn(point1, point2)
            dist_array.append((i, j, duration, dist))

    return dist_array

# Build the same-league distance list. Every ordered pair triggers one HTTP request in
# get_distance, so this cell can be slow for large tables.
dist_array = create_dist_array()

In [81]:
def _format_hhmmss(seconds):
    """Formats a duration in seconds as HH:MM:SS, dropping any fractional second."""
    if pd.isna(seconds):
        return float("nan")
    # divmod keeps counting hours past 23 instead of wrapping around like a clock time.
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"


def create_distances_df(distances=None, clubs=None):
    """
    Creates a DataFrame of distances between all combinations of points.

    Arguments:
    - distances (list, optional): Tuples of (origin index, destination index, duration in
    seconds, distance in meters). Defaults to None, which uses the module-level `dist_array`.
    - clubs (pd.DataFrame, optional): Club table with "team" and "league" columns, indexed by
    the same labels used in `distances`. Defaults to None, which uses the module-level `data`.

    Returns:
    - distances_df (pd.DataFrame): The DataFrame containing the distances between each pair of points,
    including origin and destination names, distance in miles, duration in HH:MM:SS format, and a fixture key.
    """
    # Resolve the fallbacks at call time so a bare create_distances_df() keeps working.
    if distances is None:
        distances = dist_array
    if clubs is None:
        clubs = data

    miles_per_meter = 0.000621371

    distances_df = pd.DataFrame(distances, columns=["origin", "destination", "duration(s)", "distance(m)"])

    # Attach team/league for the origin, then for the destination, via the club index.
    distances_df = distances_df.merge(clubs[["team", "league"]], left_on="origin", right_index=True).rename(
        columns={"team": "origin_name", "league": "origin_league"},
    )
    distances_df = distances_df.merge(clubs[["team", "league"]], left_on="destination", right_index=True).rename(
        columns={"team": "destination_name", "league": "destination_league"},
    )

    # Keep only journeys whose two ends are in the same league.
    distances_df = distances_df[
        distances_df["origin_league"] == distances_df["destination_league"]
    ].reset_index(drop=True)

    distances_df["distance(miles)"] = distances_df["distance(m)"] * miles_per_meter
    distances_df["duration(hhmmss)"] = distances_df["duration(s)"].map(_format_hhmmss)

    # The key is written destination first, then origin.
    distances_df["fixture_key"] = (
        distances_df["destination_name"].str.strip() + "-" + distances_df["origin_name"].str.strip().astype(str)
    )

    return distances_df

In [82]:
# Assemble the journey table from the distance list and show it as the cell output.
journeys_df = create_distances_df()
journeys_df

Unnamed: 0,origin,destination,duration(s),distance(m),origin_name,origin_league,destination_name,destination_league,distance(miles),duration(hhmmss),fixture_key
0,0,1,8783.6,180629.3,AFC Bournemouth,Premier League,Arsenal,Premier League,112.237809,02:26:23,Arsenal-AFC Bournemouth
1,2,1,8354.4,184985.1,Aston Villa,Premier League,Arsenal,Premier League,114.944377,02:19:14,Arsenal-Aston Villa
2,3,1,2309.7,25557.6,Brentford,Premier League,Arsenal,Premier League,15.880751,00:38:29,Arsenal-Brentford
3,4,1,6276.7,92712.0,Brighton & Hove Albion,Premier League,Arsenal,Premier League,57.608548,01:44:36,Arsenal-Brighton & Hove Albion
4,5,1,15785.2,368115.4,Burnley,Premier League,Arsenal,Premier League,228.736234,04:23:05,Arsenal-Burnley
...,...,...,...,...,...,...,...,...,...,...,...
2033,87,69,7596.3,155719.2,Shrewsbury Town,League Two,Accrington Stanley,League Two,96.759395,02:06:36,Accrington Stanley-Shrewsbury Town
2034,88,69,12966.5,309383.3,Swindon Town,League Two,Accrington Stanley,League Two,192.241811,03:36:06,Accrington Stanley-Swindon Town
2035,89,69,4315.4,83345.4,Tranmere Rovers,League Two,Accrington Stanley,League Two,51.788415,01:11:55,Accrington Stanley-Tranmere Rovers
2036,90,69,7216.6,171249.9,Walsall,League Two,Accrington Stanley,League Two,106.409722,02:00:16,Accrington Stanley-Walsall
