In [1]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# modules are reloaded automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import geopandas as geopd
from linear_prog_LEM_optimiser import prepare_data



In [3]:
bids_df = pd.read_csv('./data/bids.csv', index_col=0)

# Seeded generator so the synthetic coordinates below are identical on every
# Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)

# Extend a test dataset (bids at time 0) with random latitude/longitude
# coordinates roughly covering Manchester, plus a per-bid maximum trading distance.
test_df = bids_df.query('`time` == 0').copy()
n_bids = len(test_df)
test_df['lat'] = rng.random(size=n_bids) * 0.1 + 53.4    # 53.4 .. 53.5 degrees north
test_df['lon'] = rng.random(size=n_bids) * -0.2 - 2.2    # -2.4 .. -2.2 degrees east
test_df['max_distance_m'] = (rng.random(size=n_bids) * 3 + 3) * 1000  # 3 km .. 6 km, in metres

In [4]:
def get_epsg(lat, lon):
    """
    Get the EPSG code of the WGS 84 / UTM zone that contains a coordinate.

    UTM is a local projected coordinate system with units of metres, which
    allows distances to be computed directly from the projected coordinates.

    Parameters
    ----------
    lat : float
        the latitude in decimal degrees (scalar; >= 0 is treated as northern hemisphere)
    lon : float
        the longitude in decimal degrees (scalar)

    Returns
    -------
    int
        the EPSG code: 326zz for the northern hemisphere, 327zz for the
        southern hemisphere, where zz is the two-digit UTM zone (01-60)
    """
    # UTM zones are 6 degrees wide and numbered 1..60 eastwards from 180W;
    # the modulo wraps lon == 180 back into zone 1.
    utm_zone = int(np.floor((lon + 180) / 6) % 60) + 1

    # Add the zone to the hemisphere base numerically so zones 1-9 keep their
    # leading zero (zone 1 north is 32601, not 3261).
    hemisphere_base = 32600 if lat >= 0 else 32700
    return hemisphere_base + utm_zone

# Sanity check with a point inside the Manchester test area: expect 32630
# (northern-hemisphere prefix 326 followed by UTM zone 30).
get_epsg(53.445, -2.081)

32630

In [5]:
def get_valid_distances(seller_df: pd.DataFrame, buyer_df: pd.DataFrame) -> np.ndarray:
    """Determine which purchases are valid between seller and buyer pairs

    A pairing is valid only when the distance between the two bids is strictly
    less than BOTH the buyer's and the seller's ``max_distance_m``.

    Parameters
    ----------
    seller_df : pandas.DataFrame
        Dataframe containing all the sell bids; must provide the columns
        ``lat``, ``lon`` and ``max_distance_m``
    buyer_df : pandas.DataFrame
        Dataframe containing all the buy bids; must provide the columns
        ``lat``, ``lon`` and ``max_distance_m``

    Returns
    -------
    numpy.ndarray
        2D ``uint8`` array of shape ``(len(buyer_df), len(seller_df))``
        encoding valid purchases with a 1, indexed [buyer, seller] by row
        position in the input frames (not by their index labels)
    """
    n_buyers, n_sellers = len(buyer_df), len(seller_df)

    # Nothing to pair up: return the (possibly zero-sized) all-zero matrix
    # rather than failing on the first-seller lookup below.
    if n_buyers == 0 or n_sellers == 0:
        return np.zeros((n_buyers, n_sellers), dtype='uint8')

    # getting the correct coordinate system: the UTM zone of the first seller
    # is used for every bid, so all bids are assumed to lie close together
    epsg = get_epsg(seller_df.iloc[0].lat, seller_df.iloc[0].lon)

    # converting coordinates to units of m; standalone GeoSeries are built so
    # the caller's dataframes are never modified
    seller_points = geopd.GeoSeries(
        geopd.points_from_xy(seller_df.lon, seller_df.lat, crs="WGS84")
    ).to_crs(epsg=epsg)
    buyer_points = geopd.GeoSeries(
        geopd.points_from_xy(buyer_df.lon, buyer_df.lat, crs="WGS84")
    ).to_crs(epsg=epsg)

    # distance is symmetric, so one buyer-by-seller matrix serves both checks
    distances_m = np.empty((n_buyers, n_sellers), dtype=float)
    for i, buyer_point in enumerate(buyer_points):
        distances_m[i, :] = seller_points.distance(buyer_point).to_numpy()

    # broadcast each party's limit along its own axis
    buyer_limit_m = buyer_df['max_distance_m'].to_numpy()[:, np.newaxis]
    seller_limit_m = seller_df['max_distance_m'].to_numpy()[np.newaxis, :]

    # taking only valid distances for both
    within_both_limits = (distances_m < buyer_limit_m) & (distances_m < seller_limit_m)
    return within_both_limits.astype('uint8')

In [6]:
# Split the synthetic bids into seller and buyer frames.
# NOTE(review): assumes prepare_data returns (sellers, buyers) in that order
# and keeps the lat/lon/max_distance_m columns — confirm against
# linear_prog_LEM_optimiser.prepare_data.
test_seller_df, test_buyer_df = prepare_data(test_df)
# Matrix indexed [buyer, seller]: 1 where the pair is within both max distances.
test_valid_distance = get_valid_distances(test_seller_df, test_buyer_df)