In [1]:
from tqdm import tqdm
import geopandas as gpd
import argparse
from cartoframes import read_carto, to_carto
from configparser import ConfigParser
from cartoframes.auth import set_default_credentials, Credentials

In [5]:
# Read in and establish credentials
credentials_path = '../../credentials.ini'
config = ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so fail here with a clear message instead of with a
# KeyError on the first section lookup below.
if not config.read(credentials_path):
    raise FileNotFoundError(f'Could not read credentials file: {credentials_path}')

# Main user credentials (taken from the [on-prem] section of the ini file)
set_default_credentials(base_url=f"https://carto.tools.bain.com/user/{config['on-prem']['user']}",
                        username=config['on-prem']['user'],
                        api_key=config['on-prem']['api_key'])

# Data Observatory credentials (taken from the [cloud] section of the ini file)
do_credentials = Credentials(username=config['cloud']['user'],
                             api_key=config['cloud']['api_key'])

In [6]:
# Connect to carto.tools.bain.com:443 with `openssl s_client -showcerts` and
# append everything it writes to stdout to the CA bundle file reported by
# certifi.where() — presumably so HTTPS requests made from this kernel trust
# the server's certificate chain; confirm.
# NOTE(review): `>>` appends on every execution, so re-running this cell adds
# the same output to the bundle again.
!echo quit | openssl s_client -showcerts -connect carto.tools.bain.com:443 >> $(python -c "import certifi; print(certifi.where())")

depth=2 DC = com, DC = bain, CN = Bain Global Root CA
verify error:num=19:self signed certificate in certificate chain
verify return:1
depth=2 DC = com, DC = bain, CN = Bain Global Root CA
verify return:1
depth=1 DC = com, DC = BAIN, CN = Bain Americas Issuing CA
verify return:1
depth=0 C = US, ST = MA, L = Boston, O = Bain and Company, OU = Global TSG Services, CN = carto.tools.bain.com
verify return:1
DONE


In [7]:
# Load the 'seed_locations' table through cartoframes' read_carto.
ws_master = read_carto('seed_locations')

In [8]:
# Preview the first rows to check which columns were loaded.
ws_master.head()

Unnamed: 0,cartodb_id,the_geom,label,lat,long,region,urbanicity,dma_name,dma_code,popcy_density,auv,ebitda,state,state_abb,cash_on_cash,max_single_impact,max_cumulative_impact,nearest_rural,nearest_sub_urban,nearest_urban
0,9647,POINT (-74.11963 41.00959),12,41.009591,-74.119635,NORTHEAST,Urban,New York NY,501,2765.475857,1986876.038,0.212982,New Jersey,NJ,0.201565,-3.657224,-3.657224,23.241927,5.059709,2.893134
1,9648,POINT (-74.47760 40.79767),13,40.797672,-74.477595,NORTHEAST,Sub-urban,New York NY,501,1477.870702,1684316.841,0.211503,New Jersey,NJ,0.169459,-6.952048,-14.459434,10.142509,1.176289,11.650195
2,9649,POINT (-74.12104 40.85469),14,40.854689,-74.121036,NORTHEAST,Urban,New York NY,501,10554.87033,2537548.638,0.238313,New Jersey,NJ,0.293863,-4.769272,-11.335854,27.452245,4.077246,0.196972
3,9650,POINT (-74.13834 40.98465),20,40.984648,-74.13834,NORTHEAST,Urban,New York NY,501,2334.173892,1900950.646,0.212982,New Jersey,NJ,0.192848,-2.155986,-2.011176,23.491135,6.145319,2.917591
4,9651,POINT (-73.95480 40.96890),26,40.968901,-73.954805,NORTHEAST,Urban,New York NY,501,1438.067684,1829164.764,0.223515,New Jersey,NJ,0.196486,-3.793384,-3.793384,31.759694,6.198946,4.294284


## Whitespace Analysis - Predicted Can

In [11]:
# NOTE(review): cash_on_cash is not referenced by any later cell visible in
# this notebook — presumably a threshold applied elsewhere; confirm or remove.
cash_on_cash = .17
# Buffer size per urbanicity class. The buffer query multiplies each value by
# 1609.34 before passing it to ST_BUFFER on a geography, so these are
# presumably miles (1609.34 m per mile). The same values are also used as the
# minimum nearest_urban / nearest_sub_urban / nearest_rural thresholds.
buffer_urban = 2
buffer_suburban = 3.5
buffer_rural = 5

In [12]:
# Table queried for seed locations and the column that identifies each seed
# location; both are interpolated into the SQL queries that follow.
seed_table = 'seed_locations'
seed_id = 'label'

In [19]:
# Build one buffer polygon per seed location. The buffer size is chosen by
# urbanicity (buffer_urban / buffer_suburban / buffer_rural, scaled by 1609.34);
# the buffer is computed on a geography and cast back to geometry.
# Only seed locations whose nearest_rural / nearest_sub_urban / nearest_urban
# values are at least the corresponding buffer size are returned.
# NOTE(review): the CASE has no ELSE branch, so a row whose urbanicity is not
# one of 'Urban', 'Sub-urban' or 'Rural' would get a NULL the_geom — confirm no
# such rows exist in the table.
df_com_center_filt_geo = read_carto(f'''SELECT
            {seed_id}, auv,
            CASE
                WHEN urbanicity = 'Urban'
                THEN ST_BUFFER(
                    ST_SetSRID(ST_MakePoint(long, lat),4326)::geography, {buffer_urban} * 1609.34
                    )::geometry 
                WHEN urbanicity = 'Sub-urban'
                THEN ST_BUFFER(
                    ST_SetSRID(ST_MakePoint(long, lat),4326)::geography, {buffer_suburban} * 1609.34
                    )::geometry
                WHEN urbanicity = 'Rural'
                THEN ST_BUFFER(
                    ST_SetSRID(ST_MakePoint(long, lat),4326)::geography, {buffer_rural} * 1609.34
                    )::geometry
                END AS the_geom
            FROM {seed_table}
            WHERE nearest_rural >= {buffer_rural}
            AND nearest_sub_urban >= {buffer_suburban}
            AND nearest_urban >= {buffer_urban}''')

In [20]:
# Inspect the buffered seed locations returned by the query.
df_com_center_filt_geo

Unnamed: 0,label,auv,the_geom
0,12,1986876.038,"POLYGON ((-74.08136 41.00929, -74.08218 41.003..."
1,20,1900950.646,"POLYGON ((-74.10008 40.98436, -74.10089 40.978..."
2,26,1829164.764,"POLYGON ((-73.91656 40.96855, -73.91739 40.962..."
3,183,1571908.027,"POLYGON ((-75.06486 40.32337, -75.06612 40.313..."
4,233,1291431.059,"POLYGON ((-80.47354 41.92277, -80.47552 41.908..."
...,...,...,...
5698,156823,1110683.782,"POLYGON ((-88.01878 30.64668, -88.01981 30.636..."
5699,156825,1465894.661,"POLYGON ((-88.16900 30.76124, -88.17001 30.751..."
5700,156834,1904427.833,"POLYGON ((-122.26319 40.65822, -122.26517 40.6..."
5701,156876,1677462.534,"POLYGON ((-74.25317 40.37729, -74.25455 40.367..."


In [22]:
# Point geometry (built from long/lat) for the same filtered seed locations as
# the buffer query — the WHERE clause is identical. These points are passed to
# filter_by_buffer as the right-hand frame of its spatial join.
df_com_center_filt_geo_temp = read_carto(f'''SELECT
            {seed_id}, auv,
            ST_SetSRID(ST_MakePoint(long, lat),4326) AS the_geom
            FROM {seed_table}
            WHERE nearest_rural >= {buffer_rural}
            AND nearest_sub_urban >= {buffer_suburban}
            AND nearest_urban >= {buffer_urban}''')

In [29]:
def filter_by_buffer(df_com_center_filt_geo, df_com_center_filt_geo_temp, id_col='label'):
    """
    Thin out overlapping commercial centers, keeping the highest predicted AUV.

    Two commercial centers conflict when the buffer polygon of one contains the
    point of the other. Centers are visited from highest to lowest ``auv``;
    every center that has not been removed yet is kept, and all centers it
    conflicts with are removed.

    Parameters
    ----------
    df_com_center_filt_geo : geopandas.GeoDataFrame
        One buffer polygon per potential commercial center, with ``id_col``
        and ``auv`` columns.
    df_com_center_filt_geo_temp : geopandas.GeoDataFrame
        One point per potential commercial center. It must also carry a column
        named ``id_col`` so the spatial join suffixes the two id columns with
        ``_left`` / ``_right``.
    id_col : str, default 'label'
        Name of the column that identifies a commercial center in both frames.

    Returns
    -------
    geopandas.GeoDataFrame
        Copy of the rows of ``df_com_center_filt_geo`` that were kept, sorted
        by ``auv`` in descending order.
    """
    import inspect

    left_col = f'{id_col}_left'
    right_col = f'{id_col}_right'

    # geopandas renamed sjoin's `op` keyword to `predicate`; use whichever one
    # the installed version accepts so the join works on old and new releases.
    sjoin_params = inspect.signature(gpd.sjoin).parameters
    predicate_kw = 'predicate' if 'predicate' in sjoin_params else 'op'

    # Pair every buffer with the points of the commercial centers it contains
    df_cc_geo = gpd.sjoin(df_com_center_filt_geo, df_com_center_filt_geo_temp,
                          how='inner', **{predicate_kw: 'contains'})

    # Remove self-join
    df_cc_geo = df_cc_geo[df_cc_geo[left_col] != df_cc_geo[right_col]]

    # Build the conflict lookup once, in both directions, instead of
    # re-filtering the joined frame for every commercial center in the loop.
    conflicts = {}
    for left_id, right_id in zip(df_cc_geo[left_col], df_cc_geo[right_col]):
        conflicts.setdefault(left_id, set()).add(right_id)
        conflicts.setdefault(right_id, set()).add(left_id)

    # Sort seed locations by predicted AUV
    df_cc_final = df_com_center_filt_geo.sort_values(by='auv', ascending=False)

    # Commercial centers that conflict with a higher-scored, kept center.
    # The conflict relation is symmetric, so a center that conflicts with an
    # already-kept center has always been removed before it is visited; no
    # separate check against the kept centers is needed.
    remove_set = set()
    for center_id in tqdm(df_cc_final[id_col], total=df_cc_final.shape[0], position=0, leave=True):
        if center_id in remove_set:
            continue
        remove_set.update(conflicts.get(center_id, ()))

    cc_final = df_cc_final[~df_cc_final[id_col].isin(remove_set)].copy()

    return cc_final

In [30]:
# Thin out conflicting seed locations: the buffer polygons are the left frame
# and the points are the right frame of the spatial join in filter_by_buffer.
cc_final = filter_by_buffer(df_com_center_filt_geo, df_com_center_filt_geo_temp)

100%|██████████| 5703/5703 [00:03<00:00, 1725.09it/s]


In [33]:
# Inspect the seed locations that survived the buffer filter (sorted by auv,
# highest first).
cc_final

Unnamed: 0,label,auv,the_geom
2146,58530,2967726.260,"POLYGON ((-77.01590 38.90498, -77.01645 38.899..."
939,26702,2674270.315,"POLYGON ((-73.73938 40.67941, -73.74022 40.673..."
5021,134724,2628038.385,"POLYGON ((-73.89369 40.58752, -73.89451 40.581..."
3241,91280,2596058.353,"POLYGON ((-74.10342 40.94015, -74.10423 40.934..."
2947,80959,2579976.359,"POLYGON ((-77.40239 38.82888, -77.40291 38.823..."
...,...,...,...
163,6257,1002889.002,"POLYGON ((-81.82622 33.77158, -81.82775 33.757..."
4210,109196,1002888.860,"POLYGON ((-85.81263 31.04848, -85.81442 31.034..."
3719,100099,1001861.133,"POLYGON ((-88.15429 34.65277, -88.15578 34.638..."
4045,105313,1001638.056,"POLYGON ((-88.68213 34.93585, -88.68353 34.921..."
