# 1 - Generate ws_seed_locations

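Create seed locations: cluster the POIs table (`vtg_pois`) and store the resulting cluster centroids in a test seed-locations table (`vtg_test_ws_seed_locations`). Note: the heading mentions 100 m, but the code below clusters with a distance of 1000 m.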

In [6]:
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import re
import cartoframes as cf
import sys

sys.path.insert(0,'../../') 

from utils.carto_helpers import get_creds

from enrichment.cf_model import CartoFramesModel, GeometryType
from constants.global_constants import meters_in_mile
from cartoframes import to_carto, create_table_from_query

# Shared CartoFramesModel instance; create_buffer() runs its SQL through
# carto.get_geopandas_from_query().
# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# 'enrichment_dbs.json' -- presumably raised while the model loads its
# configuration; confirm the working directory the notebook must be run from.
carto = CartoFramesModel()

FileNotFoundError: [Errno 2] No such file or directory: '/Users/44371/Documents/Cases/V7FC/aag-vantage/src/notebooks/enrichment/enrichment_dbs.json'

## Method to create a buffer.

We are using PostGIS spatial operation ST_ClusterDBSCAN. An input geometry will be added to a cluster if it is either:
1. A "core" geometry, that is within eps distance of at least minpoints input geometries (including itself)
2. A "border" geometry, that is within eps distance of a core geometry.

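We are using a distance of 1000 meters. Because `eps` is expressed in the units of the geometry's coordinate system, we cluster on `the_geom_webmercator` (SRID 3857, units in meters) rather than on `the_geom` (SRID 4326, units in degrees). Note that Web Mercator distances are stretched away from the equator, so 1000 map units correspond to somewhat less than 1000 m on the ground at New York's latitude.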

As ST_ClusterDBSCAN returns a cluster number, we group by this cluster number, collect the geoms and calculate the centroid in 4326, returning a geodataframe.

Also, we are generating clusters only for territory_id = '501' and limiting the output to 50 clusters. As territory_id is a DMA code, that means we are getting New York clusters.


In [2]:
def create_buffer(distance_in_meters=1000, territory_id='501', limit=50):
    """Cluster the POIs of one territory and return one seed row per cluster.

    Runs ST_ClusterDBSCAN over ``vtg_pois.the_geom_webmercator`` with
    ``minpoints := 1`` (so every POI is a "core" geometry and always lands in
    a cluster), groups the POIs by the resulting cluster number and returns,
    per cluster, the centroid of the collected ``the_geom`` points and the
    list of member ``poi_id`` values.

    Args:
        distance_in_meters: DBSCAN ``eps``, in the units of
            ``the_geom_webmercator``. Must be a positive, finite number.
        territory_id: value matched against ``vtg_pois.territory_id``.
            Must be alphanumeric (it is embedded in the SQL text).
        limit: maximum number of clusters returned, lowest ``seed_id`` first;
            ``None`` returns every cluster.

    Returns:
        Whatever ``carto.get_geopandas_from_query`` returns for the query
        (a GeoDataFrame according to the notebook text), with columns
        ``seed_id``, ``the_geom`` and ``pois``.

    Raises:
        TypeError: if ``distance_in_meters`` is not a real number or
            ``limit`` is not an integer/``None``.
        ValueError: if ``distance_in_meters`` or ``limit`` is not positive,
            or ``territory_id`` contains non-alphanumeric characters.
    """
    import math
    import numbers

    # Every value below is spliced into the SQL text, so validate before use.
    if isinstance(distance_in_meters, bool) or not isinstance(distance_in_meters, numbers.Real):
        raise TypeError(
            f"distance_in_meters must be a real number, got {type(distance_in_meters).__name__}"
        )
    if not math.isfinite(distance_in_meters) or distance_in_meters <= 0:
        raise ValueError(f"distance_in_meters must be positive and finite, got {distance_in_meters!r}")
    # Render as a plain int/float literal regardless of the numeric subtype.
    if isinstance(distance_in_meters, numbers.Integral):
        eps = int(distance_in_meters)
    else:
        eps = float(distance_in_meters)

    territory = str(territory_id)
    if not territory.isalnum():
        raise ValueError(f"territory_id must be alphanumeric, got {territory_id!r}")

    if limit is None:
        limit_clause = ''
    else:
        if isinstance(limit, bool) or not isinstance(limit, numbers.Integral):
            raise TypeError(f"limit must be an integer or None, got {type(limit).__name__}")
        if limit <= 0:
            raise ValueError(f"limit must be positive, got {limit!r}")
        limit_clause = f' limit {int(limit)}'

    # ORDER BY makes the LIMIT pick a well-defined set of clusters; without it
    # the database is free to return any subset of the groups.
    sql = f'''
    select seed_id, st_centroid(st_collect(the_geom)) the_geom, array_agg(poi_id) pois from (
        SELECT
            poi_id,
            the_geom,
            ST_ClusterDBSCAN(the_geom_webmercator, eps := {eps}, minpoints := 1) over () AS seed_id
        FROM vtg_pois where territory_id = '{territory}') geoms
    group by seed_id order by seed_id{limit_clause}'''

    return carto.get_geopandas_from_query(sql)

In [3]:
def bain_method():
    """Build the 1000-unit seed clusters, print them, and upload them to CARTO.

    The result of ``create_buffer(1000)`` is printed and then written with
    ``to_carto`` to the table "vtg_test_ws_seed_locations", replacing the
    table if it already exists. Returns nothing.
    """
    target_table = "vtg_test_ws_seed_locations"

    seed_clusters = create_buffer(1000)
    print(seed_clusters)

    # Push the clusters to CARTO, overwriting any previous test table.
    to_carto(seed_clusters, target_table, if_exists='replace')


In [8]:
bain_method()

    seed_id                    the_geom  \
0         0  POINT (-74.16614 40.80663)   
1         1  POINT (-74.22871 40.84860)   
2         2  POINT (-74.02816 40.92742)   
3         3  POINT (-74.17342 40.92073)   
4         4  POINT (-74.25988 40.81089)   
5         5  POINT (-74.07288 40.88122)   
6         6  POINT (-74.40949 40.75520)   
7         7  POINT (-74.74560 41.06476)   
8         8  POINT (-74.03164 41.03626)   
9         9  POINT (-74.05234 40.83559)   
10       10  POINT (-74.11979 41.01011)   
11       11  POINT (-74.48017 40.79848)   
12       12  POINT (-74.48109 40.89194)   
13       13  POINT (-74.03746 40.85550)   
14       14  POINT (-74.13906 40.98395)   
15       15  POINT (-74.22559 40.76390)   
16       16  POINT (-74.26910 40.93091)   
17       17  POINT (-74.63621 41.03501)   
18       18  POINT (-74.26166 40.72165)   
19       19  POINT (-73.95855 40.97061)   
20       20  POINT (-74.57990 41.11396)   
21       21  POINT (-74.10512 40.89651)   
22       22

[2021-02-05T12:48:28Z] (264519) {carto.py:130} INFO - Success! Data uploaded to table "vtg_test_ws_seed_locations" correctly
