In [1]:
import os
from pathlib import Path
import sys

import arcpy
from arcgis.features import GeoAccessor
from ba_tools import utils
import pandas as pd

In [2]:
# Absolute path to the project's data directory, taken relative to the
# current working directory, plus the sub-directories derived from it.
data = Path(os.path.abspath('../data'))
raw_dir = data.joinpath('raw')
interim_dir = data.joinpath('interim')
raw_gdb = raw_dir.joinpath('raw.gdb')

# Competitor dataset inside the raw geodatabase. The path is converted to a
# plain string; the field names are the id and brand-name columns to read.
competitor_id_field, competitor_brand_name_field = 'LOCNUM', 'CONAME'
competitor_location_layer = str(raw_gdb.joinpath('sea_ace_comp'))

In [3]:
# Pull the id and brand-name values for every row of the competitor layer.
# The cursor is opened in a `with` block so it is closed as soon as the rows
# have been read, rather than lingering until garbage collection; ArcPy
# documents this as the way to help release the lock held on the dataset.
with arcpy.da.SearchCursor(
    competitor_location_layer,
    [competitor_id_field, competitor_brand_name_field],
) as cursor:
    comp_lst = list(cursor)  # one tuple per row, in field order

# Column labels follow the same order as the cursor fields above.
comp_df = pd.DataFrame(comp_lst, columns=['competition_destination_id', 'competition_brand_name'])
comp_df.head()

Unnamed: 0,competition_destination_id,competition_brand_name
0,460555345,CAPITOL LUMBER & DOOR
1,502088164,BUILDERS FIRST SOURCE
2,718164872,RANDLES SURE GROW TOPSOIL
3,805003878,NORTHERN ACOUSTICAL SYSTS CO
4,216099309,LOWE'S HOME IMPROVEMENT


In [4]:
# Add a name-category column for each competitor, then relabel that column.
# NOTE(review): judging by the displayed output, brands that do not exceed
# `location_count_threshold` are labelled INDEPENDENT while others keep their
# brand name -- confirm against ba_tools.utils.add_store_name_category.
#
# The rename is chained and non-mutating (no `inplace=True`), so whatever
# frame the helper returns is left untouched and re-running this cell always
# starts from the same input.
comp_cat_df = (
    utils.add_store_name_category(comp_df, 'competition_brand_name', location_count_threshold=1)
    .rename(columns={'dest_name_category': 'competition_name_category'})
)
comp_cat_df.head()

Unnamed: 0,competition_destination_id,competition_brand_name,competition_name_category
0,460555345,CAPITOL LUMBER & DOOR,INDEPENDENT
1,502088164,BUILDERS FIRST SOURCE,BUILDERS FIRST SOURCE
2,718164872,RANDLES SURE GROW TOPSOIL,INDEPENDENT
3,805003878,NORTHERN ACOUSTICAL SYSTS CO,INDEPENDENT
4,216099309,LOWE'S HOME IMPROVEMENT,LOWE'S HOME IMPROVEMENT


In [5]:
# Number of rows per name category, largest first, as a one-column frame
# labelled 'count' whose index carries no name.
sum_df = (
    comp_cat_df
    .groupby('competition_name_category')
    .size()
    .sort_values(ascending=False)
    .to_frame('count')
    .rename_axis(None)
)
sum_df

Unnamed: 0,count
INDEPENDENT,200
HOME DEPOT,24
LOWE'S HOME IMPROVEMENT,17
HARBOR FREIGHT TOOLS,8
BUILDERS FIRST SOURCE,6
MC LENDON HARDWARE,5
GTS INTERIOR SUPPLY,5
HD SUPPLY WHITE CAP,3
BUILDING SPECIALTIES,3
SNAP-ON TOOLS,3
