## Business Density

This notebook generates measures of the number of businesses linked to a hex grid. The business data come from Statistics Canada's Business Register Division:

https://dataverse.scholarsportal.info/dataset.xhtml?persistentId=doi:10.5683/SP/FLLHOV&version=2.0

First we want to clean the business data and query it by NAICS code. This essentially removes factories / heavy industry, farms, wholesale trade, etc. - businesses that don't serve the public directly

In [1]:
import pandas as pd
import csv

# 2-digit NAICS sector codes whose columns are removed from the data
no_naics_2digit = [11,21,22,23,31,32,33,41,48,49]

# cleaned data rows: one list of ints per input row, restricted to the kept columns
out_data = []

# names of the kept columns, and positions of the dropped ones.
# Defined up front so they exist even if the input file turns out to be empty.
out_header = []
remove_by_index = []

# newline="" is what the csv module documentation asks for when opening a file for csv.reader,
# so that newlines embedded in quoted fields are interpreted correctly
with open("input_data/tabular_data/business_data_dec2016/CBP/Ontario-Employees.csv", newline="") as csvfile:
    reader = csv.reader(csvfile)

    for r, row in enumerate(reader):

        # the first row holds the NAICS label of every column
        if r == 0:
            for c, cell in enumerate(row):
                if cell == "Unclassified":
                    # unclassified businesses are kept under a placeholder code
                    out_header.append("000000")
                elif cell == "":
                    # the unlabelled column is renamed "dauid"
                    out_header.append("dauid")
                elif cell == "Total" or cell == "Sub-total, classified":
                    # aggregate columns are dropped
                    remove_by_index.append(c)
                elif int(cell[0:2]) in no_naics_2digit:
                    # first two characters of the label are the 2-digit NAICS code
                    remove_by_index.append(c)
                else:
                    # keep the column, labelled by the first 6 characters of its header
                    out_header.append(cell[0:6])

            # set for fast membership tests while filtering the data rows
            dropped_columns = set(remove_by_index)

        # the second row pertains to if there is employment / no employment - ignore for now
        elif r == 1:
            continue

        # every other row: drop the unwanted columns and convert what is left to int
        else:
            out_data.append(
                [int(cell) for c, cell in enumerate(row) if c not in dropped_columns]
            )

FileNotFoundError: [Errno 2] No such file or directory: 'tabular_data/business_data_dec2016/CBP/Ontario-Employees.csv'

In [None]:
# load the cleaned rows into a pandas data frame, using the kept column names,
# and make the dauid column the row index
df = pd.DataFrame(out_data, columns=out_header).set_index("dauid")

In [None]:
# total across all kept columns for each row of df
row_totals = df.sum(axis=1)
dfs = pd.DataFrame(row_totals)

# save the per-row totals as a csv
dfs.to_csv("input_data/tabular_data/DA_2016_business_store_subset.csv")

We then use areal interpolation to link the business data from DAs ($B_d$) to the hex grid ($B_h$)

### $B_h = \sum B_{d} {w_{d,h}}$

### $B_h = \sum B_{d} \frac{A_{d \cap h}}{A_d}$

With PostgreSQL and PostGIS:

```sql
-- creating a spatial index for the hex grid to speed up the intersection

DROP INDEX IF EXISTS hex_grid_200m_gix;
CREATE INDEX hex_grid_200m_gix ON hex_grid_200m USING GIST (geom);

-- creating an area column for the dissemination area boundaries: the full area of each DA,
-- which is later used as the denominator of the areal weights.
-- IF NOT EXISTS keeps the script re-runnable once the column has been added.
-- The UPDATE deliberately has no WHERE clause: the area is (re)computed for every DA.
-- NOTE(review): ST_AREA works in the units of the geometry's coordinate system - confirm it is a metre-based projection

ALTER TABLE zones_DA11 ADD COLUMN IF NOT EXISTS area double precision;
UPDATE zones_DA11 SET area = ST_AREA(geom);

-- Attach the 2016 business counts to the DA polygons
-- (2011 DA geometry, since this is how the data was coded).
-- Every DA is kept; a DA with no matching count row gets 0 instead of NULL.

DROP TABLE IF EXISTS temp_DA_business;
CREATE TABLE temp_DA_business AS (
    SELECT
        da.geom AS geom,
        da.dauid AS dauid,
        da.area AS area,
        COALESCE(biz.business2016, 0) AS business2016
    FROM zones_DA11 AS da
    LEFT JOIN table_da_business_2016_in_11da AS biz
        ON biz.dauid = da.dauid
);

-- spatial index on the DA polygons, used by the intersection with the hex grid

DROP INDEX IF EXISTS temp_DA_business_gix;
CREATE INDEX temp_DA_business_gix ON temp_DA_business USING GIST (geom);

-- intersecting the DAs with the hex grid: one row per (hex, DA) pair whose geometries intersect,
-- carrying the full DA area and the geometry of the overlap.
-- Dropping first keeps the script re-runnable, like the other tables it builds.

DROP TABLE IF EXISTS temp_int_DA11_hex;
CREATE TABLE temp_int_DA11_hex AS
(
SELECT
hex_grid_200m.id AS hexid,
temp_DA_business.dauid AS dauid,
temp_DA_business.area AS area_full,
ST_Intersection(hex_grid_200m.geom,temp_DA_business.geom) AS geom
FROM
hex_grid_200m INNER JOIN temp_DA_business ON ST_Intersects(hex_grid_200m.geom,temp_DA_business.geom)
);

-- area of each intersected geometry (every row is updated on purpose)

ALTER TABLE temp_int_DA11_hex ADD COLUMN area_int double precision;
UPDATE temp_int_DA11_hex SET area_int = ST_AREA(geom);

-- share of the DA's full area that falls inside the hex (every row is updated on purpose)

ALTER TABLE temp_int_DA11_hex ADD COLUMN area_ratio double precision;
UPDATE temp_int_DA11_hex SET area_ratio = area_int / area_full;

-- grouping by hex and DA unique IDs - i.e. this is a weights table that can be used for apportioning data.
-- weight = summed share of the DA's area that falls inside the hex.
-- Dropping first keeps the script re-runnable, like the other tables it builds.

DROP TABLE IF EXISTS weights_da11_hex;
CREATE TABLE weights_da11_hex AS
(
SELECT
hexid,
dauid,
sum(area_ratio) AS weight
FROM
temp_int_DA11_hex
GROUP BY dauid, hexid
ORDER BY dauid, hexid
);


```

We now use the weights table to apportion the business data to the hex grid

```sql
-- apportioning the DA business counts to the hex grid:
-- each (DA, hex) weight is matched to its DA's business count,
-- and the weighted counts are summed per hex

DROP TABLE IF EXISTS out_data_hex_business2016;
CREATE TABLE out_data_hex_business2016 AS (
    SELECT
        w.hexid AS hexid,
        SUM(w.weight * b.business2016) AS business2016
    FROM weights_da11_hex AS w
    INNER JOIN temp_DA_business AS b
        ON b.dauid = w.dauid
    GROUP BY w.hexid
);

-- Create a density column: the apportioned business count divided by a fixed cell area.
-- NOTE(review): 34641.0161513719 looks like the area in square metres of one 200 m hexagon
-- (roughly sqrt(3)/2 * 200^2), and dividing it by 1000 * 1000 would convert it to square kilometres,
-- i.e. the result would be businesses per km^2 - confirm.
-- NOTE(review): this assumes every hex cell has the same area - confirm for clipped cells at the edge of the grid.

ALTER TABLE out_data_hex_business2016 ADD COLUMN businessdensity2016 double precision;
UPDATE out_data_hex_business2016 SET businessdensity2016 = business2016 / (34641.0161513719 / (1000 * 1000));

-- let's write that to a file (as CSV with a header row)

\COPY out_data_hex_business2016 TO 'out_data_hex_business2016.csv' WITH (FORMAT CSV, HEADER);

-- we can delete the temp tables if we want, but they take a little while to compute so it may be nice to keep them if space isn't an issue.
-- IF EXISTS lets this cleanup run without error when a table has already been removed.

DROP TABLE IF EXISTS temp_DA_business;
DROP TABLE IF EXISTS temp_int_DA11_hex;

```