# Setup

In [None]:
# %run ./include
%pip install polars rich ezregex

Python interpreter will be restarted.
Collecting polars
  Downloading polars-0.19.12-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.6 MB)
Collecting rich
  Downloading rich-13.6.0-py3-none-any.whl (239 kB)
Collecting ezregex
  Downloading ezregex-1.4.2-py3-none-any.whl (18 kB)
Collecting pygments<3.0.0,>=2.13.0
  Downloading Pygments-2.16.1-py3-none-any.whl (1.2 MB)
Collecting markdown-it-py>=2.2.0
  Downloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)
Collecting mdurl~=0.1
  Downloading mdurl-0.1.2-py3-none-any.whl (10.0 kB)
Installing collected packages: mdurl, pygments, markdown-it-py, rich, polars, ezregex
  Attempting uninstall: pygments
    Found existing installation: Pygments 2.11.2
    Not uninstalling pygments at /databricks/python3/lib/python3.9/site-packages, outside environment /local_disk0/.ephemeral_nfs/envs/pythonEnv-22babfc2-df75-4d2d-8ab6-db7563cd35e0
    Can't uninstall 'Pygments'. No files were found to uninstall.
Successfully installed ezregex-1.

In [None]:
import re

import datetime as dt
from rich import print
import polars as pl
import plotly.express as px
import requests
import pyspark.sql.functions as sf
import ezregex as er
# Databricks handles the first two imports.

# access to the sql functions https://spark.apache.org/docs/latest/api/python/pyspark.sql.html#module-pyspark.sql.functions
from pyspark.sql import functions as sf 
from rich import print
import json
import polars as pl
import plotly.express as px
from datetime import datetime, timedelta
# Because Spark is stupid
from copy import deepcopy
# from pyspark.sql.dataframe import DataFrame
import pyspark
print('Imported libraries')

In [None]:
def _readDelta(path):
    """Load the Delta table stored at `path` as a Spark DataFrame."""
    return spark.read.format('delta').load(path)


# SafeGraph tables stored under the Hive warehouse.
patterns = _readDelta('dbfs:/user/hive/warehouse/safegraph.db/patterns/')
spatial = _readDelta('dbfs:/user/hive/warehouse/safegraph.db/spatial/')
places = _readDelta('dbfs:/user/hive/warehouse/safegraph.db/places/')
tract_table = _readDelta('dbfs:/user/hive/warehouse/safegraph.db/tract_table/')
censusblock_table = _readDelta('dbfs:/user/hive/warehouse/safegraph.db/censusblock_table')
censustract_pkmap = _readDelta('dbfs:/user/hive/warehouse/safegraph.db/censustract_pkmap')

# The three inputs below were uploaded by hand: they are slow to generate or
# need a specific account to generate.

# Source: web scraper run against https://local.churchofjesuschrist.org/en/us/
# Read without a header, so columns are addressed positionally (_c0, _c1, ...).
addresses = spark.read.csv('dbfs:/FileStore/Attempt3_6509_addresses.csv')

# Source: table scraped from https://pe.usps.com/text/pub28/28apc_002.htm
# Converted Spark -> pandas -> polars.
postalTablePandas = spark.read.csv('dbfs:/FileStore/postalTable.csv').toPandas()
abbr = pl.DataFrame(postalTablePandas)

# Source: web scraper output combined with the Google Maps address validation API
fullAddr = spark.read.parquet('dbfs:/FileStore/full_church_building_data_20.parquet')


# Join church buildings with safegraph data

## Method 1: Join on addresses

In [None]:
# Lookup of lower-cased street word -> lower-cased abbreviation, taken from the
# first and third columns of the postal table. The first two rows are skipped
# (presumably header rows -- TODO confirm against the CSV).
abbrs = {
    row['_c0'].lower(): row['_c2'].lower()
    for row in abbr.select('_c0', '_c2').to_dicts()[2:]
}

# Manual additions, applied after the table entries. Insertion order matters
# because the replacements are applied in dictionary order.
abbrs.update({
    'bayou': 'byu',
    'south': 's',
    'north': 'n',
    'east': 'e',
    'west': 'w',
})
abbrs

In [None]:
# TODO: 
# 1 & main
# things like pine road and mountainview road get abbreviated -- but also, they get abbreviated the same way, so oh well?
# There's a 840 w midway st and an 841 w midway st
# @udf
def formatAddress(col):
    """Build a Spark column expression that normalises the street address in `col`.

    The same normalisation is meant to be applied to both sides of an address
    join so that equivalent addresses compare equal.

    Steps, in order:
      1. lower-case and trim the value;
      2. replace every key of the module-level `abbrs` mapping with its
         abbreviation;
      3. collapse whitespace runs to a single space;
      4. strip ordinal suffixes (st / nd / rd / th) that follow a number.

    Args:
        col: Name of the string column holding the street address.

    Returns:
        A `pyspark.sql.Column` expression; nothing is evaluated here.
    """
    # Remove excess whitespace and lower all letters
    addr = sf.trim(sf.lower(sf.col(col)))

    # Replace all common replacements.
    # NOTE: each key is used as an unanchored regex, so it also matches inside
    # longer words (e.g. 'north' inside 'northwest'). Both sides of the join
    # are rewritten identically, so this is left as-is.
    for name, abr in abbrs.items():
        addr = sf.regexp_replace(addr, name, abr)

    # The only whitespace we want are single spaces
    addr = sf.regexp_replace(addr, er.whitechunk.str(), ' ')

    # Remove the suffix of any 1st, 2nd, 3rd, 5 th, etc., keeping the number.
    # 'nd' was missing from the original list, so "2nd" / "22nd" kept their
    # suffix while every other ordinal lost it.
    # Earlier attempts using capture groups and back-references in the
    # replacement (er.replace_group, '${num}', '\1') did not work reliably,
    # hence the look-behind with an empty replacement.
    ordinalSuffix = (
        er.ifPrecededBy(er.number)
        + er.optional(er.space)
        + er.anyof('st', 'nd', 'rd', 'th')
    ).str()
    addr = sf.regexp_replace(addr, ordinalSuffix, '')

    # TODO: add a space between a number and a trailing direction letter
    # (232n -> 232 n, 343s -> 343 s); the capture-group based attempts for
    # this did not work either.
    return addr


### Levenshtein distance
Apparently there's a metric called Levenshtein distance that measures how close two strings are. We could use it instead of the UDF to join addresses (Spark has a built-in function, `sf.levenshtein`), but that would require a cross join between datasets with sizes of ~6,000 and (number of non-residential places in the US), and then calling `sf.levenshtein` on every pair, which I can't imagine is particularly fast.

You may be able to do something like matching them all by county first, then cross joining and computing the Levenshtein distance within each group, but still, yuck.

In [None]:
# Scraped addresses: keep the rows whose first CSV column equals 'id'
# (presumably the state code for Idaho -- TODO confirm) and normalise the
# street address held in the third column.
scrapedAddr = (
    addresses
    .where(sf.col('_c0') == 'id')
    .select(formatAddress('_c2').alias('street_address'))
)

# SafeGraph places, with the street address normalised the same way so the
# two frames can be joined on it.
safegraphAddr = places.select(
    'placekey',
    'top_category',
    formatAddress('street_address').alias('street_address'),
)

In [None]:
# Match on the normalised street address, then keep only the places that
# SafeGraph categorises as religious organisations.
isReligiousOrgAddr = sf.col('top_category') == 'Religious Organizations'
combAddr = (
    safegraphAddr
    .join(scrapedAddr, on='street_address', how='inner')
    .where(isReligiousOrgAddr)
)
print(f'scraped: {scrapedAddr.count()}, comb: {combAddr.count()}')

## Method 2: Join on lat & lon

In [None]:
# Number of decimal places both coordinate sets are rounded to before joining.
precision = 4


def _roundedCoord(sourceCol, outputCol):
    """Round `sourceCol` to `precision` decimal places and expose it as `outputCol`."""
    return sf.round(sf.col(sourceCol), precision).alias(outputCol)


# Scraped buildings whose `state` column equals 'id'.
scrapedCoord = (
    fullAddr
    .where(sf.col('state') == 'id')
    .select(_roundedCoord('lat', 'lat'), _roundedCoord('lon', 'lon'))
)

# SafeGraph places with coordinates rounded and renamed to match.
safegraphCoord = places.select(
    'placekey',
    'top_category',
    _roundedCoord('latitude', 'lat'),
    _roundedCoord('longitude', 'lon'),
)

In [None]:
# Match on the rounded (lat, lon) pair, then keep only the places that
# SafeGraph categorises as religious organisations.
isReligiousOrgCoord = sf.col('top_category') == 'Religious Organizations'
combCoord = (
    safegraphCoord
    .join(scrapedCoord, on=['lat', 'lon'], how='inner')
    .where(isReligiousOrgCoord)
)
print(f'scraped: {scrapedCoord.count()}, comb: {combCoord.count()}')

## Method 3: Regex

In [None]:
# A place is kept when its name matches `includePattern` and contains none of
# the fragments in `excludedFragments`.

# Raw string on purpose: in a normal string literal Python turns "\b" into a
# backspace character, so the word-boundary "Ward" alternative never matched.
includePattern = r"Latter|latter|Saints|saints|LDS|\b[Ww]ard\b"

# Regex fragments that disqualify a name when found anywhere in it.
# Changes from the original chain of rlike() calls:
#   * "[Ww]welfare" -> "[Ww]elfare" (typo meant the exclusion never applied)
#   * "[C]cosmas" and "[Tt]hougt" dropped (typos of "[Cc]osmas" / "[Tt]hought",
#     which are already listed)
#   * the second "[Ff]ellowship" dropped (exact duplicate)
# NOTE(review): "[Mm]icheal" and "[Pp]ariosse" look like misspellings of
# "Michael" / "Paroisse" -- left untouched, confirm the intent.
excludedFragments = [
    "Reorganized", "All Saints", "[cC]ath", "[Bb]ody", "[Pp]eter",
    "[Cc]atholic", "[Pp]res", "[Mm]inist", "[Mm]ission", "[Ww]orship",
    "[Rr]ain", "[Bb]aptist", "[Mm]eth", "[Ee]vang", "[Ll]utheran",
    "[Oo]rthodox", "[Ee]piscopal", "[Tt]abernacle", "[Hh]arvest", "[Aa]ssem",
    "[Mm]edia", "[Mm]artha", "[Cc]hristian", "[Uu]nited", "[Ff]ellowship",
    "[Ww]esl", "[Gg]reater", "[Pp]rison", "[Cc]ommuni", "[Cc]lement",
    "[Vv]iridian", "[Dd]iocese", "[Hh]istory", "[Ss]chool", "[Hh]oliness",
    "[Mm]artyr", "[Jj]ames", "[Hh]ouse", "[Gg]lory", "[Aa]nglican",
    "[Pp]oetic", "[Ss]anctuary", "[Ee]quipping", "[Jj]ohn", "[Aa]ndrew",
    "[Ee]manuel", "[Rr]edeemed", "[Pp]erfecting", "[Aa]ngel", "[Aa]rchangel",
    "[Mm]icheal", "[Tt]hought", "[Pp]ariosse", "[Cc]osmas", "[Dd]eliverance",
    "[Ss]ociete", "[Tt]emple", "[Ss]eminary", "[Ee]mployment", "[Ii]nstitute",
    "[Cc]amp", "[Ss]tudent", "[Ee]ducation", "[Ss]ocial", "[Ww]elfare",
    "[Cc][Ee][Ss]", "[Ff]amily", "[Mm]ary", "[Rr]ussian", "[Bb]eautif",
    "[Hh]eaven", "Inc", "God",
]

# One pattern of the same "^((?!X).)*$" shape the original used per fragment.
# AND-ing that shape over every fragment is equivalent to using the
# alternation of all fragments as X, so a single rlike() is enough.
excludePattern = "^((?!(?:" + "|".join(excludedFragments) + ")).)*$"

locationName = sf.col("location_name")
combRegex = places.filter(
    (sf.col("top_category") == "Religious Organizations")
    & locationName.rlike(includePattern)
    & locationName.rlike(excludePattern)
)
print(f'regex found {combRegex.count()}')

## Combine all 3 methods

In [None]:
# 'zzw-222@5wg-5bt-jjv' -- "Church"

# The coordinate and address matches contribute only their placekeys.
# De-duplicate them first: a placekey that appears k times on one side and m
# times on the other would otherwise come out of the outer joins k*m times.
coordKeys = combCoord.select('placekey').distinct()
addrKeys = combAddr.select('placekey').distinct()

# Full outer joins: rows found only by coordinates/address carry a placekey
# and nulls in every column that comes from combRegex.
comb = (
    combRegex
    .join(coordKeys, on='placekey', how='outer')
    .join(addrKeys, on='placekey', how='outer')
)
print(f'Total found: {comb.count()}')

# Places the regex did not find (location_name is null after the outer joins)
# whose real name does not mention "Latter" or "Saint" either.
# distinct() lists each (placekey, location_name) pair once.
nonObvious = (comb
    .filter(sf.col('location_name').isNull())
    .select('placekey')
    .join(places.select('placekey', 'location_name'), on='placekey', how='inner')
    .filter(~sf.col('location_name').rlike('[lL]atter|[sS]aint'))
    .distinct()
)
print(f'There are {nonObvious.count()} non-obvious places found')
display(nonObvious)

placekey,location_name
222-223@3x5-4n2-qmk,Coeur d'Alene 3rd Branch Young Single Adults
222-223@5wf-zyt-rhq,Church of Christ Presidents Office
zzw-222@3x5-4n2-qfz,Coeur D'alene 2nd Ward
zzw-222@5wg-3nz-vzz,Church
zzw-222@5wg-5bt-jjv,Church
zzw-222@5wr-qzn-sdv,Cary Ward Family History Center
zzy-224@5qn-rf3-td9,Paris Tabernacle
zzy-224@5qn-rf3-td9,Paris Tabernacle
zzy-224@5qn-rf3-td9,Paris Tabernacle
zzy-224@5qn-rf3-td9,Paris Tabernacle


In [None]:
# Persist the combined matches as a single parquet part file.
# mode('overwrite') lets the cell be re-run: with the default save mode the
# write raises once the path exists from a previous run.
comb.repartition(1).write.mode('overwrite').parquet('dbfs:/FileStore/churchPlaces.parquet')
display(comb)

placekey,poi_cbg,parent_placekey,location_name,brands,safegraph_brand_ids,store_id,top_category,sub_category,naics_code,open_hours,category_tags,latitude,longitude,street_address,city,region,postal_code,iso_country_code,opened_on,closed_on,tracking_closed_since,websites,phone_number,wkt_area_sq_meters
222-222@3x5-4j2-cqz,,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,"Map(Thu -> List(), Sat -> List(), Tue -> List(), Sun -> List(List(9:00, 14:00)), Mon -> List(), Wed -> List(), Fri -> List())",,47.722601,-116.919083,1824 E 16th Ave,Post Falls,ID,83854.0,US,,,2019-07,[],12083803142.0,1598.0
222-222@3x5-4n2-qmk,160550012001.0,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,"Map(Thu -> List(), Sat -> List(), Tue -> List(), Sun -> List(List(8:30, 15:00)), Mon -> List(), Wed -> List(), Fri -> List())",Churches,47.702578,-116.781765,2801 N 4th St,Coeur D Alene,ID,83815.0,US,,,2019-07,"[""lds.org""]",,2146.0
222-222@3x5-4n8-q9f,,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,,"Churches,Non-profit",47.721095,-116.775691,5430 N 4th St,Coeur D Alene,ID,83815.0,US,,,2019-07,"[""lds.org""]",12086643521.0,168.0
222-222@5w8-z7t-bc5,160599702001.0,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,,Churches,45.171196,-113.89175,400 S Daisy St,Salmon,ID,83467.0,US,,,2019-07,"[""lds.org""]",12087563090.0,2842.0
222-222@5w9-hn5-qcq,,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,,Churches,43.57606,-116.414069,3145 S Linder Rd,Meridian,ID,83642.0,US,,,2019-07,"[""lds.org""]",12084010000.0,295.0
222-222@5w9-hn5-qcq,160010103131.0,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,,Churches,43.57606,-116.414069,3145 S Linder Rd,Meridian,ID,83642.0,US,,,2019-07,"[""lds.org""]",12084010000.0,295.0
222-222@5w9-hrg-kpv,160270211001.0,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,,Churches,43.640712,-116.612525,18486 Middleton Rd,Caldwell,ID,83687.0,US,,,2019-07,[],12084020150.0,1668.0
222-222@5w9-j4z-2rk,160270224002.0,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,,Churches,43.485129,-116.526088,7770 Makayla Dr,Nampa,ID,83686.0,US,,,2019-07,"[""lds.org""]",12084619648.0,1236.0
222-222@5w9-j4z-2rk,,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,,Churches,43.485129,-116.526088,7770 Makayla Dr,Nampa,ID,83686.0,US,,,2019-07,"[""lds.org""]",12084619648.0,1236.0
222-222@5w9-j6p-p9z,160270210011.0,,The Church of Jesus Christ of Latter day Saints,,,,Religious Organizations,Religious Organizations,813110.0,,Churches,43.619075,-116.683381,5622 S Montana Ave,Caldwell,ID,83607.0,US,,,2019-07,[],12084539118.0,371.0


# Create relevant patterns from comb

In [None]:
# Patterns rows for the matched places only, with date_range_start converted
# to a date.
startAsDate = sf.to_date('date_range_start').alias('date_range_start')
rPatterns = (
    comb.select('placekey')
    .join(patterns, on='placekey', how='inner')
    .withColumn('date_range_start', startAsDate)
)
# To restrict to a single period, append:
#   .filter(sf.col('date_range_start') == dt.datetime(2019, 2, 1))

In [None]:
# Use popularity_by_day to get the share of visits that happen on Sunday
# compared to all days, for each church building and each reporting period.

# One row in rPatterns is identified by placekey + date_range_start. Every
# join below uses both keys: joining on placekey alone pairs each period of a
# place with every other period of that place and multiplies the rows.
periodKeys = ['placekey', 'date_range_start']

# Exploding the map column yields one (key, value) row per weekday.
ex = rPatterns.select(*periodKeys, sf.explode(sf.col('popularity_by_day')))
sunday = (
    ex.filter(sf.col('key') == 'Sunday')
    .groupBy(*periodKeys)
    .agg({'value': 'sum'})
    .withColumnRenamed('sum(value)', 'sunday')
)
weekday = (
    ex.filter(sf.col('key') != 'Sunday')
    .groupBy(*periodKeys)
    .agg({'value': 'sum'})
    .withColumnRenamed('sum(value)', 'weekday')
)
sundayRatio = sunday.join(weekday, on=periodKeys, how='inner').select(
    *periodKeys,
    (sf.col('sunday') / (sf.col('weekday') + sf.col('sunday'))).alias('sundayAttendanceRatio'),
)

# Drop any ratio column left by a previous run of this cell (no-op the first
# time) so re-running does not produce a duplicate sundayAttendanceRatio.
rPatterns = (
    rPatterns
    .drop('sundayAttendanceRatio')
    .join(sundayRatio, on=periodKeys, how='inner')
)

In [None]:
# Render rPatterns, which by this point carries the sundayAttendanceRatio
# column joined in by the previous cell.
display(rPatterns)

placekey,date_range_start,date_range_end,raw_visit_counts,raw_visitor_counts,distance_from_home,median_dwell,bucketed_dwell_times,related_same_day_brand,related_same_month_brand,popularity_by_hour,popularity_by_day,device_type,visits_by_day,visitor_home_cbgs,visitor_home_aggregation,visitor_daytime_cbgs,normalized_visits_by_state_scaling,normalized_visits_by_region_naics_visits,normalized_visits_by_region_naics_visitors,normalized_visits_by_total_visits,sundayAttendanceRatio,normalized_visitors_by_state_scaling,estimatedAttendance,tractcode,tract,sundayAttendanceRatio.1
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.1192660550458715
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.1083333333333333
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.0970149253731343
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.1830985915492957
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.1511627906976744
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.1074380165289256
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.2063492063492063
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.3513513513513513
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.2765957446808511
222-222@5w9-jb6-rff,2019-12-01,2020-01-01T00:00:00-07:00,109.0,46.0,5786.0,54.0,"Map(5-10 -> 12, 21-60 -> 27, 61-120 -> 11, <5 -> 1, >240 -> 23, 11-20 -> 15, 121-240 -> 20)","Map(Petco -> 1, Sonic -> 2, Walmart -> 4, Jared The Galleria of Jewelry -> 1, ULTA Beauty -> 1, TacoTime -> 3, Green Burrito -> 1, Chevron -> 1, Apollo Burgers -> 1, Ross Stores -> 1, Boys & Girls Clubs of America -> 1, Pita Pit -> 1, Taco Bell -> 2, WinCo Foods -> 3, Starbucks -> 2, Kaia FIT -> 2, Fred Meyer Jewelers -> 3, Burger King -> 2, Jacksons Food Stores -> 3, Shell Oil -> 5)","Map(Sonic -> 11, Walmart -> 59, Target -> 15, Chevron -> 30, Albertsons -> 13, WinCo Foods -> 39, The Home Depot -> 13, Starbucks -> 20, Fred Meyer Jewelers -> 22, Dutch Bros Coffee -> 15, Sinclair Oil -> 22, Costco -> 17, Walgreens -> 20, Panda Express -> 17, Burger King -> 17, Jacksons Food Stores -> 17, McDonald's -> 28, Stinker Stores -> 15, Shell Oil -> 24, Subway -> 17)","List(20, 23, 23, 23, 21, 20, 21, 25, 18, 25, 28, 16, 28, 23, 19, 21, 6, 8, 15, 24, 24, 23, 21, 20)","Map(Wednesday -> 16, Monday -> 19, Saturday -> 9, Thursday -> 25, Tuesday -> 13, Friday -> 14, Sunday -> 13)","Map(android -> 34, ios -> 10)","List(3, 3, 2, 4, 12, 6, 5, 3, 8, 4, 6, 7, 4, 0, 5, 4, 2, 3, 5, 2, 4, 1, 3, 2, 3, 1, 2, 0, 1, 1, 3)","Map(160270201001 -> 5, 160010102013 -> 4, 160010023023 -> 5, 160010024102 -> 4, 160010018003 -> 4, 160010014001 -> 4, 160010023021 -> 4, 490351034001 -> 4, 160010024112 -> 4, 160010102253 -> 4)","Map(16001002410 -> 4, 16001010332 -> 4, 41045970400 -> 4, 16001002310 -> 4, 16053970300 -> 4, 49035103400 -> 4, 16027021002 -> 4, 16001002312 -> 4, 16027021500 -> 4, 16001010225 -> 4, 16001002302 -> 9, 16001001400 -> 4)","Map(160010103353 -> 4, 160010023024 -> 4, 160270215003 -> 4, 160010103321 -> 4, 160010023023 -> 4, 160270219032 -> 4, 160010024102 -> 
4)",1756.0120016578337,0.0008040542035806231,0.0020958717095775,1.3109339225560373e-05,0.0881316098707403,741.0692851033057,65.31162912191296,16001002410,2410,0.1688311688311688


# Calculate Visitors from rPatterns

## Option 1: Calculate normalized visitors

In [None]:
def getNormalizedVisitors():
    """Build a Spark Column expression for the estimated total visitor count.

    The share of visits made without phones is taken from the visit columns:
    (normalized_visits_by_state_scaling - raw_visit_counts)
    / normalized_visits_by_state_scaling.
    The same share is then assumed for visitors, i.e.
    noPhoneVisitors / (raw_visitor_counts + noPhoneVisitors) == share,
    which is solved for noPhoneVisitors.

    Returns:
        A Column expression equal to raw_visitor_counts plus the derived
        no-phone visitor count. It only references column names; nothing is
        evaluated until it is attached to a DataFrame.
    """
    scaled_visits = sf.col('normalized_visits_by_state_scaling')
    phone_visits = sf.col('raw_visit_counts')
    phone_visitors = sf.col('raw_visitor_counts')

    # Fraction of the scaled visit total that is not covered by the raw visit count.
    no_phone_share = (scaled_visits - phone_visits) / scaled_visits

    # Solution of  x / (phone_visitors + x) == no_phone_share  for x.
    no_phone_visitors = (-phone_visitors * no_phone_share) / (no_phone_share - 1)

    return no_phone_visitors + phone_visitors

# Disabled by the `if False` guard: when enabled, adds the normalized visitor
# estimate to rPatterns as 'normalized_visitors_by_state_scaling'.
if False:
    normalizedVisitors = getNormalizedVisitors()
    rPatterns = rPatterns.withColumn('normalized_visitors_by_state_scaling', normalizedVisitors)


In [None]:
# Disabled by the `if False` guard: when enabled, multiplies the normalized
# visitor estimate by sundayAttendanceRatio and stores it as 'estimatedAttendance'.
if False:
    attendanceEstimate = sf.col('normalized_visitors_by_state_scaling') * sf.col('sundayAttendanceRatio')
    rPatterns = rPatterns.withColumn('estimatedAttendance', attendanceEstimate)

### Get rPlaces from rPatterns

In [None]:
# Disabled by the `if False` guard: when enabled, left-joins the tract columns
# from censustract_pkmap onto rPatterns by placekey, then left-joins `places`
# by placekey to produce rPlaces.
if False:
    tractLookup = censustract_pkmap.select('placekey', 'tractcode', 'tract')
    rPatterns = rPatterns.join(tractLookup, on='placekey', how='left')
    rPlaces = rPatterns.join(places, on='placekey', how='left')

In [None]:
# Disabled by the `if False` guard: when enabled, rolls rPlaces up to one row
# per tractcode (sums of the two estimates, mean of the ratio) and displays it.
if False:
    tractAggregations = {
        'estimatedAttendance': 'sum',
        'normalized_visitors_by_state_scaling': 'sum',
        'sundayAttendanceRatio': 'mean',
    }
    rPlacesSub = rPlaces.groupBy('tractcode').agg(tractAggregations).sort('tractcode')
    display(rPlacesSub)

tractcode,avg(sundayAttendanceRatio),sum(estimatedAttendance),sum(normalized_visitors_by_state_scaling)
,0.2174217965231898,39503.51948170106,158141.0920605404
16001000100.0,0.0081384403123034,1901.129897898316,277903.094423983
16001000302.0,0.0952380952380952,316.5119151134746,3323.375108691482
16001000500.0,0.2727924709195635,2747.7714754055296,8500.763405631002
16001000802.0,0.0906801007556674,222.70711364200972,2455.9645587743853
16001000803.0,0.4938118811881188,2737.3160963497767,5543.236606141903
16001000804.0,0.463345864661654,3575.833739022973,7717.41804933153
16001000900.0,0.3452409910453624,7362.925012964562,37062.02292508422
16001001100.0,0.0539906103286382,3307.911610645232,61268.27591890728
16001001202.0,0.4703222710937009,6586.009687923166,13900.861474032983


## Option 2: Calculate by visitor_home_aggregation

In [None]:
# Option 2 pipeline: weight each visitor_home_aggregation entry by the row's
# sundayAttendanceRatio, total it per (tractcode, date_range_start), then
# summarize those totals per 6-character tract.

# Summary statistics computed over the per-month totals, in output order.
memberStats = [
    summarize(sf.col('estimatedMembers'))
    for summarize in (sf.median, sf.mean, sf.min, sf.max, sf.stddev, sf.count)
]

homeTractMembers = rPatterns.select(
    'sundayAttendanceRatio',
    'date_range_start',
    # Exploding the map column yields one row per entry, as `key` / `value` columns.
    sf.explode('visitor_home_aggregation'),
).select(
    'date_range_start',
    sf.col('key').alias('tractcode'),
    (sf.col('sundayAttendanceRatio') * sf.col('value')).alias('estimatedMembers'),
)

monthlyTractMembers = (
    homeTractMembers
    .groupBy('tractcode', 'date_range_start')
    .agg(sf.sum('estimatedMembers').alias('estimatedMembers'))
    # 'tract' = characters 6-11 of the tract code after right-padding it with '0' to 11 characters.
    .withColumn('tract', sf.substring(sf.rpad(sf.col('tractcode').cast('string'), 11, '0'), 6, 6))
)

final = (
    monthlyTractMembers
    # NOTE(review): tract codes that differ only in their first five characters
    # land in the same 'tract' group here — confirm that is intended.
    .groupBy('tract')
    .agg(*memberStats)
    .withColumn('team_name', sf.lit('Marquet'))
    # .withColumnRenamed('median(estimatedMembers)', 'estimatedMembers')
    # .join(tract_table.select('tractcode', 'tract'), on='tractcode', how='left')
    .sort('tract')
)
display(final)

tract,median(estimatedMembers),avg(estimatedMembers),min(estimatedMembers),max(estimatedMembers),stddev_samp(estimatedMembers),count(estimatedMembers),team_name
100,260.4711399266984,411.90871468226845,15.015079426895554,2192.656573291568,474.9399599947735,39,Marquet
101,801.3687946822783,632.1584579752734,9.425043794309047,1241.642865031498,450.887118540076,9,Marquet
102,268.8739262329275,344.27924005754346,127.5575495557855,1011.201975040024,257.0703635247495,10,Marquet
105,185.98335288048375,185.98335288048375,185.98335288048375,185.98335288048375,,1,Marquet
113,119.94700242374608,119.94700242374608,119.94700242374608,119.94700242374608,0.0,2,Marquet
132,89.74461123487566,89.74461123487566,89.74461123487566,89.74461123487566,,1,Marquet
200,488.1366757733973,1911.428240969746,1.6039915333236667,5947.719073884627,2100.850664501673,58,Marquet
201,232.5017701332225,230.5668063197096,25.52681237292137,593.4086654253143,150.92172226916497,17,Marquet
202,325.5075951815573,496.0548040868904,100.68337845950784,1608.2950773956077,442.45485105264936,26,Marquet
203,252.9777807422294,275.19792588667343,189.4257908225365,460.25848617821583,87.0632678531453,7,Marquet


In [None]:
# Print Spark's describe() summary for the columns of `final`
# as a plain-text table.
final.describe().show()

+-------+-----------------+------------------------+---------------------+---------------------+---------------------+-----------------------------+-----------------------+---------+
|summary|            tract|median(estimatedMembers)|avg(estimatedMembers)|min(estimatedMembers)|max(estimatedMembers)|stddev_samp(estimatedMembers)|count(estimatedMembers)|team_name|
+-------+-----------------+------------------------+---------------------+---------------------+---------------------+-----------------------------+-----------------------+---------+
|  count|             1169|                    1169|                 1169|                 1169|                 1169|                          539|                   1169|     1169|
|   mean|488380.6723695466|       523.9070980588267|    574.3034971275939|   275.97017884196345|   1001.7624516022976|            512.8161507614506|      4.704875962360992|     null|
| stddev|281371.7235455154|      1859.2411393607472|   1977.8597617504026|   1353.602

In [None]:
# Show the tractcode / tract pairs held in tract_table for reference.
display(tract_table.select('tractcode', 'tract'))

tractcode,tract
16041970200,970200
16041970100,970100
16073950200,950200
16073950101,950101
16073950102,950102
16069960700,960700
16069960900,960900
16069960400,960400
16069960200,960200
16069960300,960300


## Alternate Methods

In [None]:
# Per-row distance_from_home weighted by sunday_ratio (sundayRatio is left-joined by placekey).
# NOTE(review): the saved output of this cell is an AnalysisException raised from
# this first statement — confirm both columns exist on the joined frame.
dist = (
    rPatterns
    .join(sundayRatio, on='placekey', how='left')
    .select(
        'placekey',
        (sf.col('distance_from_home') * sf.col('sunday_ratio')).alias('normalized_dist'),
    )
)

# Fill value for missing distances is the approximate 25th percentile:
# small, not mean, because of https://web.archive.org/web/20220526041151/https://docs.safegraph.com/docs/monthly-patterns#section-distance-from-home
# small, not min, because apparently the min is 0
small = dist.agg(sf.percentile_approx(dist.normalized_dist, .25)).collect()[0][0]

# Replace nulls with `small`, then total the weighted distance per placekey.
dist = (
    dist
    .fillna(small)
    .groupBy('placekey')
    .agg(sf.sum('normalized_dist').alias('normalized_dist'))
)
# dist.show()


[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-3930947574261965>:1[0m
[0;32m----> 1[0m dist [38;5;241m=[39m rPatterns[38;5;241m.[39mjoin(sundayRatio, on[38;5;241m=[39m[38;5;124m'[39m[38;5;124mplacekey[39m[38;5;124m'[39m, how[38;5;241m=[39m[38;5;124m'[39m[38;5;124mleft[39m[38;5;124m'[39m)\
[1;32m      2[0m     [38;5;241m.[39mselect(
[1;32m      3[0m         [38;5;124m'[39m[38;5;124mplacekey[39m[38;5;124m'[39m,
[1;32m      4[0m         (sf[38;5;241m.[39mcol([38;5;124m'[39m[38;5;124mdistance_from_home[39m[38;5;124m'[39m) [38;5;241m*[39m sf[38;5;241m.[39mcol([38;5;124m'[39m[38;5;124msunday_ratio[39m[38;5;124m'[39m))[38;5;241m.[39malias([38;5;124m'[39m[38;5;124mnormalized_dist[39m[38;5;124m'[39m),
[1;32m      5[0m     )
[1;32m      6[0m [38;5;66;03m# small, not mean, because of 