In [None]:
# My Jupyter notebook
# Assignment: Church Buildings in Utah vs. Georgia

# Export to HTML / .py before submitting to Canvas

In [1]:
# imports
import polars as pl
import pyarrow as pa
import pyarrow.parquet as pq
import numpy as np
import plotly.express as px
import plotly.io as pio

In [2]:
# Load the two parquet inputs.
# pl.read_parquet (with and without use_pyarrow=True) was tried first but came
# back with null data, so the files are read with pyarrow and then converted.
def _parquet_to_polars(path):
    """Read the parquet file at `path` with pyarrow and hand the table to polars."""
    arrow_table = pq.read_table(path)
    return pl.from_arrow(arrow_table)


patterns = _parquet_to_polars("../data/parquet/patterns.parquet")
places = _parquet_to_polars("../data/parquet/places.parquet")

In [3]:
# Show the column names of the raw patterns table.
# DataFrame.columns is already a plain list of strings, so it displays as-is.
patterns.columns

['placekey',
 'date_range_start',
 'date_range_end',
 'raw_visit_counts',
 'raw_visitor_counts',
 'distance_from_home',
 'median_dwell',
 'bucketed_dwell_times',
 'related_same_day_brand',
 'related_same_month_brand',
 'popularity_by_hour',
 'popularity_by_day',
 'device_type',
 'visits_by_day',
 'visitor_home_cbgs',
 'visitor_home_aggregation',
 'visitor_daytime_cbgs',
 'visitor_country_of_origin',
 'normalized_visits_by_state_scaling',
 'normalized_visits_by_region_naics_visits',
 'normalized_visits_by_region_naics_visitors',
 'normalized_visits_by_total_visits']

In [13]:
# Show the column names of the raw places table.
# DataFrame.columns is already a plain list of strings, so it displays as-is.
places.columns

['placekey',
 'poi_cbg',
 'parent_placekey',
 'location_name',
 'brands',
 'safegraph_brand_ids',
 'store_id',
 'top_category',
 'sub_category',
 'naics_code',
 'open_hours',
 'category_tags',
 'latitude',
 'longitude',
 'street_address',
 'city',
 'region',
 'postal_code',
 'iso_country_code',
 'opened_on',
 'closed_on',
 'tracking_closed_since',
 'websites',
 'phone_number',
 'wkt_area_sq_meters']

In [62]:
# all the different church names
#
# Step 1: tally every lower-cased location name that mentions "latter".
# The earlier test `("latter" or "day" or "saints") in location` only ever
# checked "latter", because `or` returns its first truthy operand. That
# behaviour is kept (and made explicit) on purpose: also matching "day" or
# "saints" would pull in unrelated places.
NAME_KEYWORD = "latter"
# Step 2 keeps only the names that contain this phrase.
LDS_PHRASE = "church of jesus christ"

churches = {}
for location in places["location_name"]:
    if location is None:
        # a null name cannot be lower-cased; skip it
        continue
    location = location.lower()
    if NAME_KEYWORD in location:
        churches[location] = churches.get(location, 0) + 1

# alphabetical view of the tally
myKeys = sorted(churches)
sorted_churches = {name: churches[name] for name in myKeys}

for key, value in sorted_churches.items():
    print(f"{key}: {value}")

# Names that actually contain the church's name.
# `str.find` returns -1 (truthy) when the phrase is missing and 0 (falsy) when
# the name starts with it, so `if key.find(...)` selected the wrong names.
# A substring test with `in` expresses the intended check.
lds = [key for key in sorted_churches if LDS_PHRASE in key]

for item in lds:
    print(item)

associated latter day media artists: 2
bountiful utah north canyon stake church of jesus christ of latter d: 1
bountiful utah orchard stake church of jesus christ of latter day sain: 1
chambersburg pennsylvania stake church of jesus christ of latter d: 1
charleston south carolina stake church of jesus christ of latter d: 1
charleston west virginia stake church of jesus christ of latter d: 1
church of jesus christ of latter day: 5
church of jesus christ of latter day saint: 1
church of jesus christ of latter day saints: 2
church of jesus christ of latter day saints foundation: 1
church of jesus christ of latter dy snts the cnt'd: 2
corporation of the prophet of the body of jesus christ of latter day: 1
farmington new mexico stake church of jesus christ of latter day sain: 2
harvest of the latter rain: 1
highlands ranch colorado stake church of jesus christ of latter d: 2
indianapolis indiana north stake church of jesus christ of latter d: 2
latter day church of christ: 1
latter day mini

In [78]:
# Question 1
# What differences are there between iPhone and Android users when comparing
# visits to buildings of The Church of Jesus Christ of Latter-day Saints in
# Utah and Georgia?

# Keep only the places whose lower-cased name is one of the name variants
# collected in `lds` (built in an earlier cell), then keep just the columns
# needed downstream.
# No region filter is applied: the dataset is already limited to UT and GA.
places_lds = (
    places
    .filter(pl.col("location_name").str.to_lowercase().is_in(lds))
    .select(["placekey", "poi_cbg", "region", "location_name"])
)

# Jupyter only displays the last expression of a cell, so a bare len(...) in
# the middle would be evaluated and thrown away; print the row count instead.
print(f"LDS places: {len(places_lds)}")

places_lds.head()

placekey,poi_cbg,region,location_name
str,str,str,str
"""zzy-223@5qc-x4…","""490490102203""","""UT""","""The Church of …"
"""zzw-222@5qd-22…","""490111258013""","""UT""","""The Church of …"
"""zzw-222@5ws-h5…","""490050014023""","""UT""","""The Church of …"
"""zzy-222@5ws-mx…",,"""UT""","""The Church of …"
"""zzy-223@5qb-9z…","""490490032011""","""UT""","""The Church of …"


In [79]:
# Drop every pattern row that has no device_type value.
has_device_type = pl.col("device_type").is_not_null()
patterns_device_type = patterns.filter(has_device_type)

patterns_device_type.head()

placekey,date_range_start,date_range_end,raw_visit_counts,raw_visitor_counts,distance_from_home,median_dwell,bucketed_dwell_times,related_same_day_brand,related_same_month_brand,popularity_by_hour,popularity_by_day,device_type,visits_by_day,visitor_home_cbgs,visitor_home_aggregation,visitor_daytime_cbgs,visitor_country_of_origin,normalized_visits_by_state_scaling,normalized_visits_by_region_naics_visits,normalized_visits_by_region_naics_visitors,normalized_visits_by_total_visits
str,str,str,f64,f64,f64,f64,list[struct[2]],list[struct[2]],list[struct[2]],list[i32],list[struct[2]],list[struct[2]],list[i32],list[struct[2]],list[struct[2]],list[struct[2]],list[struct[2]],f64,f64,f64,f64
"""222-223@8g7-2n…","""2019-10-01T00:…","""2019-11-01T00:…",10.0,6.0,12439.0,30.0,"[{""<5"",1}, {""5-10"",1}, … {"">240"",1}]","[{""Chevrolet"",11}, {""Marco's Pizza"",11}, … {""Shell Oil"",11}]","[{""Toyota"",33}, {""Walmart"",33}, … {""CVS"",33}]","[1, 1, … 1]","[{""Monday"",3}, {""Tuesday"",1}, … {""Sunday"",2}]","[{""android"",4}, {""ios"",5}]","[1, 0, … 0]","[{""131350505461"",4}]","[{""13135050540"",4}, {""13135050546"",4}]","[{""131390015021"",4}]","[{""US"",9}]",106.203073,8e-06,2e-05,1.2499e-07
"""222-223@8g7-2n…","""2019-10-01T00:…","""2019-11-01T00:…",10.0,6.0,12439.0,30.0,"[{""<5"",1}, {""5-10"",1}, … {"">240"",1}]","[{""Chevrolet"",11}, {""Marco's Pizza"",11}, … {""Shell Oil"",11}]","[{""Toyota"",33}, {""Walmart"",33}, … {""CVS"",33}]","[1, 1, … 1]","[{""Monday"",3}, {""Tuesday"",1}, … {""Sunday"",2}]","[{""android"",4}, {""ios"",5}]","[1, 0, … 0]","[{""131350505461"",4}]","[{""13135050540"",4}, {""13135050546"",4}]","[{""131390015021"",4}]","[{""US"",9}]",106.203073,8e-06,2e-05,1.2499e-07
"""222-222@8g7-3z…","""2019-11-01T00:…","""2019-12-01T00:…",49.0,32.0,7412.0,29.0,"[{""<5"",1}, {""5-10"",14}, … {"">240"",9}]","[{""FleetPride"",2}, {""Walmart"",4}, … {""Dollar Tree"",4}]","[{""Walmart"",50}, {""Chevron"",38}, … {""Dollar Tree"",22}]","[7, 6, … 6]","[{""Monday"",7}, {""Tuesday"",2}, … {""Sunday"",8}]","[{""android"",17}, {""ios"",14}]","[1, 2, … 3]","[{""131210038001"",4}, {""131210086013"",4}, … {""131210077042"",4}]","[{""13121010209"",7}, {""13121007704"",4}, … {""13121002400"",4}]","[{""131210077042"",4}, {""131210038001"",4}, … {""130970801031"",4}]","[{""US"",27}]",600.035254,4.2e-05,0.000108,6.5526e-07
"""zzy-223@5qc-x6…","""2019-11-01T00:…","""2019-12-01T00:…",123.0,70.0,5527.0,61.0,"[{""<5"",1}, {""5-10"",23}, … {"">240"",8}]","[{""Menchie's"",1}, {""Walmart"",5}, … {""Dollar Tree"",2}]","[{""Walmart"",51}, {""Target"",27}, … {""Dollar Tree"",20}]","[2, 0, … 2]","[{""Monday"",5}, {""Tuesday"",6}, … {""Sunday"",55}]","[{""android"",44}, {""ios"",25}]","[5, 0, … 6]","[{""490490008013"",12}, {""490490008011"",7}, … {""490351125012"",4}]","[{""49049000801"",16}, {""49049000706"",8}, … {""08059011723"",4}]","[{""490490008013"",10}, {""490490009012"",5}, … {""490490004001"",4}]","[{""US"",66}]",1984.161444,0.000436,0.001085,8e-06
"""222-222@5qd-38…","""2019-11-01T00:…","""2019-12-01T00:…",111.0,22.0,3582.0,121.0,"[{""<5"",0}, {""5-10"",6}, … {"">240"",45}]","[{""Walmart"",1}, {""TacoTime"",1}, … {""Arby's"",2}]","[{""Petco"",9}, {""Walmart"",32}, … {""Arby's"",14}]","[0, 0, … 0]","[{""Monday"",16}, {""Tuesday"",23}, … {""Sunday"",0}]","[{""android"",18}, {""ios"",4}]","[7, 0, … 0]","[{""490351011012"",4}, {""490351036002"",4}, … {""490451307022"",4}]","[{""49035103100"",5}, {""49011126901"",4}, … {""49035102900"",4}]","[{""490351020002"",4}, {""490351014003"",4}]","[{""US"",20}]",1790.584718,0.000393,0.000979,7e-06


In [81]:
# Keep only the pattern rows whose placekey belongs to one of the LDS places.
# This is a filter on placekey, not a merge: no columns from places_lds are
# added to the result.
patterns_UT_GA = patterns_device_type.filter(
    pl.col("placekey").is_in(places_lds["placekey"])
)

# Jupyter only displays the last expression of a cell, so a bare len(...) in
# the middle would be evaluated and thrown away; print the row count instead.
print(f"LDS pattern rows: {len(patterns_UT_GA)}")

patterns_UT_GA.head()

placekey,date_range_start,date_range_end,raw_visit_counts,raw_visitor_counts,distance_from_home,median_dwell,bucketed_dwell_times,related_same_day_brand,related_same_month_brand,popularity_by_hour,popularity_by_day,device_type,visits_by_day,visitor_home_cbgs,visitor_home_aggregation,visitor_daytime_cbgs,visitor_country_of_origin,normalized_visits_by_state_scaling,normalized_visits_by_region_naics_visits,normalized_visits_by_region_naics_visitors,normalized_visits_by_total_visits
str,str,str,f64,f64,f64,f64,list[struct[2]],list[struct[2]],list[struct[2]],list[i32],list[struct[2]],list[struct[2]],list[i32],list[struct[2]],list[struct[2]],list[struct[2]],list[struct[2]],f64,f64,f64,f64
"""zzy-223@5qc-x6…","""2019-11-01T00:…","""2019-12-01T00:…",123.0,70.0,5527.0,61.0,"[{""<5"",1}, {""5-10"",23}, … {"">240"",8}]","[{""Menchie's"",1}, {""Walmart"",5}, … {""Dollar Tree"",2}]","[{""Walmart"",51}, {""Target"",27}, … {""Dollar Tree"",20}]","[2, 0, … 2]","[{""Monday"",5}, {""Tuesday"",6}, … {""Sunday"",55}]","[{""android"",44}, {""ios"",25}]","[5, 0, … 6]","[{""490490008013"",12}, {""490490008011"",7}, … {""490351125012"",4}]","[{""49049000801"",16}, {""49049000706"",8}, … {""08059011723"",4}]","[{""490490008013"",10}, {""490490009012"",5}, … {""490490004001"",4}]","[{""US"",66}]",1984.161444,0.000436,0.001085,8e-06
"""zzw-222@5qd-38…","""2019-11-01T00:…","""2019-12-01T00:…",67.0,29.0,17080.0,97.0,"[{""<5"",0}, {""5-10"",4}, … {"">240"",26}]","[{""Audi"",2}, {""Walmart"",11}, … {""Cafe Rio"",3}]","[{""Walmart"",41}, {""Target"",21}, … {""Cafe Rio"",21}]","[3, 3, … 2]","[{""Monday"",13}, {""Tuesday"",8}, … {""Sunday"",4}]","[{""android"",23}, {""ios"",4}]","[4, 0, … 1]","[{""490111270021"",4}, {""490572105122"",4}, … {""490490022013"",4}]","[{""49011125303"",4}, {""49035112619"",4}, … {""49035114700"",4}]","[{""490351023002"",9}, {""490351135052"",6}, … {""490351020003"",4}]","[{""US"",27}]",1080.803388,0.000237,0.000591,5e-06
"""zzy-222@5qd-38…","""2019-12-01T00:…","""2020-01-01T00:…",91.0,43.0,493.0,88.0,"[{""<5"",0}, {""5-10"",3}, … {"">240"",2}]","[{""Walmart"",4}, {""Vasa Fitness"",2}, … {""Sam's Club"",1}]","[{""Walmart"",65}, {""Vasa Fitness"",14}, … {""Cafe Rio"",16}]","[0, 0, … 0]","[{""Monday"",0}, {""Tuesday"",4}, … {""Sunday"",58}]","[{""android"",24}, {""ios"",20}]","[7, 0, … 0]","[{""490351014002"",23}, {""490351040002"",6}, … {""320030054335"",4}]","[{""49035101400"",26}, {""49049010105"",5}, … {""49011125406"",4}]","[{""490351014002"",21}, {""490111254062"",4}, … {""490351102001"",4}]","[{""US"",42}]",1382.403621,0.000291,0.00067,6e-06
"""zzy-222@5qc-x4…","""2019-10-01T00:…","""2019-11-01T00:…",72.0,16.0,1517.0,27.0,"[{""<5"",0}, {""5-10"",17}, … {"">240"",1}]","[{""Sonic"",2}, {""Walmart"",26}, … {""Phillips 66"",5}]","[{""Walmart"",63}, {""Chevron"",50}, … {""Cafe Rio"",19}]","[0, 1, … 3]","[{""Monday"",13}, {""Tuesday"",10}, … {""Sunday"",10}]","[{""android"",5}, {""ios"",9}]","[1, 4, … 1]","[{""490490102092"",4}, {""490490102113"",4}]","[{""49049010211"",6}, {""49035110200"",4}, {""49049002000"",4}]","[{""490490102113"",6}]","[{""US"",14}]",968.85186,0.000255,0.000591,4e-06
"""zzy-222@5qc-x4…","""2019-10-01T00:…","""2019-11-01T00:…",72.0,16.0,1517.0,27.0,"[{""<5"",0}, {""5-10"",17}, … {"">240"",1}]","[{""Sonic"",2}, {""Walmart"",26}, … {""Phillips 66"",5}]","[{""Walmart"",63}, {""Chevron"",50}, … {""Cafe Rio"",19}]","[0, 1, … 3]","[{""Monday"",13}, {""Tuesday"",10}, … {""Sunday"",10}]","[{""android"",5}, {""ios"",9}]","[1, 4, … 1]","[{""490490102092"",4}, {""490490102113"",4}]","[{""49049010211"",6}, {""49035110200"",4}, {""49049002000"",4}]","[{""490490102113"",6}]","[{""US"",14}]",968.85186,0.000255,0.000591,4e-06
