In [None]:
# This scrapes the ACHD COVID-19 reporting site.  It was checked into GitHub
# on Randy's laptop on 4/14/20, and modified on Anne's laptop to try to include new
# data on race.

In [None]:
import datetime, json, re, time
from selenium import webdriver
import pandas as pd
from sqlitedict import SqliteDict
from collections import defaultdict
from shapely.geometry import Polygon, MultiPolygon, shape, Point
import math
#!conda install -y selenium
#!pip install sqlitedict

In [None]:
def init():
    """Start a fresh Chrome WebDriver session and bind it to the global ``driver``.

    Any session left over from an earlier call is shut down first, so this
    cell can be re-run without piling up browser instances.
    """
    global driver
    try:
        # quit() rather than close(): close() only closes the current window,
        # while quit() also ends the WebDriver session and its driver process.
        driver.quit()
    except NameError:
        # First call in this kernel: there is no previous driver to shut down.
        pass
    except Exception as e:
        # Best effort: a stale or already-dead session must not block start-up,
        # but report it instead of hiding it.
        print("init: ignoring error while shutting down previous driver: %r" % (e,))
    options = webdriver.ChromeOptions()
    options.add_argument('install-autogenerated-theme=255,0,255')
    driver = webdriver.Chrome('chromedriver', options=options)
    #driver.implicitly_wait(10)

In [None]:
def achd_get_display_selections():
    """Open the statistic-type dropdown and return its selectable entries.

    Two page layouts are handled: a primary selector is tried first and, if
    locating or clicking it fails, a fallback element id is used instead.

    Returns:
        dict: maps the visible text of each ``span`` in the dropdown menu to
        the corresponding WebElement.

    Raises:
        Whatever Selenium raises when the fallback lookups also fail.
    """
    # `except Exception` (not a bare except) so that a kernel interrupt or
    # SystemExit is not mistaken for "primary selector not found".
    try:
        stat_type_dropdown = driver.find_element_by_css_selector("#tableau_base_widget_ParameterControl_1 > div > div.PCContent > span > div.tabComboBoxNameContainer.tab-ctrl-formatted-fixedsize")
        stat_type_dropdown.click()
    except Exception:
        stat_type_dropdown = driver.find_element_by_id('tableau_base_widget_ParameterControl_3')
        stat_type_dropdown.click()

    try:
        display_sel_parent = driver.find_element_by_css_selector('.tableau_base_widget_ParameterControl_1_menu')
    except Exception:
        display_sel_parent = driver.find_element_by_id('tableau_base_widget_ParameterControl_0_menu-style')

    display_sel_spans = display_sel_parent.find_elements_by_css_selector('span')

    # Map each entry's display text to its element so callers can click by name
    return {span.text: span for span in display_sel_spans}

In [None]:
def achd_get_display_panes():
    """Locate the dashboard panes that get scanned and return them by name.

    Returns:
        dict: keys 'map', 'age_bars', 'race_pie' and 'gender_bars', each
        mapped to the WebElement found for that pane.

    Raises:
        Whatever Selenium raises when one of the panes cannot be found.
    """
    by_css = driver.find_element_by_css_selector
    # The pane containers themselves are targeted.  Earlier revisions tried the
    # inner canvas instead, i.e. "<pane> > div.tvScrollContainer.tvmodeRectSelect
    # > div.tvimagesContainer > canvas:nth-child(2)".
    return {
        'map': driver.find_element_by_id('tabZoneId3'),
        'age_bars': by_css('#tabZoneId111'),
        'race_pie': by_css('#view5368947392126195861_4899206460912332246'),
        'gender_bars': by_css('#view5368947392126195861_3289867524579464219'),
    }

In [None]:
def get_age_bars_coord(elt, i):
    """Return the i-th hover point for the age bar chart, or None when done.

    Points lie on the vertical line x=80, starting at y=15 and stepping 15
    pixels per index.  None is returned once y reaches the pane's height.
    """
    probe_y = 15 + i * 15
    pane_height = elt.size['height']
    return Point(80, probe_y) if probe_y < pane_height else None

In [None]:
def get_gender_bars_coord(elt, i):
    """Return the i-th hover point for the gender bar chart, or None when done.

    Points lie on the vertical line x=40, starting at y=10 and stepping 15
    pixels per index.  None is returned once y reaches the pane's height.
    """
    probe_y = 10 + i * 15
    pane_height = elt.size['height']
    return Point(40, probe_y) if probe_y < pane_height else None

In [None]:
def get_race_pie_coord(elt, i):
    """Return the i-th hover point on a circle inside the race pie pane.

    The circle is centred in the pane with a radius of 80% of the smaller
    half-dimension.  Each index advances the angle by pi/100, so indices
    0..200 cover one full revolution; larger indices return None.
    """
    # Number of samples per half turn (pi radians)
    steps_per_half_turn = 100
    if i > steps_per_half_turn * 2:
        # One full revolution has been sampled
        return None

    pane_size = elt.size
    height = pane_size['height']
    width = pane_size['width']

    cx = width/2
    cy = height/2
    r = min(cx, cy)*0.8
    angle = (float(i)/steps_per_half_turn)*math.pi
    return Point(r*math.sin(angle)+cx, r*math.cos(angle)+cy)

In [None]:
def get_age_bars_info_name(info):
    """Return the 'Age Group' entry of a tooltip info dict, or None if absent."""
    return info.get('Age Group')

In [None]:
def get_gender_bars_info_name(info):
    """Return the 'Gender' entry of a tooltip info dict, or None if absent."""
    return info.get('Gender')

In [None]:
def get_race_pie_info_name(info):
    """Return the 'Race' entry of a tooltip info dict, or None if absent."""
    return info.get('Race')

In [None]:
# Sampling-path generator for each pane: maps a pane name to the function
# that yields the successive hover points to probe within that pane.
pane_path_func_map = dict([
    ('age_bars', get_age_bars_coord),
    ('gender_bars', get_gender_bars_coord),
    ('race_pie', get_race_pie_coord),
])

In [None]:
# Name extractor for each pane: maps a pane name to the function that pulls
# the category label (age group / gender / race) out of a tooltip info dict.
pane_name_func_map = dict([
    ('age_bars', get_age_bars_info_name),
    ('gender_bars', get_gender_bars_info_name),
    ('race_pie', get_race_pie_info_name),
])

In [None]:
def achd_scan_pane(pane_name):
    """Hover along one pane's sampling path and collect the tooltip data found.

    The pane's path function supplies successive points until it returns
    None.  Each point is probed with ``get_info``; the first tooltip seen for
    each category name is kept, and every point is recorded under the name
    it produced.

    If probing a point raises, the panes are re-located (up to 5 attempts,
    10 seconds apart) and the scan continues with the NEXT point; the failed
    point is not re-probed.

    Args:
        pane_name: key present in ``disp_pane_map``, ``pane_path_func_map``
            and ``pane_name_func_map``.

    Returns:
        (info_map, pixels_map): ``info_map`` maps category name -> first
        tooltip info dict seen; ``pixels_map`` maps category name -> list of
        (x, y) points where that category was found.

    Raises:
        AssertionError: if ``pane_name`` is missing from one of the maps.
        Exception: the original probing error, re-raised when the panes
            cannot be re-located after 5 attempts.
    """
    global disp_pane_map

    assert pane_name in disp_pane_map, "Missing %s in disp_pane_map"%(pane_name)
    assert pane_name in pane_path_func_map, "Missing %s in pane_path_func_map"%(pane_name)
    assert pane_name in pane_name_func_map, "Missing %s in pane_name_func_map"%(pane_name)

    elt = disp_pane_map[pane_name]
    path_func = pane_path_func_map[pane_name]
    name_func = pane_name_func_map[pane_name]
    i = 0
    retry = 0

    info_map = {}
    pixels_map = defaultdict(list)
    while True:
        try:
            chk_point = path_func(elt, i)
            if chk_point is None:
                break
            location_info = get_info(elt, chk_point.x,chk_point.y)
            if location_info:
                # Get name for this info map for this pane
                name = name_func(location_info)
                if not name:
                    # Entries without a name are still recorded (under the
                    # falsy name) so nothing scraped is silently dropped.
                    print("WARNING: pane %r, no name for %r"%(pane_name,location_info))

                if not name in info_map:
                    print(location_info)
                    info_map[name] = location_info

                # Keep track of the pixels where each element was found
                pixels_map[name].append((chk_point.x,chk_point.y))
        except Exception as scan_err:
            print("    %s: Disp retry %d: Exception %r"%(pane_name, retry, scan_err))
            panes_ok=False
            for pane_retry in range(0,5):
                try:
                    # Wait 10 seconds for the display to stabilize
                    time.sleep(10)
                    # Get the display elements to scan
                    disp_pane_map = achd_get_display_panes()
                    panes_ok=True
                    break
                except Exception as pane_err:
                    # Report under pane_name: this function has no disp_name,
                    # and referencing it here raised NameError instead of retrying.
                    print("  %s: Pane retry %d: Exception %r"%(pane_name, pane_retry, pane_err))
            if not panes_ok:
                # Re-raise the probing error that triggered the refresh
                raise
            # Get the new element pointer for the pane
            assert pane_name in disp_pane_map, "Missing %s in disp_pane_map"%(pane_name)
            elt = disp_pane_map[pane_name]
            retry+=1
        i+=1

    return info_map, pixels_map

In [None]:
def achd_scan_display_type(disp_name):
    """Switch the dashboard to one display type and scan every chart pane.

    Selecting the display type and locating the panes are each retried up to
    5 times.  If either still fails, a RuntimeError is raised rather than
    scanning whatever happens to be on screen, which would file data under
    the wrong display type.

    Args:
        disp_name: visible text of the dropdown entry to select.

    Returns:
        (info_map, pixels_map): both keyed by pane name, holding the
        per-pane results of ``achd_scan_pane``.

    Raises:
        RuntimeError: if the display type could not be selected, or the panes
            could not be located, within the retry budget.
    """
    global disp_sel_map
    global disp_pane_map

    max_attempts = 5
    for disp_retry in range(max_attempts):
        try:
            # Get the display selections map
            disp_sel_map = achd_get_display_selections()

            assert disp_name in disp_sel_map and disp_sel_map[disp_name], "Missing %s in disp_sel_map"%(disp_name)
            # Select the desired display type
            disp_sel_map[disp_name].click()

            for pane_retry in range(max_attempts):
                try:
                    # Wait 10 seconds for the display to stabilize
                    time.sleep(10)
                    # Get the display elements to scan
                    disp_pane_map = achd_get_display_panes()
                    break
                except Exception as pane_err:
                    print("  %s: Pane retry %d: Exception %r"%(disp_name, pane_retry, pane_err))
            else:
                # Panes never appeared; let the outer loop redo the selection
                raise RuntimeError("Could not locate display panes for %s"%(disp_name))

            break
        except Exception as e:
            print("  %s: Disp retry %d: Exception %r"%(disp_name, disp_retry,e))
    else:
        raise RuntimeError("Could not select display type %s after %d attempts"%(disp_name, max_attempts))

    info_map = {}
    pixels_map = {}

    # Scan the various pane types
    for pane_name in pane_name_func_map.keys():
        pane_info_map, pane_pixels_map = achd_scan_pane(pane_name)
        info_map[pane_name] = pane_info_map
        pixels_map[pane_name] = pane_pixels_map

    return info_map, pixels_map

In [None]:
def get_info(elt, x,y):
    """Hover over a point inside an element and parse the tooltip that appears.

    Args:
        elt: WebElement to hover over.
        x, y: offset of the hover point, passed to
            ``move_to_element_with_offset``.

    Returns:
        dict or None: None when no tooltip element is found or its text is
        empty.  Otherwise a dict built from the tooltip's "key: value" lines,
        plus 'name' (the first non-blank tooltip line) and the probed 'x'
        and 'y'.

    Raises:
        AssertionError: if the tooltip is not positioned about 16 pixels
            right of and below the hover point.
    """
    actions = webdriver.common.action_chains.ActionChains(driver)
    actions.move_to_element_with_offset(elt, x, y)
    actions.perform()
    try:
        popup = driver.find_element_by_css_selector('.tab-tooltipBR')
    except Exception:
        # No tooltip is showing at this point
        return None
    offset_x = popup.location['x'] - elt.location['x'] - x
    offset_y = popup.location['y'] - elt.location['y'] - y
    # NOTE(review): presumably guards against reading a tooltip left over
    # from a previous hover position -- confirm.
    assert abs(offset_x-16)<2 and abs(offset_y-16)<2, \
        "Tooltip offset (%r, %r) is not near the expected (16, 16)"%(offset_x, offset_y)
    info = popup.find_element_by_css_selector('span').text

    # Split on newlines and ignore blank lines
    lines = [line.strip() for line in info.split('\n') if line.strip()]
    if not lines:
        # Tooltip present but empty: nothing to report for this point
        return None

    location_info = {}
    for line in lines[1:]:
        # Split on the FIRST colon only, so values that contain a colon are
        # kept whole; lines without any colon carry no key/value and are skipped.
        key, sep, value = line.partition(':')
        if sep:
            location_info[key.strip()] = value.strip()
    location_info['name'] = lines[0]
    location_info['x'] = x
    location_info['y'] = y
    return location_info

In [None]:
# Accumulated scrape results, one entry per kind of info.
# Keys are the display types plus 'Map'.
scrape_info_map, scrape_pixels_map = {}, {}

In [None]:
# Start a fresh browser session, then load the ACHD Tableau dashboard in it.
init()
# Embedded-view URL of the dashboard.
url = 'https://tableau.alleghenycounty.us/t/PublicSite/views/COVID-19AlleghenyCounty/COVID-19?iframeSizedToWindow=true&:embed=y&:showAppBanner=false&:display_count=no&:showVizHome=no&:origin=viz_share_link'
driver.get(url)

In [None]:
# Scrape each display type once.  Types already present in scrape_info_map
# are skipped, so this cell can be re-run after a partial failure.
for disp_type in ('All Cases', 'All Tests', 'Hospitalizations', 'Admitted to ICU', 'Deaths'):
    if disp_type not in scrape_info_map:
        print("Scraping %s"%(disp_type))
        info_map, pixels_map = achd_scan_display_type(disp_type)
        scrape_info_map[disp_type] = info_map
        scrape_pixels_map[disp_type] = pixels_map
    else:
        print("Skipping %s"%(disp_type))

In [None]:
# Accumulators for the map scan:
#   locations: location name -> first tooltip info dict seen for it
#   pixels:    location name -> set of (x, y) points where it was found
locations = dict()
pixels = defaultdict(set)

In [None]:
# Re-locate the panes and keep the result.  Previously the return value was
# discarded, so the lookup below read whatever disp_pane_map an earlier scan
# had left behind rather than the panes just located.
disp_pane_map = achd_get_display_panes()
map_elt = disp_pane_map['map']
# Cache the size; it is stored with the session metadata later on.
map_elt_size = map_elt.size
print(f'map_elt.size: {map_elt_size}')

In [None]:
# Hover over every second pixel of the map pane, row by row.  Keep the first
# tooltip seen for each location and record every point that hit it.
for y in range(0, map_elt.size['height'], 2):
    for x in range(0, map_elt.size['width'], 2):
        location_info = get_info(map_elt, x, y)
        if not location_info:
            # No tooltip at this point
            continue
        loc_name = location_info['name']
        if loc_name not in locations:
            print(location_info)
            locations[loc_name] = location_info
        pixels[loc_name].add((x, y))

In [None]:
# File the map scan results under the 'Map' key, next to the display types
scrape_info_map.update({'Map': locations})
scrape_pixels_map.update({'Map': pixels})

In [None]:
# Open the persistent store in achd_covid.db.  autocommit=True is passed so
# that each assignment below is committed without an explicit commit() call.
achd_covid_table = SqliteDict('achd_covid.db', autocommit=True)

In [None]:
# Timestamp identifying this scrape session; used as a key in the cells below.
now_time = datetime.datetime.now()

In [None]:
# Persist everything scraped in this session under the session timestamp.
# Note that only scrape_info_map is stored; scrape_pixels_map is not.
achd_covid_table[now_time] = scrape_info_map

In [None]:
# Append this session's timestamp to the stored list of scrape sessions.
# .get() with an empty default lets the first run against a new database
# succeed; a plain lookup raised KeyError when the key did not exist yet.
achd_covid_table['scrape_sessions'] = achd_covid_table.get('scrape_sessions', []) + [now_time]

In [None]:
# IMPORTANT: Manually set data_date by reading it off the ACHD page; it can't be parsed
data_date = '2020-04-14'
md_elt = {'map_size':map_elt_size,'data_date':data_date}

if 'metadata' in achd_covid_table:
    m_dict = achd_covid_table['metadata']
else:
    # First session in a new database: keep the flat 'map_size'/'data_date'
    # keys this branch has always written, so existing readers still work.
    m_dict = dict(md_elt)
# Always record this session's metadata under its timestamp.  Previously the
# first session was stored only in flat form and could not be looked up by
# now_time like every later session.
m_dict[now_time] = md_elt
# Read-modify-write: assign the whole dict back so the change is stored.
achd_covid_table['metadata'] = m_dict
    

In [None]:
# Display the stored metadata to verify the write above.
achd_covid_table['metadata']

In [None]:
# Display the stored list of scrape session timestamps.
achd_covid_table['scrape_sessions']

In [None]:
# Display which result kinds were stored for this session.
list(achd_covid_table[now_time].keys())