## Download from RShiny

In [1]:
"""
Download LA County neighborhood testing data.
Download persons tested and tests performed.
Download from RShiny.
There is no date column; the data probably reflects 2 days ago, given their comment in the sidebar (community testing).
"""
import pandas as pd
import pytz

from datetime import datetime, timedelta

# A download link copied from a live dashboard session. The session tokens
# embedded in it are reused to build the links for the other downloads.
RSHINY_URL = "https://lacdph.shinyapps.io/covid19_surveillance_dashboard/_w_685647da/session/416ce7cb486c4435ce825ea78304934b/download/download2?w=685647da"

# Fixed pieces of the link that surround the two session tokens.
ONE = "https://lacdph.shinyapps.io/covid19_surveillance_dashboard/"
TWO = "/session/"
THREE = "/download/"

# Link layout: ONE + session_id + TWO + session_string + THREE + <file>?w=<session_id2>
# e.g. session_id = "_w_d8266aad", session_string = "8a5e8bde7e3cd05d0d095f41122749bd"
_after_base = RSHINY_URL.split(ONE)[1]
session_id = _after_base.split(TWO)[0]
_after_session = RSHINY_URL.split(ONE + session_id + TWO)[1]
session_string = _after_session.split(THREE)[0]
# Same token without its first three characters (the "_w_" prefix in the link above).
session_id2 = session_id[3:]

_DOWNLOAD_TEMPLATE = (
    "https://lacdph.shinyapps.io/covid19_surveillance_dashboard/{}/"
    "session/{}/download/download{}?w={}"
)


def _rshiny_download(slot):
    """Return the link to the dashboard's ``download<slot>`` file for this session."""
    return _DOWNLOAD_TEMPLATE.format(session_id, session_string, slot, session_id2)


NEIGHBORHOOD_TESTING_URL = _rshiny_download(4)
TESTS_PERFORMED = _rshiny_download(6)
PERSONS_TESTED = _rshiny_download(5)
CASES_DEATHS = _rshiny_download(2)


# S3 prefix that every output file is written under.
S3_FILE_PATH = "s3://public-health-dashboard/jhu_covid19/"


# Current calendar date in the US/Pacific timezone, used to stamp each download.
today = datetime.now(pytz.timezone("US/Pacific")).date()

# Download neighborhood testing, tests performed, persons tested, and cases/deaths data
def download_from_rshiny():
    """
    Download the RShiny dashboard extracts and save them to S3.

    Four files are read from the module-level download links:

    * Neighborhood testing (NEIGHBORHOOD_TESTING_URL): date columns are added,
      the rows are appended to the existing parquet file under S3_FILE_PATH,
      exact duplicate rows are dropped, and the parquet file is overwritten.
    * Tests performed (TESTS_PERFORMED) and persons tested (PERSONS_TESTED):
      written to S3 as CSV without modification.
    * Cases / deaths (CASES_DEATHS): date columns are added, then written to
      S3 as CSV.

    Returns nothing; the results are the files written under S3_FILE_PATH.
    """
    def add_date_columns(df):
        """
        Return df with `download_date` and `date` columns added,
        `geo_merge` renamed to `neighborhood`, and "Unnamed: 0" dropped.

        Raises KeyError if df has no "Unnamed: 0" column.
        """
        # The extracts have no date column of their own, so the data date is
        # taken to be two days before the download date.
        df = (df.assign(
                download_date = today,
                date = today - timedelta(days=2)
            ).rename(columns = {"geo_merge": "neighborhood"})
        )

        # `today` is a plain date object; convert both columns to pandas datetimes.
        # "Unnamed: 0" is presumably an index column written by the dashboard's
        # CSV export -- TODO confirm.
        df = (df.assign(
                download_date = pd.to_datetime(df.download_date),
                date = pd.to_datetime(df.date)
            ).drop(columns = "Unnamed: 0")
        )
        return df

    df = add_date_columns(pd.read_csv(NEIGHBORHOOD_TESTING_URL))

    NEIGHBORHOOD_TESTING = f"{S3_FILE_PATH}la-county-neighborhood-testing-appended.parquet"
    full_df = pd.read_parquet(NEIGHBORHOOD_TESTING)
    # DataFrame.append was removed in pandas 2.0; pd.concat with its default
    # arguments stacks the rows the same way.
    df = (pd.concat([full_df, df])
          .drop_duplicates()
          .sort_values(["neighborhood", "date"])
          .reset_index(drop=True)
         )

    df.to_parquet(NEIGHBORHOOD_TESTING)

    # County-level files are copied to S3 as downloaded.
    tests_df = pd.read_csv(TESTS_PERFORMED)
    tests_df.to_csv(f"{S3_FILE_PATH}county-tests-performed-rshiny.csv")

    persons_df = pd.read_csv(PERSONS_TESTED)
    persons_df.to_csv(f"{S3_FILE_PATH}county-persons-tested-rshiny.csv")

    # Cases / deaths gets the same date columns as the testing data.
    cases_deaths_df = pd.read_csv(CASES_DEATHS)
    cases_deaths_df = add_date_columns(cases_deaths_df)
    cases_deaths_df.to_csv(f"{S3_FILE_PATH}la-county-neighborhood-rshiny.csv")

    
# Run the download immediately when this cell/script is executed.
download_from_rshiny()