In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
from shapely.geometry import Point
from datetime import datetime, date
import os
import sys
import logging  
#from keplergl import KeplerGl
# Send log records to stdout so they show up in the cell output; the root
# configuration is INFO with a "<timestamp> <message>" layout.
logging.basicConfig(
    format="%(asctime)s %(message)s",
    level=logging.INFO,
    stream=sys.stdout,
)

# Notebook-level logger; its own level is set to DEBUG.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Resolve the RESP-NET output directory relative to the kernel's working directory.
# The trailing separator in `respnet_outpath` is deliberate: later cells build file
# paths as f"{output_dir}<file name>", so `output_dir` must keep ending with "/".
base_dir = os.getcwd()
respnet_outpath = "../../../../data/cdc/resp_net/output/"
output_dir = os.path.join(base_dir, respnet_outpath)

# `exist_ok=True` already makes this a no-op when the directory is present, so no
# separate existence check is needed (that check was racy, and it silently skipped
# the case where the path exists but is not a directory).
os.makedirs(output_dir, exist_ok=True)

# US state boundary shapefile (cb_2018_us_state_20m), read into a GeoDataFrame.
# The path is relative to the kernel's working directory.
states_shapefile = (
    "../../../../data/geographic/cb_2018_us_state_20m/"
    "cb_2018_us_state_20m.shp"
)
states_gdf = gpd.read_file(states_shapefile)

# Preview the first rows.
states_gdf.head()

Unnamed: 0,STATEFP,STATENS,AFFGEOID,GEOID,STUSPS,NAME,LSAD,ALAND,AWATER,geometry
0,24,1714934,0400000US24,24,MD,Maryland,0,25151100280,6979966958,"MULTIPOLYGON (((-76.04621 38.02553, -76.00734 ..."
1,19,1779785,0400000US19,19,IA,Iowa,0,144661267977,1084180812,"POLYGON ((-96.62187 42.77926, -96.57794 42.827..."
2,10,1779781,0400000US10,10,DE,Delaware,0,5045925646,1399985648,"POLYGON ((-75.77379 39.7222, -75.75323 39.7579..."
3,39,1085497,0400000US39,39,OH,Ohio,0,105828882568,10268850702,"MULTIPOLYGON (((-82.86334 41.69369, -82.82572 ..."
4,42,1779798,0400000US42,42,PA,Pennsylvania,0,115884442321,3394589990,"POLYGON ((-80.51989 40.90666, -80.51964 40.987..."


In [None]:
# Dataset landing page:
#   https://data.cdc.gov/Public-Health-Surveillance/Rates-of-Laboratory-Confirmed-RSV-COVID-19-and-Flu/kvib-3txy/about_data
# Direct CSV export:
#   https://data.cdc.gov/resource/kvib-3txy/rows.csv?fourfour=x9gk-5huc&cacheBust=1714061648&date=20240714&accessType=DOWNLOAD
# The CSV is actually read from the URL below (file name dated 20240714);
# presumably a mirrored copy of the CDC export -- TODO confirm.
cdc_url ="https://oss.resilientservice.mooo.com/resilientdata/cdc/resp_net/Rates_of_Laboratory-Confirmed_RSV__COVID-19__and_Flu_Hospitalizations_from_the_RESP-NET_Surveillance_Systems_20240714.csv"

# Columns used by the cells below (as shown in their displayed output):
# Surveillance Network, Season, MMWR Year, MMWR Week, Age group, Sex,
# Race/Ethnicity, Site, Weekly Rate, Cumulative Rate, Week Ending Date, Type

# Load the comma-separated export into a DataFrame.
rsv_df = pd.read_csv(cdc_url, sep=",")

# Distinct surveillance networks, in order of first appearance.
networks = rsv_df["Surveillance Network"].drop_duplicates().tolist()

rsv_df.head()


In [43]:
# Full name -> two-letter postal abbreviation for the 50 states, the District of
# Columbia and the US territories. Insertion order: states alphabetically, then
# DC, then territories.
us_state_to_abbrev = {
    # States A-I
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID",
    "Illinois": "IL", "Indiana": "IN", "Iowa": "IA",
    # States K-M
    "Kansas": "KS", "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME",
    "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN",
    "Mississippi": "MS", "Missouri": "MO", "Montana": "MT",
    # States N-O
    "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH", "New Jersey": "NJ",
    "New Mexico": "NM", "New York": "NY", "North Carolina": "NC",
    "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK", "Oregon": "OR",
    # States P-W
    "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC",
    "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY",
    # Federal district and territories
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# Reverse lookup: postal abbreviation -> full name.
abbrev_to_us_state = {abbrev: name for name, abbrev in us_state_to_abbrev.items()}

In [58]:
# Clean the raw RESP-NET frame and keep only per-site rows of individual networks.
#
# Steps:
#   1. Parse "Week Ending Date" as a UTC timestamp and derive a plain calendar
#      date column ("Week Ending Date2").
#   2. Drop the "Combined" surveillance network and the "Overall" site rows.
#   3. Add a "State" column holding the postal abbreviation for each site.
#
# `.assign(...)` and `.copy()` build a new frame instead of writing into the raw
# frame or into a filtered slice of it; this avoids pandas' SettingWithCopyWarning
# and makes the cell safe to re-run.
respnet_parsed = rsv_df.assign(
    **{
        "Week Ending Date": pd.to_datetime(
            rsv_df["Week Ending Date"], utc=True, format="ISO8601"
        )
    }
)
respnet_parsed["Week Ending Date2"] = respnet_parsed["Week Ending Date"].dt.date

respnet_sites = respnet_parsed[
    (respnet_parsed["Surveillance Network"] != "Combined")
    & (respnet_parsed["Site"] != "Overall")
].copy()

respnet_sites["State"] = respnet_sites["Site"].map(us_state_to_abbrev)

# A site missing from the lookup is still an error (KeyError, as before), but the
# message now names every offending site instead of only the first one hit.
unmapped_sites = respnet_sites.loc[respnet_sites["State"].isna(), "Site"].unique().tolist()
if unmapped_sites:
    raise KeyError(f"Sites without a state abbreviation: {unmapped_sites}")

# Later cells read the cleaned frame under the name `rsv_df`.
rsv_df = respnet_sites

#rsv_df.to_parquet(f"{respnet_outpath}respnet_states.parquet", index=False)
rsv_df.to_parquet(f"{output_dir}respnet_states.parquet", index=False)
rsv_df.head()

Unnamed: 0,Surveillance Network,Season,MMWR Year,MMWR Week,Age group,Sex,Race/Ethnicity,Site,Weekly Rate,Cumulative Rate,Week Ending Date,Type,Week Ending Date2,State
4295,FluSurv-NET,2018-19,2018,40,Overall,Overall,Overall,California,0.2,0.2,2018-10-06 00:00:00+00:00,Unadjusted Rate,2018-10-06,CA
4296,FluSurv-NET,2018-19,2018,41,Overall,Overall,Overall,California,0.2,0.4,2018-10-13 00:00:00+00:00,Unadjusted Rate,2018-10-13,CA
4297,FluSurv-NET,2018-19,2018,42,Overall,Overall,Overall,California,0.2,0.5,2018-10-20 00:00:00+00:00,Unadjusted Rate,2018-10-20,CA
4298,FluSurv-NET,2018-19,2018,43,Overall,Overall,Overall,California,0.2,0.7,2018-10-27 00:00:00+00:00,Unadjusted Rate,2018-10-27,CA
4299,FluSurv-NET,2018-19,2018,44,Overall,Overall,Overall,California,0.1,0.9,2018-11-03 00:00:00+00:00,Unadjusted Rate,2018-11-03,CA


In [45]:
# Attach state attributes and geometry to every rate row. This is a right join
# on state name (states_gdf.NAME == rsv_df.Site), so all rows of `rsv_df` are
# kept even when no state polygon matches.
merged_gdf = states_gdf.merge(
    rsv_df,
    how="right",
    left_on="NAME",
    right_on="Site",
)
merged_gdf.head()

Unnamed: 0,STATEFP,STATENS,AFFGEOID,GEOID,STUSPS,NAME,LSAD,ALAND,AWATER,geometry,...,MMWR Week,Age group,Sex,Race/Ethnicity,Site,Weekly Rate,Cumulative Rate,Week Ending Date,Type,State
0,6,1779778,0400000US06,6,CA,California,0,403503931312,20463871877,"MULTIPOLYGON (((-118.59397 33.4672, -118.48478...",...,40,Overall,Overall,Overall,California,0.2,0.2,2018-10-06,Unadjusted Rate,CA
1,6,1779778,0400000US06,6,CA,California,0,403503931312,20463871877,"MULTIPOLYGON (((-118.59397 33.4672, -118.48478...",...,41,Overall,Overall,Overall,California,0.2,0.4,2018-10-13,Unadjusted Rate,CA
2,6,1779778,0400000US06,6,CA,California,0,403503931312,20463871877,"MULTIPOLYGON (((-118.59397 33.4672, -118.48478...",...,42,Overall,Overall,Overall,California,0.2,0.5,2018-10-20,Unadjusted Rate,CA
3,6,1779778,0400000US06,6,CA,California,0,403503931312,20463871877,"MULTIPOLYGON (((-118.59397 33.4672, -118.48478...",...,43,Overall,Overall,Overall,California,0.2,0.7,2018-10-27,Unadjusted Rate,CA
4,6,1779778,0400000US06,6,CA,California,0,403503931312,20463871877,"MULTIPOLYGON (((-118.59397 33.4672, -118.48478...",...,44,Overall,Overall,Overall,California,0.1,0.9,2018-11-03,Unadjusted Rate,CA


In [59]:
# Collapse to one row per (network, season, site, week), taking the column-wise
# maximum of every remaining column.
# NOTE(review): the group keys do not include "Age group", "Sex" or
# "Race/Ethnicity", so the max is taken across those rows as well -- confirm
# this is intended.
respnet_group_keys = ["Surveillance Network", "Season", "Site", "Week Ending Date"]
rsv_gby = rsv_df.groupby(respnet_group_keys, as_index=False).max()

# Keep only the columns needed for the export, then right-join onto the state
# polygons by name so every aggregated row is retained.
respnet_export_columns = [
    "Surveillance Network",
    "Season",
    "Week Ending Date",
    "Weekly Rate",
    "Cumulative Rate",
    "Site",
    "State",
]
rvs_df2 = states_gdf.merge(
    rsv_gby[respnet_export_columns],
    how="right",
    left_on="NAME",
    right_on="Site",
)

# Write the joined frame as both Parquet and GeoJSON.
rvs_df2.to_parquet(f"{output_dir}respnet_network_groupy.parquet", index=False)
rvs_df2.to_file(f"{output_dir}respnet_network_groupy.geojson", driver="GeoJSON")



2024-07-15 09:13:53,897 Created 8,510 records


In [None]:
# Re-aggregate the state-joined frame (`rvs_df2`) per network/season/site/week.
# The previous version referenced `rsv_df2`, a name that is never defined in
# this notebook, so the cell failed with NameError on a fresh kernel.
regroup_keys = ["Surveillance Network", "Season", "Site", "Week Ending Date"]

# Every non-key column keeps the original `max` aggregation except "geometry":
# shapes have no ordering to take a maximum of, so the first geometry of each
# group is used instead.
regroup_spec = {
    column: ("first" if column == "geometry" else "max")
    for column in rvs_df2.columns
    if column not in regroup_keys
}
rsv_gby2 = rvs_df2.groupby(regroup_keys, as_index=False).agg(regroup_spec)

# Final column selection, including the state geometry.
rvs_df22 = rsv_gby2[
    [
        "Surveillance Network",
        "Season",
        "Week Ending Date",
        "Weekly Rate",
        "Cumulative Rate",
        "Site",
        "State",
        "geometry",
    ]
]
