In [None]:
#!pip install uszipcode
#pip install pyqt5==5.12.0

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
sns.set(color_codes = True)

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

from uszipcode import SearchEngine

import time
import datetime
import dateutil.parser
#import dateutil
#from dateutil.parser import *


In [2]:
daily = pd.read_csv("beat19-collab-data\\versions\\2020-06-01\\daily-data.csv")
enrollment = pd.read_csv("beat19-collab-data\\versions\\2020-06-01\\enrollment-data.csv")

In [3]:
print(daily.info())
print("*********************************************")
print(enrollment.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39032 entries, 0 to 39031
Data columns (total 81 columns):
_id                                  39032 non-null object
rel_date                             39032 non-null int64
date                                 39032 non-null object
updated_at                           39032 non-null object
version                              39032 non-null int64
ble_prompt                           39032 non-null bool
ble_travel_prompt                    39032 non-null bool
ble_travel_outside                   14395 non-null float64
ble_travel_bike                      4649 non-null float64
ble_travel_bus                       4649 non-null float64
ble_travel_car                       4649 non-null float64
ble_travel_train                     4649 non-null float64
ble_travel_taxi                      4649 non-null float64
ble_travel_walk                      4649 non-null float64
ble_travel_time                      3209 non-null float64
ble_travel_

In [4]:
daily_ttl_records = len(daily)
daily_unique_ids = daily._id.nunique()

enrollment_ttl = len(enrollment)
unique_enrollment = enrollment._id.nunique()

print(f"number of records in daily df: {daily_ttl_records}" )
print(f"number of unique _ids in daily df': {daily_unique_ids}")
print()
print("*************************************************************")

print(f"number of records in enrollment df: {enrollment_ttl}" )
print(f"number of unique _ids in enrollment df': {unique_enrollment}")
print()

number of records in daily df: 39032
number of unique _ids in daily df': 2081

*************************************************************
number of records in enrollment df: 2905
number of unique _ids in enrollment df': 2905



**Average number of entries per participant:**

In [5]:
avg_time_steps = daily_ttl_records / daily_unique_ids
print(f"each unique id has an average of{avg_time_steps: .0f} records")

each unique id has an average of 19 records


**Columns with NaN values:**

In [6]:
cols = list(daily)
counter = 0
for i in cols:
    if daily[i].isna().any() ==True:  #daily[i] is a series
        counter += 1
        
print(f"there are {counter} out of 81 columns ({counter/len(daily.columns) * 100: .0f}%) in df daily with NaN values")
print(f"percent of column NaN values range from 60% to 100%")

there are 56 out of 81 columns ( 69%) in df daily with NaN values
percent of column NaN values range from 60% to 100%


**NaN percent per feature:**

In [7]:
percent_nan = daily.isna().sum() * 100 / len(daily)
missing_values = pd.DataFrame({'column_name' : daily.columns,
                              'percent_nan' : percent_nan})
missing_values

Unnamed: 0,column_name,percent_nan
_id,_id,0.0
rel_date,rel_date,0.0
date,date,0.0
updated_at,updated_at,0.0
version,version,0.0
ble_prompt,ble_prompt,0.0
ble_travel_prompt,ble_travel_prompt,0.0
ble_travel_outside,ble_travel_outside,63.120004
ble_travel_bike,ble_travel_bike,88.08926
ble_travel_bus,ble_travel_bus,88.08926


In [8]:
# impute county based off zip

search = SearchEngine(simple_zipcode=False)

counties = []
for i in enrollment.zip_postcode:
    counties.append(search.by_zipcode(i).county)

enrollment.insert(10, "county", counties )

In [10]:
# view records missing county info; can we fix?
missing_county = enrollment[enrollment['county'].isnull()]
missing_county

Unnamed: 0,_id,date,age,gender,ethnicity,race,shelter_in_place,household_size,state,zip_postcode,county,country,exposure,hcw_setting,work_setting_clinic,work_setting_hospital,work_setting_nursinghome,work_setting_seniorcare,work_setting_other,BMI,health_status,smoker,exercise,health_prompt,prescriptions,ace_inhibitor,arb,thiazide,potassium_sparing_diuretic,beta_blocker,hydroxychloroquine,other_condition_desc,has_resp_condition,has_heart_blood_condition,has_immune_condition,has_other_condition,asthma_condition,asthma_meds,copd_condition,copd_meds,emphysema_condition,emphysema_meds,lungdis_tb_condition,lungdis_tb_meds,allergies_condition,allergies_meds,hiv_aids_condition,hiv_aids_meds,transplant_condition,transplant_meds,bleeding_condition,bleeding_meds,bloodpressure_condition,bloodpressure_meds,heartdis_condition,heartdis_meds,arrhythmia_condition,arrhythmia_meds,stroke_condition,stroke_meds,alzheimers_condition,alzheimers_meds,cancer_condition,cancer_meds,kidneydis_condition,kidneydis_meds,diabetes_condition,diabetes_meds,arthritis_condition,arthritis_meds,liverdis_condition,liverdis_meds,other_condition,other_meds
75,753cb185678f539eba2e88feb6c9ca2c7cf67773,2020-04-06,21-29,Female,0.0,Asian,1.0,>3,,90237,,USA,0.0,0,0,0,0,0,0,,,never,,False,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
123,2dffb54b596f101bea2a814f44ac3174b1f9c545,2020-03-30,21-29,Female,0.0,White,1.0,2-3,OH,45668,,USA,0.0,0,0,0,0,0,0,28.0,Well above average,never,1.0,False,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
151,0ff5a3d6b11b9d04efde3f634fa65231c853e28d,2020-03-30,60-69,Female,0.0,White,1.0,2-3,NC,27714,,USA,0.0,0,0,0,0,0,0,26.0,Above average,never,1.0,False,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
298,f89acb8e0c4396c9e354355de7cbcf90cee3373b,2020-03-31,30-39,Female,0.0,White,1.0,,NC,27795,,USA,0.0,0,0,0,0,0,0,22.0,Average,never,1.0,False,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
682,16855f988555ff7e379d805e9b2e0e05dafa8e80,2020-04-06,40-49,Female,0.0,White,1.0,>3,CA,94075,,USA,0.0,0,0,0,0,0,0,26.0,Average,never,1.0,True,GABAPENTIN,0,0,0,0,0,0,"PAIN BY ANATOMIC DESCRIPTOR, CTCAE, CHRONIC PA...",0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1
1128,257d3e9798a9014391a91586573bcb8447ac3231,2020-04-14,60-69,Female,0.0,White,1.0,2-3,WA,98423,,USA,,0,0,0,0,0,0,43.0,Below average,never,1.0,True,"DICLOFENAC, VENLAFAXINE HYDROCHLORIDE, MESALAM...",1,0,0,0,0,0,"INFLAMMATORY BOWEL DISEASE, MONOCLONAL GAMMOPA...",0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0
1537,885a00e6d22267a90ea6ddf83356ffd5abc9cdd4,2020-04-23,50-59,Female,0.0,Black,1.0,2-3,CA,90892,,USA,0.0,0,0,0,0,0,0,25.0,Above average,former,1.0,False,,0,0,0,0,0,0,,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1542,43f07e1997c5ab9034d63b9b801d182249522ad6,2020-04-23,40-49,Female,0.0,White,1.0,1,IL,69614,,USA,1.0,0,0,0,0,0,0,21.0,Below average,never,0.0,True,,0,0,0,0,0,0,,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1645,a96ce2ef235a5b48bc50ba2ee678bafbc2d1268a,2020-04-27,60-69,Male,0.0,Black,0.0,2-3,IL,69646,,USA,0.0,0,0,0,0,0,0,31.0,Average,current,1.0,True,LISINOPRIL,1,0,0,0,0,0,,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0
1826,cc99debfba1022b4262a017e465c4819f0530b1e,2020-05-02,70-79,Female,0.0,White,1.0,>3,CA,34925,,USA,1.0,0,0,0,0,0,0,25.0,Above average,never,1.0,True,OTHER,0,0,0,0,0,0,OTHER,0,1,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0


**Enrollment db cannot match counties to 14 zip codes that were not entered accurately.  This translates into 149 total time steps.**

**Average number of entries per day per county:**

In [11]:
# pull in county by id into daily df

data = daily.merge(enrollment[['_id','state', 'county']] , how = 'outer', on = '_id')
print(data.shape)
data = data[data['county'].notna()]
print(data.shape)

(39856, 83)
(39707, 83)


In [13]:
# change date and updated_at to type datetime
data["date"] = pd.to_datetime(data["date"])
data["updated_at"] = pd.to_datetime(data["updated_at"])

In [14]:
(data.updated_at[0]).time()

datetime.time(21, 23, 4, 95000)

In [15]:
data.head(1)

Unnamed: 0,_id,rel_date,date,updated_at,version,ble_prompt,ble_travel_prompt,ble_travel_outside,ble_travel_bike,ble_travel_bus,ble_travel_car,ble_travel_train,ble_travel_taxi,ble_travel_walk,ble_travel_time,ble_travel_interactions,ble_sip,ble_household_sick,ble_physically_feeling,ble_stress,ble_concerned,ble_care_prompt,ble_care_clinic,ble_care_hospitalized,ble_care_telemedicine,ble_care_flu_prompt,ble_care_flu_result,ble_care_covid_prompt,ble_care_covid_result,ble_care_covid_date,ble_care_covid_rel_date,ble_care_covid_recovered,med_prompt,med_text,med_rx_prompt,med_rx_text,med_supp_prompt,med_supp_text,med_other,sym_prompt,sym_ent_prompt,sym_ent_headache_severity,sym_ent_smell_severity,sym_ent_sore_severity,sym_ent_runny_severity,sym_ent_stuffy_severity,sym_ent_itchy_severity,sym_ent_watery_severity,sym_ent_dizzy_severity,sym_ent_dizzy_frequency,sym_ent_other,sym_git_prompt,sym_git_nausea_frequency,sym_git_nausea_severity,sym_git_vomiting_frequency,sym_git_vomiting_severity,sym_git_diarrhea_frequency,sym_git_other,sym_resp_prompt,sym_resp_wheezing_severity,sym_resp_sneezing_severity,sym_resp_coughing_severity,sym_resp_wet,sym_resp_bloody,sym_resp_dyspnea_severity,sym_resp_other,sym_fever_prompt,sym_fever_temp_prompt,sym_fever_temp,sym_fever_chills_frequency,sym_fever_chills_severity,sym_fever_aches_severity,sym_fever_fatigue_severity,sym_fever_sleeping_severity,sym_fever_other,change_ace_inhibitor,change_arb,change_thiazide,change_potassium_sparing_diuretic,change_beta_blocker,change_hydroxychloroquine,state,county
0,0003cfe800bc0152c77bdd759e9afb9c5f19621a,1.0,2020-04-07 00:00:00+00:00,2020-04-07 21:23:04.095000+00:00,1.0,True,True,0.0,,,,,,,,1.0,1.0,0.0,9.0,10.0,0.0,False,,,,False,,False,,,,,False,,False,,False,,,False,False,,,,,,,,,,,False,,,,,,,False,,,,,,,,False,False,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,NC,Mecklenburg County


In [18]:
group = data.groupby(by=["county", "date"])["_id"].count()

In [20]:
group.head(20)

county  date                     
        2020-04-09 00:00:00+00:00    1
        2020-04-29 00:00:00+00:00    1
        2020-04-30 00:00:00+00:00    1
        2020-05-01 00:00:00+00:00    1
        2020-05-02 00:00:00+00:00    1
        2020-05-03 00:00:00+00:00    1
        2020-05-04 00:00:00+00:00    1
        2020-05-05 00:00:00+00:00    1
        2020-05-06 00:00:00+00:00    1
        2020-05-07 00:00:00+00:00    1
        2020-05-08 00:00:00+00:00    1
        2020-05-12 00:00:00+00:00    1
        2020-05-13 00:00:00+00:00    1
        2020-05-14 00:00:00+00:00    1
        2020-05-15 00:00:00+00:00    1
        2020-05-16 00:00:00+00:00    1
        2020-05-21 00:00:00+00:00    1
        2020-05-22 00:00:00+00:00    1
        2020-05-24 00:00:00+00:00    1
        2020-05-27 00:00:00+00:00    1
Name: _id, dtype: int64

In [29]:
data.groupby(["county", "date"]).size()[:100]

county           date                     
                 2020-04-09 00:00:00+00:00     1
                 2020-04-29 00:00:00+00:00     1
                 2020-04-30 00:00:00+00:00     1
                 2020-05-01 00:00:00+00:00     1
                 2020-05-02 00:00:00+00:00     1
                 2020-05-03 00:00:00+00:00     1
                 2020-05-04 00:00:00+00:00     1
                 2020-05-05 00:00:00+00:00     1
                 2020-05-06 00:00:00+00:00     1
                 2020-05-07 00:00:00+00:00     1
                 2020-05-08 00:00:00+00:00     1
                 2020-05-12 00:00:00+00:00     1
                 2020-05-13 00:00:00+00:00     1
                 2020-05-14 00:00:00+00:00     1
                 2020-05-15 00:00:00+00:00     1
                 2020-05-16 00:00:00+00:00     1
                 2020-05-21 00:00:00+00:00     1
                 2020-05-22 00:00:00+00:00     1
                 2020-05-24 00:00:00+00:00     1
                 2020-05-2

In [28]:
data.groupby(["county", "date"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,_id,rel_date,updated_at,version,ble_prompt,ble_travel_prompt,ble_travel_outside,ble_travel_bike,ble_travel_bus,ble_travel_car,ble_travel_train,ble_travel_taxi,ble_travel_walk,ble_travel_time,ble_travel_interactions,ble_sip,ble_household_sick,ble_physically_feeling,ble_stress,ble_concerned,ble_care_prompt,ble_care_clinic,ble_care_hospitalized,ble_care_telemedicine,ble_care_flu_prompt,ble_care_flu_result,ble_care_covid_prompt,ble_care_covid_result,ble_care_covid_date,ble_care_covid_rel_date,ble_care_covid_recovered,med_prompt,med_text,med_rx_prompt,med_rx_text,med_supp_prompt,med_supp_text,med_other,sym_prompt,sym_ent_prompt,sym_ent_headache_severity,sym_ent_smell_severity,sym_ent_sore_severity,sym_ent_runny_severity,sym_ent_stuffy_severity,sym_ent_itchy_severity,sym_ent_watery_severity,sym_ent_dizzy_severity,sym_ent_dizzy_frequency,sym_ent_other,sym_git_prompt,sym_git_nausea_frequency,sym_git_nausea_severity,sym_git_vomiting_frequency,sym_git_vomiting_severity,sym_git_diarrhea_frequency,sym_git_other,sym_resp_prompt,sym_resp_wheezing_severity,sym_resp_sneezing_severity,sym_resp_coughing_severity,sym_resp_wet,sym_resp_bloody,sym_resp_dyspnea_severity,sym_resp_other,sym_fever_prompt,sym_fever_temp_prompt,sym_fever_temp,sym_fever_chills_frequency,sym_fever_chills_severity,sym_fever_aches_severity,sym_fever_fatigue_severity,sym_fever_sleeping_severity,sym_fever_other,change_ace_inhibitor,change_arb,change_thiazide,change_potassium_sparing_diuretic,change_beta_blocker,change_hydroxychloroquine,state
county,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1
,2020-04-09 00:00:00+00:00,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,0,0,0,1,0,1,0,0,0,0,1,0,1,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1
,2020-04-29 00:00:00+00:00,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1
,2020-04-30 00:00:00+00:00,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1
,2020-05-01 00:00:00+00:00,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1
,2020-05-02 00:00:00+00:00,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,1,1,1,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
York County,2020-05-27 00:00:00+00:00,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,1,0,0,0,0,1,0,1,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1
York County,2020-05-28 00:00:00+00:00,3,3,3,3,3,3,2,2,2,2,2,2,2,2,2,2,2,1,2,2,3,0,0,0,3,0,3,0,0,0,0,3,1,3,1,3,1,1,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,3,3,3,3,3,3,3
York County,2020-05-29 00:00:00+00:00,2,2,2,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,2,0,0,0,0,2,0,2,0,2,0,0,2,2,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,2,0,0,0,0,0,0,0,2,2,0,0,0,0,0,0,0,2,2,2,2,2,2,2
York County,2020-05-30 00:00:00+00:00,3,3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,3,0,3,0,0,0,0,3,0,3,0,3,0,0,3,3,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,3,0,0,0,0,0,0,0,3,3,3,3,3,3,3


In [None]:
# group counties by date
data = data[["date", "county"]]
list_of_0s = [0] * len(data)
data.insert(2, "counter", list_of_0s)

group = data['county'].groupby(data['date'])
#list(group)

In [None]:

data.groupby(['date', 'county']).count()


In [None]:
# group counties by date
import copy
data_1 = copy.deepcopy(data[["date", "county"]])
list_of_0s_ = [0] * len(data_1)
#data.insert(2, "counter", list_of_0s)

group = data_1['county'].groupby(data['date'])
#list(group)

In [None]:
type(data_1.groupby(['date', 'county']).sum())

In [None]:
# convert date from string to date time object

# https://github.com/sanand0/benchmarks/blob/master/date-parse/date-parse.py

#def lookup(s):
#    dates = {date:pd.to_datetime(date) for date in s.unique()}
#    return s.apply(lambda v: dates[v])
#
