## NY Times Daily Reports Data  

_**Purpose:**_  Expand the daily aggregated counts in the data source into one row per case, dated on the day the case was confirmed.  

_**Data Source:**_  [NY Times COVID-19 data repository](https://github.com/nytimes/covid-19-data)

#### Python Libraries

In [1]:
import pandas as pd
from pathlib import Path

#### Load csv data into dataframes

In [2]:
# County-level data file in the NY Times repository.
url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'

# Read the fips column as strings instead of letting pandas infer a dtype.
df = pd.read_csv(filepath_or_buffer=url, dtype={'fips': str})
df

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061,1,0
1,2020-01-22,Snohomish,Washington,53061,1,0
2,2020-01-23,Snohomish,Washington,53061,1,0
3,2020-01-24,Cook,Illinois,17031,1,0
4,2020-01-24,Snohomish,Washington,53061,1,0
...,...,...,...,...,...,...
14042,2020-03-26,Natrona,Wyoming,56025,6,0
14043,2020-03-26,Park,Wyoming,56029,1,0
14044,2020-03-26,Sheridan,Wyoming,56033,4,0
14045,2020-03-26,Sweetwater,Wyoming,56037,1,0


#### Check a single date

In [3]:
# Day to inspect, written as YYYYMMDD and reformatted to the
# YYYY-MM-DD form that df.date is compared against below.
date = '20200308'
date = '-'.join([date[:4], date[4:6], date[6:8]])
print('\nDate:', date)

# Kentucky rows reporting at least one case on that day.
is_kentucky = df.state == 'Kentucky'
has_cases = df.cases > 0
on_date = df.date == date
ky_df = df[is_kentucky & has_cases & on_date].copy()
print('# of counties:', len(ky_df))

# Column-wise totals over the selected rows.
totals = ky_df[['cases', 'deaths']].sum(axis=0, skipna=True)
total_cases, total_deaths = totals
print('total # cases:', total_cases, '\ntotal # deaths:', total_deaths, '\n')


Date: 2020-03-08
# of counties: 3
total # cases: 4 
total # deaths: 0 



#### Output only KY data to CSV

In [4]:
file_name = 'ky_data_from_nytimes.csv'
filePath = Path("data")   # the folder that receives the output files
file_out = filePath.joinpath(file_name)  # folder plus filename

# Create the output folder when it is missing: to_csv does not create
# directories, so a fresh checkout without data/ would otherwise fail here.
filePath.mkdir(parents=True, exist_ok=True)

# Keep every Kentucky row (all dates) that reports at least one case.
ky_df = df[(df.state=='Kentucky') & (df.cases > 0)].copy()
ky_df.to_csv(file_out, index=False)
ky_df

Unnamed: 0,date,county,state,fips,cases,deaths
614,2020-03-06,Fayette,Kentucky,21067,1,0
699,2020-03-07,Fayette,Kentucky,21067,1,0
798,2020-03-08,Fayette,Kentucky,21067,2,0
799,2020-03-08,Harrison,Kentucky,21097,1,0
800,2020-03-08,Jefferson,Kentucky,21111,1,0
...,...,...,...,...,...,...
12958,2020-03-26,Union,Kentucky,21225,1,0
12959,2020-03-26,Unknown,Kentucky,,42,0
12960,2020-03-26,Warren,Kentucky,21227,7,0
12961,2020-03-26,Wayne,Kentucky,21231,1,0


#### Loop over each row. Create a single row for each case per day.

In [5]:
data=[]  # one [fips, date, county, '1'] record per newly reported case
last_value=0  # highest cumulative case count seen so far for the current county
current_county=''  # county of the previous row, used to detect a county change

# Sort so each county's rows are contiguous and ordered by date
ky_df = ky_df.sort_values(by=['county','date'])

for index, row in ky_df.iterrows():
    if (row.county != current_county):  # first row of a new county: restart the running count
        current_county = row.county
        last_value=0
    increment=row.cases-last_value   # cases above the previous high-water mark
    # range() is empty for a zero or negative increment, so a day whose
    # cumulative count is unchanged or revised downward adds no rows.
    data.extend([row.fips,row.date,row.county,'1'] for _ in range(increment))
    # Track the running maximum rather than the latest value. If a count is
    # revised downward and later rises again (e.g. 5, 4, 5), comparing against
    # the lower value would emit rows for the same cases a second time.
    last_value=max(last_value,row.cases)

# 'cases' holds the string '1' on every row, one row per individual case
ky_df_single_rows = pd.DataFrame(data,columns=['fips','date','county','cases'])
ky_df_single_rows


Unnamed: 0,fips,date,county,cases
0,21003,2020-03-22,Allen,1
1,21005,2020-03-21,Anderson,1
2,21005,2020-03-23,Anderson,1
3,21015,2020-03-26,Boone,1
4,21015,2020-03-26,Boone,1
...,...,...,...,...
247,21227,2020-03-22,Warren,1
248,21227,2020-03-23,Warren,1
249,21227,2020-03-25,Warren,1
250,21231,2020-03-25,Wayne,1


#### Write single rows to CSV

In [6]:
file_name = 'ky_data_single_row.csv'

# Destination is data/<file_name>
filePath = Path("data")
file_out = filePath / file_name

# Write the one-row-per-case frame without its index column
ky_df_single_rows.to_csv(file_out, index=False)