In [1]:
import pandas as pd
import numpy as np

import wget
import os, datetime
import shutil

import pycountry_convert as pc

In [2]:
# File names of the two US time-series CSVs (confirmed cases and deaths),
# kept as notebook-wide constants.
csv_confirmed, csv_deaths = (
    "time_series_covid19_confirmed_US.csv",
    "time_series_covid19_deaths_US.csv",
)

In [3]:
# Raw GitHub locations of the two US time-series CSVs: confirmed cases first,
# deaths second.
urls = [
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv',
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv',
]

# Fetch each URL in turn; after the loop `filename` holds the value returned
# by wget for the last download.
for url in urls:
    filename = wget.download(url)

In [4]:
# Directory that holds this run's CSV files, plus a timestamped sibling name
# used to set aside whatever an earlier run left in that location.
currDir = "./COVID-19-data-US"
backupDir = "./COVID-19-data-US-" + datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

isdir = os.path.isdir(currDir)

# Preserve the previous data by renaming its directory to the backup name.
# Failures are reported but not raised, so the notebook keeps running.
if isdir:
    try:
        os.rename(currDir, backupDir)
    except OSError as err:
        # Name both ends of the rename and the OS-level reason so the failure
        # can actually be diagnosed from the cell output.
        print ("Rename of the directory %s to %s failed: %s" % (currDir, backupDir, err))

# Create a fresh, empty working directory for the newly downloaded files.
try:
    os.mkdir(currDir)
except OSError as err:
    print ("Creation of the directory %s failed: %s" % (currDir, err))
else:
    print ("Successfully created the directory %s " % currDir)

Successfully created the directory ./COVID-19-data-US 


In [5]:
# Move every CSV file found in the current working directory into currDir,
# echoing each file name as it is moved.
for file in os.listdir(os.getcwd()):
    # Match on the extension rather than a substring, and skip anything that
    # is not a regular file, so entries such as "x.csv.bak" or a directory
    # whose name merely contains ".csv" are left alone.
    if file.endswith('.csv') and os.path.isfile(file):
        print(file)
        shutil.move(file, currDir)

time_series_covid19_confirmed_US.csv
time_series_covid19_deaths_US.csv


In [6]:
# Load both datasets into DataFrames. The paths are built from currDir and the
# CSV name constants defined earlier in the notebook, so the directory and
# file names are spelled out in exactly one place.
df_confirmed = pd.read_csv(os.path.join(currDir, csv_confirmed))
df_deaths = pd.read_csv(os.path.join(currDir, csv_deaths))

In [7]:
# Display the column labels of the confirmed-cases frame.
df_confirmed.columns

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_',
       ...
       '4/12/20', '4/13/20', '4/14/20', '4/15/20', '4/16/20', '4/17/20',
       '4/18/20', '4/19/20', '4/20/20', '4/21/20'],
      dtype='object', length=102)

In [8]:
# Split the confirmed frame's columns: the first 11 are used as identifier
# columns, everything after them as the columns to unpivot.
ids = df_confirmed.columns[:11]
us_dates = df_confirmed.columns[11:]

# Reshape both frames from wide to long with the same identifier/value split;
# the former column labels land in 'Date'.
melt_args = dict(id_vars=ids, value_vars=us_dates, var_name='Date')
us_conf_df_long = pd.melt(df_confirmed, value_name='Confirmed', **melt_args)
us_deaths_df_long = pd.melt(df_deaths, value_name='Deaths', **melt_args)

# Print the resulting shapes, confirmed first and deaths second.
for long_frame in (us_conf_df_long, us_deaths_df_long):
    print(long_frame.shape)

(297479, 13)
(297479, 13)


In [9]:
# Preview the first rows of the long-format confirmed table.
us_conf_df_long.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Date,Confirmed
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,"American Samoa, US",1/22/20,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,"Guam, US",1/22/20,0
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,"Northern Mariana Islands, US",1/22/20,0
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,"Puerto Rico, US",1/22/20,0
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,"Virgin Islands, US",1/22/20,0


In [10]:
# All columns of the long confirmed table except the last one, then display
# the resulting Index.
ft_ids = us_conf_df_long.columns[:-1]
ft_ids

Index(['UID', 'iso2', 'iso3', 'code3', 'FIPS', 'Admin2', 'Province_State',
       'Country_Region', 'Lat', 'Long_', 'Combined_Key', 'Date'],
      dtype='object')

In [11]:
# Attach the death counts to the confirmed table by key instead of by row
# position: a positional concat silently pairs the wrong rows if the two
# source files ever differ in row order or row count.
# A left merge keeps every confirmed row in its original order.
# validate='one_to_one' raises if a (UID, Date) pair repeats on either side;
# this assumes UID identifies one row per source file -- TODO confirm.
us_full_table = us_conf_df_long.merge(
    us_deaths_df_long[['UID', 'Date', 'Deaths']],
    on=['UID', 'Date'],
    how='left',
    validate='one_to_one',
)
us_full_table.head()

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Date,Confirmed,Deaths
0,16,AS,ASM,16,60.0,,American Samoa,US,-14.271,-170.132,"American Samoa, US",1/22/20,0,0
1,316,GU,GUM,316,66.0,,Guam,US,13.4443,144.7937,"Guam, US",1/22/20,0,0
2,580,MP,MNP,580,69.0,,Northern Mariana Islands,US,15.0979,145.6739,"Northern Mariana Islands, US",1/22/20,0,0
3,630,PR,PRI,630,72.0,,Puerto Rico,US,18.2208,-66.5901,"Puerto Rico, US",1/22/20,0,0
4,850,VI,VIR,850,78.0,,Virgin Islands,US,18.3358,-64.8963,"Virgin Islands, US",1/22/20,0,0


In [13]:
# Relabel the country from "US" to "USA" on every matching row; this updates
# us_full_table in place.
is_us = us_full_table['Country_Region'].eq("US")
us_full_table.loc[is_us, "Country_Region"] = "USA"

In [21]:
# Spot-check a single county: show the rows whose state is Maryland and whose
# Admin2 value is Montgomery.
in_maryland = us_full_table['Province_State'].eq("Maryland")
in_montgomery = us_full_table['Admin2'].eq("Montgomery")
us_full_table.loc[in_maryland & in_montgomery]

Unnamed: 0,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key,Date,Confirmed,Deaths
1212,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/22/20,0,0
4481,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/23/20,0,0
7750,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/24/20,0,0
11019,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/25/20,0,0
14288,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/26/20,0,0
17557,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/27/20,0,0
20826,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/28/20,0,0
24095,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/29/20,0,0
27364,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/30/20,0,0
30633,84024031,US,USA,840,24031.0,Montgomery,Maryland,USA,39.136763,-77.203582,"Montgomery, Maryland, US",1/31/20,0,0


In [22]:
# Write the combined table next to the downloaded CSVs, without the index
# column. The path is built from currDir so the output always lands in the
# directory the notebook created.
us_full_table.to_csv(os.path.join(currDir, 'usa_county_wise.csv'), index=False)