# CORONA DATASET

## CONFIRMED -- DATA REQUEST

In [577]:
import requests

#CURRENT LIFE UPDATES 
CONFIRMED_URL = 'https://api.covid19api.com/live/country/us/status/confirmed/date/2020-04-01T00:00:00Z'

#Total cases per province by the day. 
URL_LIVE = 'https://api.covid19api.com/live/country/us/status/confirmed'

#Total aggregated confirmed by the day.
URL_TOTAL = 'https://api.covid19api.com/total/country/us/status/confirmed'


response = requests.get(CONFIRMED_URL)
data = response.json()

## VIEW DATA 

#### FIRST 5 ROWS

In [578]:
import pandas as pd
df = pd.DataFrame(data=data)
df.head()

Unnamed: 0,Country,Province,Lat,Lon,Date,Cases,Status
0,US,South Dakota,44.2998,-99.4388,2020-04-01T00:52:49Z,108,confirmed
1,US,Colorado,39.0598,-105.311,2020-04-01T00:52:49Z,2966,confirmed
2,US,New Jersey,40.2989,-74.521,2020-04-01T00:52:49Z,18696,confirmed
3,US,Guam,13.4443,144.794,2020-04-01T00:52:49Z,69,confirmed
4,US,Washington,47.4009,-121.49,2020-04-01T00:52:49Z,5449,confirmed


#### LAST 5 ROWS

In [579]:
df.tail()

Unnamed: 0,Country,Province,Lat,Lon,Date,Cases,Status
1039,US,Wisconsin,44.2685,-89.6165,2020-04-01T21:58:49Z,1556,confirmed
1040,US,Kansas,38.5266,-96.7265,2020-04-01T21:58:49Z,485,confirmed
1041,US,Recovered,0.0,0.0,2020-04-01T21:58:49Z,0,confirmed
1042,US,Connecticut,41.5978,-72.7554,2020-04-01T21:58:49Z,3557,confirmed
1043,US,Virgin Islands,18.3358,-64.8963,2020-04-01T21:58:49Z,30,confirmed


### CLEAN DATA 

#### REMOVE ANY ROWS WITH `NAN` VALUES 

In [580]:
df = df.dropna(how='any')
df = df[df.Province != 'Recovered']
df

Unnamed: 0,Country,Province,Lat,Lon,Date,Cases,Status
0,US,South Dakota,44.2998,-99.4388,2020-04-01T00:52:49Z,108,confirmed
1,US,Colorado,39.0598,-105.3110,2020-04-01T00:52:49Z,2966,confirmed
2,US,New Jersey,40.2989,-74.5210,2020-04-01T00:52:49Z,18696,confirmed
3,US,Guam,13.4443,144.7940,2020-04-01T00:52:49Z,69,confirmed
4,US,Washington,47.4009,-121.4900,2020-04-01T00:52:49Z,5449,confirmed
...,...,...,...,...,...,...,...
1038,US,Texas,31.0545,-97.5635,2020-04-01T21:58:49Z,4355,confirmed
1039,US,Wisconsin,44.2685,-89.6165,2020-04-01T21:58:49Z,1556,confirmed
1040,US,Kansas,38.5266,-96.7265,2020-04-01T21:58:49Z,485,confirmed
1042,US,Connecticut,41.5978,-72.7554,2020-04-01T21:58:49Z,3557,confirmed


####  CONVERT TIME TO UTC? 

In [581]:
from datetime import datetime

time_format = "%Y-%m-%dT%H:%M:%SZ"
confirmed_date = [datetime.strptime(t, time_format) for t in df['Date']]

df['Date'] = pd.to_datetime(confirmed_date, errors='coerce').tz_localize('UTC').tz_convert('US/Pacific')
df = df.dropna(subset=['Date'])
df

Unnamed: 0,Country,Province,Lat,Lon,Date,Cases,Status
0,US,South Dakota,44.2998,-99.4388,2020-03-31 17:52:49-07:00,108,confirmed
1,US,Colorado,39.0598,-105.3110,2020-03-31 17:52:49-07:00,2966,confirmed
2,US,New Jersey,40.2989,-74.5210,2020-03-31 17:52:49-07:00,18696,confirmed
3,US,Guam,13.4443,144.7940,2020-03-31 17:52:49-07:00,69,confirmed
4,US,Washington,47.4009,-121.4900,2020-03-31 17:52:49-07:00,5449,confirmed
...,...,...,...,...,...,...,...
1038,US,Texas,31.0545,-97.5635,2020-04-01 14:58:49-07:00,4355,confirmed
1039,US,Wisconsin,44.2685,-89.6165,2020-04-01 14:58:49-07:00,1556,confirmed
1040,US,Kansas,38.5266,-96.7265,2020-04-01 14:58:49-07:00,485,confirmed
1042,US,Connecticut,41.5978,-72.7554,2020-04-01 14:58:49-07:00,3557,confirmed


#### GET AVERAGE CASE PER STATE

In [582]:
df[['Province','Cases']].groupby('Province').mean()

Unnamed: 0_level_0,Cases
Province,Unnamed: 1_level_1
Alabama,1004.444444
Alaska,131.277778
Arizona,1329.555556
Arkansas,560.666667
California,8617.833333
Colorado,2969.222222
Connecticut,3151.833333
Delaware,321.722222
Diamond Princess,49.0
District of Columbia,518.166667


## RECOVERED -- DATA REQUEST

In [583]:
RECOVERED_URL = 'https://api.covid19api.com/live/country/us/status/recovered/date/2020-04-01T00:00:00Z'
recovered_response = requests.get(RECOVERED_URL)
recovered_data = response.json()

#### FIRST 5 ROWS

In [584]:
import pandas as pd
rf = pd.DataFrame(data=recovered_data)
rf.head()

Unnamed: 0,Country,Province,Lat,Lon,Date,Cases,Status
0,US,South Dakota,44.2998,-99.4388,2020-04-01T00:52:49Z,108,confirmed
1,US,Colorado,39.0598,-105.311,2020-04-01T00:52:49Z,2966,confirmed
2,US,New Jersey,40.2989,-74.521,2020-04-01T00:52:49Z,18696,confirmed
3,US,Guam,13.4443,144.794,2020-04-01T00:52:49Z,69,confirmed
4,US,Washington,47.4009,-121.49,2020-04-01T00:52:49Z,5449,confirmed


#### LAST 5 ROWS

In [574]:
rf.tail()

Unnamed: 0,Country,Province,Lat,Lon,Date,Cases,Status
1039,US,Wisconsin,44.2685,-89.6165,2020-04-01T21:58:49Z,1556,confirmed
1040,US,Kansas,38.5266,-96.7265,2020-04-01T21:58:49Z,485,confirmed
1041,US,Recovered,0.0,0.0,2020-04-01T21:58:49Z,0,confirmed
1042,US,Connecticut,41.5978,-72.7554,2020-04-01T21:58:49Z,3557,confirmed
1043,US,Virgin Islands,18.3358,-64.8963,2020-04-01T21:58:49Z,30,confirmed


### CLEAN DATA

In [567]:
rf = rf.dropna(how='any')
rf = rf[rf.Province != 'Recovered']
rf

Unnamed: 0,Country,Province,Lat,Lon,Date,Cases,Status
0,US,South Dakota,44.2998,-99.4388,2020-04-01T00:52:49Z,108,confirmed
1,US,Colorado,39.0598,-105.3110,2020-04-01T00:52:49Z,2966,confirmed
2,US,New Jersey,40.2989,-74.5210,2020-04-01T00:52:49Z,18696,confirmed
3,US,Guam,13.4443,144.7940,2020-04-01T00:52:49Z,69,confirmed
4,US,Washington,47.4009,-121.4900,2020-04-01T00:52:49Z,5449,confirmed
...,...,...,...,...,...,...,...
1038,US,Texas,31.0545,-97.5635,2020-04-01T21:58:49Z,4355,confirmed
1039,US,Wisconsin,44.2685,-89.6165,2020-04-01T21:58:49Z,1556,confirmed
1040,US,Kansas,38.5266,-96.7265,2020-04-01T21:58:49Z,485,confirmed
1042,US,Connecticut,41.5978,-72.7554,2020-04-01T21:58:49Z,3557,confirmed


In [571]:
time_format = "%Y-%m-%dT%H:%M:%SZ"
recovered_date = [datetime.strptime(t, time_format) for t in rf['Date']]

rf['Date'] = pd.to_datetime(recovered_date, errors='coerce').tz_localize('UTC').tz_convert('US/Pacific')
rf = rf.dropna(subset=['Date'])
rf

TypeError: strptime() argument 1 must be str, not Timestamp

#### GET AVERAGE 

Unnamed: 0_level_0,Cases
Province,Unnamed: 1_level_1
Alabama,1004.444444
Alaska,131.277778
Arizona,1329.555556
Arkansas,560.666667
California,8617.833333
Colorado,2969.222222
Connecticut,3151.833333
Delaware,321.722222
Diamond Princess,49.0
District of Columbia,518.166667


In [None]:
## DEATH -- DATA REQUEST