In [111]:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly
import matplotlib as plt
import plotly.graph_objects as go
import plotly.express as px

In [112]:
covid = pd.read_csv("us-counties.csv")

In [113]:
covid.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [114]:
covid.shape

(405287, 6)

In [115]:
covid["date"].isnull().count()

405287

In [116]:
del covid["fips"]

In [117]:
covid.head()

Unnamed: 0,date,county,state,cases,deaths
0,2020-01-21,Snohomish,Washington,1,0
1,2020-01-22,Snohomish,Washington,1,0
2,2020-01-23,Snohomish,Washington,1,0
3,2020-01-24,Cook,Illinois,1,0
4,2020-01-24,Snohomish,Washington,1,0


In [118]:
covid.columns

Index(['date', 'county', 'state', 'cases', 'deaths'], dtype='object')

In [119]:
covid_states = covid['state'].value_counts().to_frame()
covid_states

Unnamed: 0,state
Texas,29636
Georgia,21529
Virginia,17193
Kentucky,15052
Missouri,14017
North Carolina,13208
Illinois,13048
Tennessee,12724
Iowa,12472
Indiana,12454


In [120]:
covid_states_lst = sorted(list(covid_states.index))
print(covid_states_lst)

['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware', 'District of Columbia', 'Florida', 'Georgia', 'Guam', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Northern Mariana Islands', 'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Puerto Rico', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virgin Islands', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming']


In [121]:
covid = covid.sort_values(by="state")
covid

Unnamed: 0,date,county,state,cases,deaths
246410,2020-06-17,Marion,Alabama,153,12
325159,2020-07-12,Perry,Alabama,251,2
325158,2020-07-12,Morgan,Alabama,1352,5
325157,2020-07-12,Montgomery,Alabama,4595,114
325156,2020-07-12,Monroe,Alabama,264,4
...,...,...,...,...,...
151196,2020-05-16,Fremont,Wyoming,220,4
151197,2020-05-16,Goshen,Wyoming,5,0
151198,2020-05-16,Hot Springs,Wyoming,7,0
151200,2020-05-16,Laramie,Wyoming,181,1


In [122]:
covid.dtypes

date      object
county    object
state     object
cases      int64
deaths     int64
dtype: object

In [123]:
covid["state"] = covid.state.astype(str)

In [124]:
covid.dtypes

date      object
county    object
state     object
cases      int64
deaths     int64
dtype: object

# Creating a new dataframe with three columns: state, total_cases, total_deaths

In [125]:
# I will call this dataframe scd
scd = pd.DataFrame(columns=["state","total_cases", "total_deaths"])
scd

Unnamed: 0,state,total_cases,total_deaths


In [126]:
scd["state"] = pd.Series(covid_states_lst)
scd["total_cases"] = pd.Series(covid_states_lst)
scd["total_deaths"] = pd.Series(covid_states_lst)
scd.head()

Unnamed: 0,state,total_cases,total_deaths
0,Alabama,Alabama,Alabama
1,Alaska,Alaska,Alaska
2,Arizona,Arizona,Arizona
3,Arkansas,Arkansas,Arkansas
4,California,California,California


In [127]:
scd.shape

(55, 3)

In [133]:
for state in covid_states_lst:
    total_case_count = covid.loc[covid["state"] == state, "cases"].sum()
    total_death_count = covid.loc[covid["state"] == state, "deaths"].sum()
    scd["total_cases"] = scd["total_cases"].replace(state, total_case_count)
    scd["total_deaths"] = scd["total_deaths"].replace(state, total_death_count)
    

In [134]:
covid.loc[covid["state"] == "New York", "cases"]

210893      51
210933     706
210934      51
210935      12
210936      61
          ... 
134945    9599
134944     107
134947      75
134948      67
134946     130
Name: cases, Length: 8224, dtype: int64

In [136]:
scd

Unnamed: 0,state,total_cases,total_deaths
0,Alabama,3683761,88881
1,Alaska,132147,1324
2,Arizona,6852781,161358
3,Arkansas,1805470,23299
4,California,22315360,566805
5,Colorado,3286242,159231
6,Connecticut,4688352,414384
7,Delaware,1096866,42797
8,District of Columbia,987876,50087
9,Florida,16946110,354652


In [137]:
fig = go.Figure(data=[
    go.Bar(name='Cases', x=covid_states_lst, y=scd["total_cases"]),
    go.Bar(name='Deaths', x=covid_states_lst, y=scd["total_deaths"])
]
)
fig.update_layout(title="COVID-19 Cases in the United States", barmode='group')
fig.show()

In [132]:
total_cases = scd["total_cases"].sum()
total_deaths = scd["total_deaths"].sum()
raw_data = {"Total Cases": [total_cases], "Total Deaths": [total_deaths]}
covid_totals = pd.DataFrame(raw_data, columns = ["Total Cases", "Total Deaths"] )
covid_totals

TypeError: can only concatenate str (not "int") to str

In [None]:
fig = px.pie(covid_totals, values = covid_totals.loc(1), names = covid_totals.columns, title = "Cases & Deaths - COVID-19, USA")