## This project shows different statistics about COVID-19 in the United States

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

### Let's read the data and see what is in it. 


In [3]:
# Load the county-level records (date, county, state, fips, cases, deaths)
# from the CSV that sits next to this notebook, then preview the first rows.
covid_data = pd.read_csv(filepath_or_buffer="us-counties.csv")
covid_data.head(n=5)

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


In [14]:
# Peek at the last ten rows to see how recent the data is.
covid_data.iloc[-10:]

Unnamed: 0,date,county,state,fips,cases,deaths
173758,2020-05-24,Natrona,Wyoming,56025.0,68,0
173759,2020-05-24,Niobrara,Wyoming,56027.0,2,0
173760,2020-05-24,Park,Wyoming,56029.0,2,0
173761,2020-05-24,Platte,Wyoming,56031.0,1,0
173762,2020-05-24,Sheridan,Wyoming,56033.0,16,0
173763,2020-05-24,Sublette,Wyoming,56035.0,3,0
173764,2020-05-24,Sweetwater,Wyoming,56037.0,25,0
173765,2020-05-24,Teton,Wyoming,56039.0,100,1
173766,2020-05-24,Uinta,Wyoming,56041.0,12,0
173767,2020-05-24,Washakie,Wyoming,56043.0,20,1


### I can observe that this is a dataframe. Let's see if there are any N/A values in our data.

In [6]:
# Count the missing entries in every column.
covid_data.isnull().sum()

date         0
county       0
state        0
fips      1913
cases        0
deaths       0
dtype: int64

### There are 1913 N/A values for FIPS. I might get rid of the rows that don't have a FIPS value. 
### Let me see how many total values there are first before I get rid of them.


In [8]:
# Total number of rows in the frame.
covid_data.shape[0]

173768

In [9]:
# There are 173768 total rows in my data. If I took out 1913, that would only be...

In [10]:
# Fraction of rows whose FIPS code is missing. Derived from the frame itself
# rather than from counts copied out of earlier outputs, so the figure stays
# correct if the CSV is refreshed. int() keeps the result a plain Python float.
int(covid_data["fips"].isna().sum()) / len(covid_data)

0.011008931448828323

# ...just a hair over *1% of the data*. I can afford to take that out. Let's do that.

In [15]:
# Discard the rows that have no FIPS code. The result is rebound to the same
# name instead of using inplace=True, so the cell is an explicit
# transformation and does not silently mutate an object shown earlier.
covid_data = covid_data.dropna(subset=["fips"])

# Confirm that no column has missing values any more.
covid_data.isna().sum()

date      0
county    0
state     0
fips      0
cases     0
deaths    0
dtype: int64

### Our data no longer has N/A values!

In [18]:
## Let's take a look at what statistics can be shown of the data

In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) for the
# numeric columns; the quartiles listed are pandas' defaults.
covid_data.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,fips,cases,deaths
count,171855.0,171855.0,171855.0
mean,30009.143284,262.325292,12.617532
std,15372.545134,1617.885603,88.923463
min,1001.0,1.0,0.0
25%,18065.0,4.0,0.0
50%,29043.0,16.0,0.0
75%,45033.0,72.0,2.0
max,56043.0,72010.0,3304.0


In [20]:
# Print a concise summary of the frame: index range, per-column non-null
# counts and dtypes, and memory usage.
covid_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 171855 entries, 0 to 173767
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   date    171855 non-null  object 
 1   county  171855 non-null  object 
 2   state   171855 non-null  object 
 3   fips    171855 non-null  float64
 4   cases   171855 non-null  int64  
 5   deaths  171855 non-null  int64  
dtypes: float64(1), int64(2), object(3)
memory usage: 9.2+ MB


## I will not have a need for FIPS (a specific code for counties), so I will drop this column altogether.

In [47]:
# Actually remove the FIPS column: without this step a fresh
# "Restart & Run All" would still show `fips` in the frame below.
# errors="ignore" makes the cell safe to re-run once the column is gone.
covid_data = covid_data.drop(columns=["fips"], errors="ignore")
covid_data

Unnamed: 0,date,county,state,cases,deaths
0,2020-01-21,Snohomish,Washington,1,0
1,2020-01-22,Snohomish,Washington,1,0
2,2020-01-23,Snohomish,Washington,1,0
3,2020-01-24,Cook,Illinois,1,0
4,2020-01-24,Snohomish,Washington,1,0
...,...,...,...,...,...
173763,2020-05-24,Sublette,Wyoming,3,0
173764,2020-05-24,Sweetwater,Wyoming,25,0
173765,2020-05-24,Teton,Wyoming,100,1
173766,2020-05-24,Uinta,Wyoming,12,0


In [31]:
# I want to assign different columns to variables so I can use them later 

In [33]:
# Bind each column of interest to its own name in a single unpacking step.
# The order of the names on the left matches the order of the column labels.
covid_cases, covid_deaths, covid_state, covid_county, covid_date = (
    covid_data[label]
    for label in ("cases", "deaths", "state", "county", "date")
)

In [61]:
# groupby() returns a new GroupBy object and leaves covid_data untouched, so
# the result has to be kept in a variable to be of any use. Follow up with an
# aggregation, e.g. covid_by_state[["cases", "deaths"]].max().
covid_by_state = covid_data.groupby("state")

In [62]:
# Display the frame again. Calling groupby() does not modify covid_data,
# so this shows the same rows as before.
covid_data

Unnamed: 0,date,county,state,cases,deaths
0,2020-01-21,Snohomish,Washington,1,0
1,2020-01-22,Snohomish,Washington,1,0
2,2020-01-23,Snohomish,Washington,1,0
3,2020-01-24,Cook,Illinois,1,0
4,2020-01-24,Snohomish,Washington,1,0
...,...,...,...,...,...
173763,2020-05-24,Sublette,Wyoming,3,0
173764,2020-05-24,Sweetwater,Wyoming,25,0
173765,2020-05-24,Teton,Wyoming,100,1
173766,2020-05-24,Uinta,Wyoming,12,0
