In [None]:
#Import libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the local COVID-19 extract (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="covid19.csv")

# Preview the first ten rows as a sanity check on the load.
df.head(n=10)

In [None]:
# Display the column labels of `df` so later cells can refer to them by name.
df.columns

In [None]:
# Display the (row count, column count) tuple of `df`.
df.shape

In [None]:
# Exploratory data analysis: descriptive statistics for `df`
# (by default pandas summarises the numeric columns).

df.describe()

In [None]:
# Cases against deaths; marker colour and marker size both encode `deaths`.
sns.scatterplot(
    data=df,
    x="cases",
    y="deaths",
    hue="deaths",
    size="deaths",
)

In [None]:
# Date-based EDA: reload from the NYT per-state CSV and index the rows by date.
# Note: this rebinds `df`, replacing the frame read from covid19.csv.

df = (
    pd.read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv")
    # Parse the textual `date` column into datetimes, keeping its position.
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    # Promote the parsed dates to the index so rows can be sliced by date.
    .set_index("date")
)
print(f"Columns: {df.columns}")
df.index

In [None]:
# Preview the first ten rows of the frame that now uses dates as its index.

df.head(n=10)

In [None]:
# Date window used by the filters below: from two days ago through today.
# NOTE(review): the window is anchored to the wall clock, not to the data; if the
# newest date in `df` is older than `daybefore`, the slices come back empty.

from datetime import date, timedelta

today = date.today()
# Despite the name, `daybefore` is two days before `today`.
daybefore = today - timedelta(days=2)
print(f"Today {today}", f"Two days ago {daybefore}", sep="\n")

In [None]:
# First five rows whose date falls in the inclusive window [daybefore, today].
df.loc[daybefore:today].head(n=5)

In [None]:
# Kernel density estimate of the `deaths` column over the [daybefore, today] window.
# `fill=True` replaces `shade=True`, which seaborn has deprecated in favour of `fill`.
# Rows and column are selected in one `.loc` call instead of chained indexing.
sns.kdeplot(df.loc[daybefore:today, "deaths"], fill=True)

In [None]:
# Rows from the recent window, largest death count first (ties broken by cases).
current_df = (
    df.loc[daybefore:today]
    .sort_values(by=["deaths", "cases"], ascending=False)
)
current_df.head(n=10)

In [None]:
# Display the (row count, column count) of the recent-window frame.
current_df.shape

In [None]:
# State-based analysis
#
# `current_df` is sorted by deaths (descending) but spans up to three dates, so
# the same state can occupy several of the first rows. Taking the first ten rows
# directly therefore gave fewer than ten distinct states. De-duplicating first
# (keeping each state's first, i.e. highest-ranked, row) gives ten different states.

top_ten_states = current_df["state"].drop_duplicates().head(10).tolist()
top_ten_states

In [None]:
# Keep every dated row belonging to the selected states and drop the `fips` column.
top_states_df = df.loc[df["state"].isin(top_ten_states)].drop(columns=["fips"])

In [None]:
# Distinct state names present in the filtered frame (sanity check on the filter).
set(top_states_df["state"])

In [None]:
# One regression panel (deaths as a function of cases) per selected state,
# wrapped at five panels per row.
#
# `sns.lmplot` is a figure-level function: it creates its own figure and returns
# a FacetGrid. The earlier `plt.figure(figsize=(15,5))` call therefore only
# produced an extra, empty figure and has been removed; panel size is set by
# `height` (and `aspect`).
# NOTE: despite its name, `ax` holds the returned FacetGrid, not a matplotlib Axes.
ax = sns.lmplot(
    data=top_states_df,
    x="cases",
    y="deaths",
    col="state",
    col_wrap=5,
    hue="state",
    palette="Set2",
    height=3.5,
)
# Add horizontal breathing room between neighbouring panels.
ax.fig.subplots_adjust(wspace=.2)

In [None]:
# Data for the interactive plot of top states: rows dated from 2020-03-08
# through `today`, ordered by deaths and then cases, smallest first.

top_states_march_current_df = (
    top_states_df
    .loc["2020-03-08":today]
    .sort_values(by=["deaths", "cases"])  # ascending order is the default
)
top_states_march_current_df.head()

In [None]:
# Sugar intake by state
#
# Reads the CDC table, strips the parenthesised part of every value, converts
# the remainder to numbers, and renames the columns used by the merge below.
#
# Changes from the previous version:
#   * `regex=True` is passed explicitly. On pandas >= 2.0 `Series.str.replace`
#     treats the pattern as a literal string by default, so the parenthesised
#     text was never removed and `pd.to_numeric` then failed.
#   * The cell no longer writes the frame back over the CSV it was just read
#     from, so the raw input file is left untouched.
#   * The cleaning is one chain with no in-place mutation, so re-running the
#     cell gives the same result.
# NOTE(review): the variable says 2013 but the file name says 2003 - confirm the
# year; the name `cdc_2013` is kept so other cells keep working.

cdc_2013 = (
    pd.read_csv("education_sugar_cdc_2003.csv")
    # Keep the state name out of the numeric conversion.
    .set_index("State")
    # Every remaining column is text such as "<number> (<extra>)": drop the
    # parenthesised part and parse what is left as a number.
    .apply(lambda values: pd.to_numeric(values.str.replace(r"\(.*\)", "", regex=True)))
    .reset_index()
    .rename(columns={"State": "state", "Employed": "employed-sugar-intake-daily"})
)
cdc_2013.head()

In [None]:
# Combine sugar and COVID-19 data
#
# Joining column-on-column makes pandas ignore the indexes of both frames, so
# merging `df` directly discarded its date index and the result no longer said
# which day each row belonged to. The index is moved into a regular `date`
# column first so every merged row keeps its date.

cdc_employed_df = cdc_2013[["employed-sugar-intake-daily", "state"]]
sugar_covid_df = df.reset_index().merge(cdc_employed_df, how="inner", on="state")
sugar_covid_df.head()