In [63]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import numpy as np

In [64]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [78]:
df = pd.read_csv("complete_covid_data.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Province_State,Lat,Long_,date,confirmed,Population,deaths,Incidence_Rate,Mortality_Rate
0,0,Alabama,2203.246784,-5809.578199,1/22/20,0,4903185.0,0,0.0,
1,1,Alaska,1747.177765,-4293.070291,1/22/20,0,731545.0,0,0.0,
2,2,American Samoa,-14.271,-170.132,1/22/20,0,55641.0,0,0.0,
3,3,Arizona,505.138555,-1671.948482,1/22/20,0,7278717.0,0,0.0,
4,4,Arkansas,2618.391704,-6932.54837,1/22/20,0,3017804.0,0,0.0,


In [66]:
mobility_reports_url = 'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv?cachebust=2a2bd4912d1fa29a'
mobility_data = pd.read_csv(mobility_reports_url)
us_mobility = mobility_data.loc[mobility_data["country_region"]=="United States"]

In [67]:
def get_state_mobility(state):
    return us_mobility[us_mobility["sub_region_1"]==state]

In [68]:
us_mobility["Averaged_mobility"] = ((0.2 * us_mobility["retail_and_recreation_percent_change_from_baseline"])+(0.4 *us_mobility["transit_stations_percent_change_from_baseline"])+(0.4 * us_mobility["workplaces_percent_change_from_baseline"]))/3



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [69]:
hospital_data = pd.read_csv("covid19-NatEst.csv")
hospital_data = hospital_data[1:]

In [70]:
dates = df["date"].unique()

In [79]:
def plot_countries():
  x = dates
  ny_y = df.loc[df["Province_State"]=="New York", "Incidence_Rate"]
  fl_y = df.loc[df["Province_State"]=="Florida", "Incidence_Rate"]
  ct_y = df.loc[df["Province_State"]=="Connecticut", "Incidence_Rate"]
  ma_y = df.loc[df["Province_State"]=="Massachusetts", "Incidence_Rate"]
  tx_y = df.loc[df["Province_State"]=="Texas", "Incidence_Rate"]
  nj_y= df.loc[df["Province_State"]=="New Jersey", "Incidence_Rate"]
  us_y = df.loc[df["Province_State"]=="US", "Incidence_Rate"]
  fig=go.Figure()
  fig.add_trace(go.Scatter(x=x, y=tx_y,
                      mode='lines+markers',
                      name='Texas'))
  fig.add_trace(go.Scatter(x=x, y=fl_y,
                      mode='lines+markers',
                      name='Florida'))
  fig.add_trace(go.Scatter(x=x, y=ct_y,
                      mode='lines+markers',
                      name='Connecticut'))
  fig.add_trace(go.Scatter(x=x, y=ma_y,
                      mode='lines+markers',
                      name='Massachusetts'))
  fig.add_trace(go.Scatter(x=x, y=nj_y,
                      mode='lines+markers',
                      name='New Jersey'))
  fig.add_trace(go.Scatter(x=x, y=ny_y,
                      mode='lines+markers',
                      name='New York'))
  fig.add_trace(go.Scatter(x=x, y=us_y,
                      mode='lines+markers',
                      name='US'))
  fig.update_layout(title="Incident rate over time (confirmed cases for every 100,000 people)",
                      xaxis_title="Date",yaxis_title="Incident rate",legend=dict(x=0,y=1,traceorder="normal"))
  fig.show()


In [80]:
def plot_rates():
  ny_mortality = df.loc[df["Province_State"]=="New York", "Mortality_Rate"]
  ny_y = df.loc[df["Province_State"]=="New York", "Incidence_Rate"]
  fl_y = df.loc[df["Province_State"]=="Florida", "Incidence_Rate"]


  fig = make_subplots(
      rows=1, cols=2,
      subplot_titles=("New York", "Florida"))

  fig.add_trace(go.Scatter(x=dates, y=ny_mortality,
                      mode='lines+markers',
                      name='Mortality_Rate in New York'), row=1, col=1)
  fig.add_trace(go.Scatter(x=dates, y=ny_y,
                      mode='lines+markers',
                      name='Incident_Rate in New York'), row=1, col=1)
  #florida
  fl_mortality = df.loc[df["Province_State"]=="Florida", "Mortality_Rate"]

  fig.add_trace(go.Scatter(x=dates, y=fl_mortality,
                      mode='lines+markers',
                      name='Mortality_Rate in Florida'), row=1, col=2)
  fig.add_trace(go.Scatter(x=dates, y=fl_y,
                      mode='lines+markers',
                      name='Incident_Rate in Florida'), row=1, col=2)
  fig.update_yaxes(title_text="y-axis in logarithmic scale", type="log", row=1, col=1)
  fig.update_yaxes(title_text="y-axis in logarithmic scale", type="log", row=1, col=2)

  fig.update_layout(title="Incident & Mortality Rate New York v.s. Florida (Log scale)")
  fig.show()

In [81]:
def plot_mobility_and_incidence(state):
  var = "Averaged_mobility"
  b = get_state_mobility(state)[var].isnull()==False
  x = get_state_mobility(state).loc[b, "date"]
  y = list(get_state_mobility(state).loc[b, var].values)
  fig = make_subplots(
      rows=1, cols=2,
      subplot_titles=("Mobility Rate", "Incident Rate"))
  y2 = df.loc[df["Province_State"]==state, "Incidence_Rate"]
  y3 = get_state_mobility("New York").groupby('date').mean()["Averaged_mobility"].rolling(window=7).mean()
  fig.add_trace(go.Scatter(x=x, y=y,
                      mode='markers',
                      name=var), row=1, col=1)
  fig.add_trace(go.Scatter(x=x, y=y3, mode='lines+markers', name="Rolling mean mobility"), row=1, col=1)
  fig.add_trace(go.Scatter(x=dates, y=y2,
                      mode='lines+markers',
                      name='Incident_Rate'), row=1, col=2)
  
  fig.update_yaxes(title_text="Average mobility % change from baseline", row=1, col=1)
  fig.update_yaxes(title_text="Incident Rate", row=1, col=2)
  fig.update_layout(title="Incident rate and mobility rate over time in "+state)
  fig.show()

In [74]:
def rolling_mean_and_new_cases(state, var):
  x = dates
  y = df.loc[df["Province_State"]==state, var].diff().rolling(window=7).mean()
  fig = go.Figure()
  y2 = df.loc[df["Province_State"]==state, var].diff().fillna(0)
  fig.add_trace(go.Bar(x=x, y=y2,
                      name="New cases"))
  fig.add_trace(go.Scatter(x=x, y=y,
                      mode='lines+markers',
                      name="Rolling mean"))
  fig.update_layout(title=var +" rolling mean & daily new cases in "+state,
                  xaxis_title="Date",yaxis_title=var,legend=dict(x=0,y=1,traceorder="normal"), height=300, width=1000)
  fig.show()

In [82]:
def plot_hospital_capacity(state):
  s = state
  v = "ICUBedsOccAnyPat__N_ICUBeds_Est"
  x = hospital_data.loc[hospital_data["statename"]==s, "collectionDate"]
  y = hospital_data.loc[hospital_data["statename"]==s, v]
  y = y.astype(float).rolling(window=7).mean()
  fig = go.Figure()
  fig.add_trace(go.Scatter(x=x, y=y, mode="lines"))
  fig.update_layout(title=v+" 7 day rolling mean since April in "+s,
                  xaxis_title="Date",yaxis_title=v,legend=dict(x=0,y=1,traceorder="normal"), height=300, width=1000)
  fig.show()

In [76]:
geo_df = df.loc[df["Lat"].isnull()==False, :]
fig = px.scatter_geo(geo_df,
                     lat="Lat", lon="Long_", color="Province_State",
                     hover_name="Province_State", size="confirmed",
                     size_max=50, animation_frame="date",
                     template='plotly_dark', projection="natural earth",
                     title="COVID-19 US cases over time")
fig.update_layout(geo_scope='usa')

fig.show()

KeyboardInterrupt: 

In [83]:
plot_countries()

- We'll start off by looking at the incident rate over time in different states in US. As you can see above, the virus begins to spread rapidly in New York causing states nearby such as Connecticut and Massachussets to follow a similar pattern. States such as Florida and Texas confirm their first cases a later and take longer to experience such rapid growth in incident rate. However, we also see that New York and states nearby are substantially slowing down their incident rate, and the incident rate in Florida and Texas are sky rocketing. We see that the majority of us states follow a similar trend as Florida and Texas by looking at US incident rate.

In [84]:
plot_rates()

- In New York, the mortality rate follows incident rate. Once the incident rate shot up the mortality rate did soon after. On the other hand, in Florida, the mortality rate is already high once confirmed cases are reported, and it begins to fluctuate as the incident rate rises

In [85]:
plot_mobility_and_incidence("New York")

- As you can see, in late April/early May when the mobility is at its lowest percentage, the incidence rate begins to slow down

In [86]:
plot_mobility_and_incidence("Florida")

- You can see that in Florida the incident rate increases slowly from April-May, which is when the mobility rate is at its lowest

In [87]:
rolling_mean_and_new_cases("New York", "Incidence_Rate")
plot_hospital_capacity("New York")

- We can conclude that the occupancy of beds in the intensive care unit depends on the incident rate. As the new cases each day decreases so does hospital occupancy

In [88]:
rolling_mean_and_new_cases("Florida", "Incidence_Rate")
plot_hospital_capacity("Florida")

- In Florida we see the same idea in the opposite direction.

- Florida and New York have had very different experiences with the coronavirus. New York had a terrible outbreak in early march likely to do to its dense population, but they've recently been able to control their cases. On the other hand, Florida has only recently had a large increase in incidence rate, and they have now surpassed New York in confirmed case per 100,000 people. Despite the increase in cases, Florida has been able to keep their mortality rate very low, likely due to the fact that it is mostly younger people in Florida getting covid and they are significantly less likely to die. From the mobility data, we see that shortly after the mobility of a state drops, the incidence rate slows down. Additionally, icu bed occupancy decreases and increases with incidence rate. New York's hospital bed occupancy has been on a very steady decrease, so if they have another outbreak, they will be prepared. In contrast, Hospital bed occupancy in Florida has increased significantly since their outbreak in May. Thankfully, the mortality rate in Florida is continuing to decrease even as the incidence rate increases. Finally, it is evident that the US as a whole looks more similar to Florida in terms of their incidence rate, which suggests that our country will continue our steady increase in cases for at least a few more months.