In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read covid_19_clean_complete.csv into `df` and preview the first rows
DATA_FILE = "covid_19_clean_complete.csv"
df = pd.read_csv(DATA_FILE)
df.head()

In [None]:
# Print the frame summary (columns, non-null counts, dtypes) right after loading
df.info()

In [None]:
# Drop the "Province/State" column from `df`.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps this cell
# re-runnable: executing it a second time no longer raises KeyError for the
# already-removed column.
df = df.drop(columns=["Province/State"], errors="ignore")
df.columns

In [None]:
# Shorten two column names: 'Country/Region' -> 'Country', 'WHO Region' -> 'Region'
column_aliases = {'Country/Region': 'Country', 'WHO Region': 'Region'}
df.rename(columns=column_aliases, inplace=True)

In [None]:
# Replace the "Date" column with its pandas-datetime conversion so the later
# min/max and per-date groupings operate on real dates
df['Date'] = pd.to_datetime(df['Date'])

In [None]:
# Show the earliest and latest value of the "Date" column, comma-separated
date_col = df['Date']
print(f"{date_col.min()},{date_col.max()}")

In [None]:
# df["Month"] = df['Date'].dt.month
# df["Month"].value_counts()
# #US confirmed cases
# us = df[df["Country"]=="US"].groupby("Month")["Confirmed"].mean().round(2)
# us.head()
# us.plot()

In [None]:
# Top 5 countries ranked by the sum of their "Confirmed" values.
# NOTE(review): the sum runs over every date row of a country; if "Confirmed" is a
# cumulative running total this overstates the counts — confirm against the data source.
cdf = (
    df.groupby(by="Country")[["Confirmed"]]
    .sum()
    .sort_values(by="Confirmed", ascending=False)
)

# Countries are unordered categories, so draw bars rather than a connecting line.
ax = cdf.head().plot(kind="bar", legend=False, rot=0)
ax.set(title="Top 5 countries by confirmed cases", xlabel="Country", ylabel="Confirmed")
plt.show()

In [None]:
# Ten countries with the largest summed "Recovered" value, drawn as a bar chart
recovered_by_country = df.groupby(by="Country")["Recovered"].sum()
rdf = recovered_by_country.sort_values(ascending=False).head(10).reset_index()
sns.barplot(data=rdf, x="Country", y="Recovered")

In [None]:
# Total deaths per date (sum of "Deaths" over all rows sharing a date).
# The totals stay in chronological group-key order: sorting them by value would only
# scramble `ddf`, since the line plot orders its points by x anyway.
ddf = df.groupby("Date")["Deaths"].sum().reset_index()
ax = sns.lineplot(data=ddf, x="Date", y="Deaths")
ax.set_title("Total deaths by date")
plt.xticks(rotation=45)
plt.show()

In [None]:
# Confirmed cases over time for the US, India and China.
# Each frame is indexed by Date and holds the per-date sums of the four case columns;
# `us`, `india` and `china` are reused by the active-cases plot further down.
CASE_COLUMNS = ['Confirmed', 'Deaths', 'Recovered', 'Active']
us = df[df["Country"]=="US"].groupby("Date")[CASE_COLUMNS].sum()
india = df[df["Country"]=="India"].groupby("Date")[CASE_COLUMNS].sum()
china = df[df["Country"]=="China"].groupby("Date")[CASE_COLUMNS].sum()

# Line plots keep Date on a real time axis; a point plot would treat every date as
# its own category (one tick per day) and gives no legend to tell the series apart.
fig, ax = plt.subplots(figsize=(20, 5))
for label, frame, colour in (("US", us, "Red"), ("India", india, "Green"), ("China", china, "Blue")):
    sns.lineplot(x=frame.index, y=frame["Confirmed"], color=colour, label=label, ax=ax)
ax.set(title="Confirmed cases: US, India and China", xlabel="Date", ylabel="Confirmed")
ax.legend(title="Country")
ax.tick_params(axis="x", labelrotation=90)
plt.show()

In [None]:
#dfc = dfc.sort_values(by="Confirmed", ascending=False).head(8)
#sns.pointplot(dfc, x="Country", y="Confirmed", hue="Country")

In [None]:
# Active cases over time for the US, India and China, using the Date-indexed
# `us`, `india` and `china` frames defined earlier in the notebook.
# Line plots keep Date on a real time axis; a point plot would treat every date as
# its own category (one tick per day) and gives no legend to tell the series apart.
fig, ax = plt.subplots(figsize=(20, 5))
for label, frame, colour in (("US", us, "Red"), ("India", india, "Green"), ("China", china, "Blue")):
    sns.lineplot(x=frame.index, y=frame["Active"], color=colour, label=label, ax=ax)
ax.set(title="Active cases: US, India and China", xlabel="Date", ylabel="Active")
ax.legend(title="Country")
ax.tick_params(axis="x", labelrotation=90)
plt.show()

In [None]:
# Active cases per country on the most recent date in the data.
# "Active" is a per-date level (this notebook plots it per Date), so summing it over
# every date would count the same cases once per day; take the latest snapshot and
# sum only across the rows of each country on that date.
latest_date = df["Date"].max()
adf = (
    df[df["Date"] == latest_date]
    .groupby("Country")["Active"]
    .sum()
    .reset_index()
)
adf.head()

In [None]:
from plotly import express as px

# World map shaded by each country's "Active" value from `adf`;
# countries are matched to the map by their names.
choropleth_options = dict(
    locations="Country",
    locationmode="country names",
    color="Active",
    hover_name="Country",
    color_continuous_scale="Inferno_r",
    projection="equirectangular",
    title="Global Covid19 active cases",
)
pt = px.choropleth(adf, **choropleth_options)

# Fixed canvas size, with the left and bottom margins removed
pt.update_layout(width=1000, height=450, margin={"l": 0, "b": 0})
pt.show()

In [None]:
#model building
!pip install prophet

In [None]:
from prophet import Prophet

def predict_on_field(field, periods=20):
    """Fit a Prophet model to the per-date total of ``field`` and plot its forecast.

    The values of ``field`` in the notebook-level ``df`` are summed per ``Date``,
    a default-configured ``Prophet`` model is fitted to that series, and the
    prediction over the historical dates plus ``periods`` future periods is plotted.

    Parameters
    ----------
    field : str
        Column of ``df`` to forecast (e.g. "Confirmed", "Active", "Deaths").
    periods : int, default 20
        Number of future periods appended by ``make_future_dataframe``
        (the previous hard-coded horizon, kept as the default).

    Returns
    -------
    None
        The forecast figure is shown with ``plt.show()``.
    """
    # Prophet expects exactly two columns: "ds" (timestamp) and "y" (value).
    daily_totals = (
        df.groupby("Date")[field]
        .sum()
        .reset_index()
        .rename(columns={"Date": "ds", field: "y"})
    )

    model = Prophet()
    model.fit(daily_totals)

    future = model.make_future_dataframe(periods=periods)
    forecast = model.predict(future)

    # Label the axes and title with the field so the plots of different
    # fields can be told apart when several forecasts are shown in a row.
    fig = model.plot(forecast, xlabel="Date", ylabel=field)
    fig.axes[0].set_title(f"Prophet forecast: {field} (+{periods} periods)")
    plt.show()

In [None]:
# Forecast the per-date total of the "Confirmed" column
predict_on_field("Confirmed")

In [None]:
# Forecast the per-date total of the "Active" column
predict_on_field("Active")

In [None]:
# Forecast the per-date total of the "Deaths" column
predict_on_field("Deaths")