# COVID-19 Project
## Digital Innovation One

In [None]:
# Import libraries
import pandas as pd
import numpy as np

from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go

import re

In [None]:

# Load the project data: the CSV is fetched from the GitHub repository below
url = 'https://github.com/neylsoncrepalde/projeto_eda_covid/blob/master/covid_19_data.csv?raw=true'

# Ask pandas to parse the two date columns as datetimes while reading
df = pd.read_csv(url, parse_dates=['ObservationDate', 'Last Update'])

In [None]:
# Check the data type of each column (the date columns were requested as datetimes on load)
df.dtypes

In [None]:
# Rename columns, whose names should be all lowercase and without special characters
def rename_columns(col_name):
  """Return `col_name` lowercased, with every '/', '|' and space removed."""
  unwanted = re.compile(r'[/| ]')
  cleaned = unwanted.sub('', col_name)
  return cleaned.lower()

# Normalize every column label of the dataset
df.columns = list(map(rename_columns, df.columns))

## Focus on Brazil

In [None]:
# Checking only Brazilian cases (rows with at least one confirmed case).
# .copy() gives `brazil` its own data, so columns added to it later do not
# write into a slice of `df` (which triggers SettingWithCopyWarning).
brazil = df.loc[
  (df.countryregion == 'Brazil') &
  (df.confirmed > 0)
].copy()

In [None]:
# Confirmed cases evolution
px.line(
  brazil,
  x='observationdate',
  y='confirmed',
  title='Confirmed cases in Brazil',
)

## New cases by day

In [None]:
# create new column with new cases confirmed per day
# diff() subtracts the previous row's total from each row; the first row has
# no predecessor, so its NaN is replaced by 0.
# assign() returns a new DataFrame instead of writing a column into `brazil`
# in place, so no SettingWithCopyWarning is raised if `brazil` is a slice.
brazil = brazil.assign(newcases=brazil['confirmed'].diff().fillna(0))

# plot a graph with the new cases per day
px.line(brazil, x='observationdate', y='newcases', title='New cases per day')

## Deaths

In [None]:
# Deaths over time: a red line with a marker on every observation
fig = go.Figure(
  data=[
    go.Scatter(
      x=brazil.observationdate,
      y=brazil.deaths,
      name='Deaths',
      mode='lines+markers',
      line=dict(color='red'),
    )
  ]
)

# Layout
fig.update_layout(title='Deaths by COVID-19 in Brazil')

## Increase rate

In [None]:
# define the increase rate function
def increase_rate(data, variable, start_date=None, end_date=None):
  """Return the average daily growth rate of `variable`, in percent.

  The rate is the compound growth between two observations:
  ((present / past) ** (1 / n) - 1) * 100, where n is the number of days
  between `start_date` and `end_date`.

  Parameters:
    data: DataFrame with an `observationdate` datetime column and a
      `variable` column.
    variable: name of the column to measure.
    start_date: first day of the window (anything `pd.to_datetime` accepts).
      Defaults to the earliest date on which `variable` is greater than 0.
    end_date: last day of the window (anything `pd.to_datetime` accepts).
      Defaults to the latest date present in `data`.

  Returns:
    The growth rate as a percentage.

  Raises:
    IndexError: if `data` has no row for `start_date` or `end_date`.
    ZeroDivisionError: if `start_date` and `end_date` are the same day.
  """
  # `is None` is an identity check; `== None` would be evaluated by the
  # argument's own __eq__ and is not a reliable test for a missing value.
  if start_date is None:
    start_date = data.observationdate.loc[data[variable] > 0].min()
  else:
    start_date = pd.to_datetime(start_date)

  if end_date is None:
    # Latest date rather than the last row, so the default does not depend on
    # row order (the default start is likewise the earliest date).
    end_date = data.observationdate.max()
  else:
    end_date = pd.to_datetime(end_date)

  # If several rows share a date, the first matching row is used
  past_value = data.loc[data.observationdate == start_date, variable].values[0]
  present_value = data.loc[data.observationdate == end_date, variable].values[0]

  # Number of whole days covered by the window
  n = (end_date - start_date).days
  rate = (present_value/past_value)**(1/n) - 1

  return rate*100

# calculate the average daily increase rate (in %) of confirmed cases in Brazil,
# using the function's default date window
increase_rate(brazil, 'confirmed')

In [None]:
# define the daily increase rate function
def daily_increase_rate(data, variable, start_date=None):
  """Return the row-over-row growth of `variable`, in percent.

  Each rate is (current - previous) / previous * 100, computed for every row
  dated on or after `start_date` except the first one, which has no
  predecessor.

  Parameters:
    data: DataFrame with an `observationdate` datetime column and a
      `variable` column. Rows are taken in their existing order, so they are
      assumed to be sorted by date with one row per day.
    variable: name of the column to measure.
    start_date: first day to include (anything `pd.to_datetime` accepts).
      Defaults to the earliest date on which `variable` is greater than 0.

  Returns:
    A NumPy array of percentages with one entry fewer than the number of
    selected rows (empty when fewer than two rows are selected).
  """
  if start_date is None:
    start_date = data.observationdate.loc[data[variable] > 0].min()
  else:
    start_date = pd.to_datetime(start_date)

  # Select the rows by date. Indexing the whole frame by position with a
  # day count would ignore start_date, divide by leading zero values and
  # run past the end of the data whenever a date is missing.
  selected = data.loc[data.observationdate >= start_date, variable]
  values = selected.to_numpy(dtype=float)

  # (current - previous) / previous for each consecutive pair of rows
  rates = np.diff(values) / values[:-1]
  return rates*100

# plot daily rates for confirmed cases in Brazil
daily_rates = daily_increase_rate(brazil, 'confirmed')
# earliest date with at least one confirmed case
first_day = brazil.observationdate.loc[brazil.confirmed > 0].min()
px.line(
  # one calendar day per rate; the first day is dropped ([1:]) because it has
  # no previous day to compare against
  x = pd.date_range(first_day, brazil.observationdate.max())[1:],
  y = daily_rates,
  title = 'Daily increase rates of confirmed cases in Brazil'
)

## Predictions

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt

In [None]:

# Series of confirmed cases indexed by observation date.
# set_index builds a new object, so `brazil` and its `confirmed` column keep
# their own index (assigning `.index` on `brazil.confirmed` can relabel the
# column object that `brazil` itself holds).
confirmed = brazil.set_index('observationdate')['confirmed']

In [None]:
# Decompose the confirmed-cases series; the result exposes the observed,
# trend, seasonal and resid components
res = seasonal_decompose(confirmed)

In [None]:
# One panel per component of the decomposition, stacked vertically
fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, figsize=(10,8))

ax1.plot(res.observed)
ax2.plot(res.trend)
ax3.plot(res.seasonal)
# residuals are plotted against the dates of the confirmed-cases series
ax4.plot(confirmed.index, res.resid)
# dashed zero line as a visual reference for the residuals
ax4.axhline(0, linestyle='dashed', c='black')
plt.show()

## ARIMA Model

In [None]:
%pip install pmdarima

In [None]:
from pmdarima.arima import auto_arima

# Fit a model to the confirmed-cases series with auto_arima; the returned
# model is used for the in-sample predictions and the forecast
model = auto_arima(confirmed)

In [None]:
# Number of days to forecast, kept in one place so that the forecast dates,
# the forecast values and the chart title ("next 30 days") all agree.
forecast_days = 30

# Forecast dates start the day after the last observation instead of a
# hard-coded range, so x and y always have the same number of points.
forecast_dates = pd.date_range(
  confirmed.index.max() + pd.Timedelta(days=1),
  periods=forecast_days
)

# Observed series
fig = go.Figure(go.Scatter(
  x=confirmed.index,
  y=confirmed,
  name='Observed'
))

# Model values for the dates that were already observed
fig.add_trace(go.Scatter(
  x=confirmed.index,
  y=model.predict_in_sample(),
  name='Predicted'
))

# Model values for the days after the last observation
fig.add_trace(go.Scatter(
  x=forecast_dates,
  y=model.predict(forecast_days),
  name='Forecast'
))

fig.update_layout(title='Prediction of confirmed cases in Brazil for the next 30 days')
fig.show()