# Comparando diferentes fontes de dados

## WlCota dataset

In [1]:

import pandas as pd
from datetime import datetime

wlcota_df = pd.read_csv(
    "https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv")

# Removing some columns
wlcota_columns = wlcota_df.columns.tolist()
rm_columns = ["deaths_per_100k_inhabitants", "totalCases_per_100k_inhabitants", 
              "deaths_by_totalCases", "tests_per_100k_inhabitants", "totalCasesMS"]
for col in rm_columns:
    wlcota_columns.pop(wlcota_columns.index(col))
wlcota_df = wlcota_df[wlcota_columns]

wlcota_df = wlcota_df.where(wlcota_df["state"]=="TOTAL").dropna(subset=["recovered"])
wlcota_df = wlcota_df[["date", "newDeaths", "newCases", "recovered"]]
wlcota_df.columns = ["date", "new_deaths", "new_confirmed", "recovered"]
wlcota_df['date'] = [datetime.fromisoformat(f) for f in wlcota_df['date']]

wlcota_df.head()

Unnamed: 0,date,new_deaths,new_confirmed,recovered
321,2020-03-23,9.0,358.0,8.0
349,2020-03-24,13.0,303.0,20.0
377,2020-03-25,12.0,311.0,27.0
405,2020-03-26,18.0,424.0,42.0
433,2020-03-27,16.0,486.0,42.0


## Turiga dataset

In [2]:

turiga_df = pd.read_csv("./PG_IMT/covid19-ce4dd230309946b0bb7ff75bfe786133.csv")
turiga_df = turiga_df.where(turiga_df["place_type"] == "state").dropna(subset=["state"])
turiga_df = turiga_df.groupby("date").sum()
turiga_df["date"] = turiga_df.index
turiga_df.index = range(len(turiga_df))
turiga_df = turiga_df[["date", "new_deaths", "new_confirmed"]]
turiga_df['date'] = [datetime.fromisoformat(f) for f in turiga_df['date']]

turiga_df.head()

FileNotFoundError: ignored

In [None]:
turiga_df.dropna()

Unnamed: 0,date,new_deaths,new_confirmed
0,2020-02-25,0.0,1.0
1,2020-02-26,0.0,0.0
2,2020-02-27,0.0,0.0
3,2020-02-28,0.0,1.0
4,2020-02-29,0.0,0.0
...,...,...,...
109,2020-06-13,857.0,21098.0
110,2020-06-14,634.0,16925.0
111,2020-06-15,764.0,24581.0
112,2020-06-16,1291.0,36541.0


## About-corona dataset

In [4]:

import requests

covid_api = 'https://corona-api.com/countries/'
rest_countries = 'https://restcountries.eu/rest/v2/alpha/'
country = 'BR' # Alpha-2 ISO3166

data_json =  requests.get(covid_api + country).json()
country = requests.get(covid_api + country).json()

N = country['data']['population']

from datetime import datetime

ac_df = pd.DataFrame(data_json['data']['timeline'])
ac_df = ac_df.sort_values('date').reset_index()

from datetime import datetime, timedelta
ac_df['date'] = [datetime.fromisoformat(f) for f in ac_df['date']]
ac_df = ac_df.drop_duplicates(subset='date', keep = 'last')

# Criando o vetor de tempo
first_date = ac_df['date'].iloc[0]
size_days = (ac_df['date'].iloc[-1] - ac_df['date'].iloc[0]).days
date_vec = [first_date + timedelta(days=k) for k in range(size_days)]

new_ac_df = pd.DataFrame(date_vec, columns=['date'])
new_ac_df = pd.merge(new_ac_df, ac_df, how='left', on='date')
new_ac_df = new_ac_df.drop(columns= ['index',  'updated_at'])#, 'is_in_progress'])

for col in new_ac_df.columns[1:]:
    new_ac_df[col] = new_ac_df[col].interpolate(method='polynomial', order=1)
ac_df = new_ac_df.dropna()
ac_df = ac_df[["date", "new_deaths", "new_confirmed", "recovered"]]

ac_df.head()

Unnamed: 0,date,new_deaths,new_confirmed,recovered
0,2020-01-22,0.0,0.0,0.0
1,2020-01-23,0.0,0.0,0.0
2,2020-01-24,0.0,0.029412,0.0
3,2020-01-25,0.0,0.058824,0.0
4,2020-01-26,0.0,0.088235,0.0


## Kaggle dataset

In [None]:

kaggle_df = pd.read_csv("./PG_IMT/datasets_549702_1257522_brazil_covid19_macro.csv")
kaggle_df = kaggle_df.dropna(subset=["recovered"])
kaggle_df["new_deaths"] = kaggle_df["deaths"].diff()
kaggle_df["new_confirmed"] = kaggle_df["cases"].diff()
kaggle_df = kaggle_df[["date", "new_deaths", "new_confirmed", "recovered"]].dropna()
kaggle_df['date'] = [datetime.fromisoformat(f) for f in kaggle_df['date']]

kaggle_df.head()

Unnamed: 0,date,new_deaths,new_confirmed,recovered
55,2020-04-20,113.0,1927.0,22991.0
56,2020-04-21,166.0,2498.0,24325.0
57,2020-04-22,165.0,2678.0,25318.0
58,2020-04-23,407.0,3735.0,26573.0
59,2020-04-24,357.0,3503.0,27655.0


In [6]:
import numpy as np
import plotly.graph_objects as go

fig = go.Figure()
# WL Cota data
fig.add_trace(go.Scatter(
    name="new_cases - wlcota",
    x=wlcota_df["date"],
    y=wlcota_df["new_confirmed"]))
fig.add_trace(go.Scatter(
    name="new_deaths - wlcota",
    x=wlcota_df["date"],
    y=wlcota_df["new_deaths"]))
fig.add_trace(go.Scatter(
    name="recovered - wlcota",
    x=wlcota_df["date"],
    y=wlcota_df["recovered"]))

# Turiga data
'''fig.add_trace(go.Scatter(
    name="new_cases - turiga",
    x=turiga_df["date"],
    y=turiga_df["new_confirmed"]))
fig.add_trace(go.Scatter(
    name="new_deaths - turiga",
    x=turiga_df["date"],
    y=turiga_df["new_deaths"]))'''

# About Corona data
fig.add_trace(go.Scatter(
    name="new_cases - aboutcorona",
    x=ac_df["date"],
    y=ac_df["new_confirmed"]))
fig.add_trace(go.Scatter(
    name="new_deaths - aboutcorona",
    x=ac_df["date"],
    y=ac_df["new_deaths"]))
fig.add_trace(go.Scatter(
    name="recovered - aboutcorona",
    x=ac_df["date"],
    y=ac_df["recovered"]))

# Kaggle data
'''fig.add_trace(go.Scatter(
    name="new_cases - kaggle",
    x=kaggle_df["date"],
    y=kaggle_df["new_confirmed"]))
fig.add_trace(go.Scatter(
    name="new_deaths - kaggle",
    x=kaggle_df["date"],
    y=kaggle_df["new_deaths"]))
fig.add_trace(go.Scatter(
    name="recovered - kaggle",
    x=kaggle_df["date"],
    y=kaggle_df["recovered"]))

fig.update_layout(template='plotly_dark',
                  xaxis_title='Data', yaxis_title='Indivíduos',
                  title_text="Valores iniciais de infectados ")

fig.show()



NameError: ignored

## Misturando os dados

In [None]:

df_all = pd.concat([kaggle_df, ac_df, turiga_df, wlcota_df], ignore_index=True, sort=True)
df_all = df_all.sort_values(by=["date"])
df_all = df_all.groupby("date").mean()
df_all["date"] = df_all.index
df_all.index = range(len(df_all))

df_all.head()

Unnamed: 0,new_confirmed,new_deaths,recovered,date
0,0.0,0.0,0.0,2020-01-22
1,0.0,0.0,0.0,2020-01-23
2,0.029412,0.0,0.0,2020-01-24
3,0.058824,0.0,0.0,2020-01-25
4,0.088235,0.0,0.0,2020-01-26


In [None]:

n_country = 209.5e6

# Getting the recovered
R = df_all["recovered"].values.tolist()
# Getting the infected
I = [df_all["new_confirmed"][0]]
for nc, nd, nr in zip(df_all["new_confirmed"][1:],
                      df_all["new_deaths"][1:],
                      np.diff(R)):
    I.append( I[-1] + nc - nd - nr )
I, R = np.array(I), np.array(R)
S = n_country - I - R

print("Size of suceptible: {}".format(len(S)))
print("Size of infected: {}".format(len(I)))
print("Size of recovered: {}".format(len(R)))

start_index = np.argmax(I >= 2000)
print("Epidemy starts at: {}".format(start_index))

fig = go.Figure()
fig.add_trace(go.Scatter(
    name="Recuperados",
    x=df_all["date"],
    y=R))
fig.add_trace(go.Scatter(
    name="Infectados",
    x=df_all["date"],
    y=I))

fig.update_layout(template='plotly_dark',
                  xaxis_title='Data', yaxis_title='Indivíduos',
                  title_text="Valores iniciais de infectados ")

fig.show()

Size of suceptible: 149
Size of infected: 149
Size of recovered: 149
Epidemy starts at: 62


## Obtendo um modelo com os dados misturados

In [None]:

# Getting the data window of the epidemy...
Sd, Id, Rd = S[start_index:], I[start_index:], R[start_index:]
t_ref = np.linspace(0, len(Id)-1, len(Id))

from models import *

model = ss.SIR(pop=n_country, focus=["S","I","R"])

model.fit(dict(S=Sd, I=Id, R=Rd), t_ref, 
                  search_pop=True,
                  pop_sens=[0.001, 0.1],
                  Ro_sens=[0.8, 15], 
                  D_sens=[5, 60])

initial = (Sd[0], Id[0], Rd[0])
t_pred = np.linspace(0, 120, 121)

y_pred = model.predict(initial, t_pred)


	 ├─ S(0) ─ I(0) ─ R(0) ─  [209497740.9852941, 2248.014705882353, 11.0]
	 ├─ Ro bound ─   0.8  ─  15
	 ├─ D  bound ─   5  ─  60
	 ├─ equation weights ─   [4.7788833510785726e-09, 7.742152197938622e-06, 8.551940538521265e-06]
	 ├─ Running on ─  differential_evolution SciPy Search Algorithm
	 └─ Defined at:  2.652965312572029  ─  20.019824878167153 



In [None]:
print("Parâmetros estimados: ", model.parameters)
print("Suposto Ro: ", model.parameters[0])
print("Valores de beta: {}, e r: {}".format(
    model.parameters[0]/model.parameters[1], 1/model.parameters[1]))
print("Dias contaminados: ", model.parameters[1])

Parâmetros estimados:  [2.65296531e+00 2.00198249e+01 7.07282237e-03]
Suposto Ro:  2.652965312572029
Valores de beta: 0.1325169090497515, e r: 0.049950486884156584
Dias contaminados:  20.019824878167153


In [None]:

import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import timedelta

date_pred = [df_all["date"][start_index] + timedelta(days=i) for i in t_pred]

figure = go.Figure()
figure.add_trace(go.Scatter(
                    x=df_all["date"], 
                    y=I,
                    name="Infectados",
                    mode='lines',
                    line_shape='spline',
                    line = dict(color='#ffca28', width=3)))
figure.add_trace(go.Scatter(
                    x=df_all["date"], 
                    y=R,
                    name="Recuperados",
                    mode="lines",
                    line_shape="spline",
                    line=dict(color='#76d275', width=3)))
figure.add_trace(go.Scatter(
                    x=date_pred,
                    y=y_pred[0],
                    name="Suscetíveis - modelo",
                    mode="lines",
                    line_shape="spline",
                    line=dict(color="#1e88e5", width=4, dash="dash")))
figure.add_trace(go.Scatter(
                    x=date_pred,
                    y=y_pred[2],
                    name="Recuperados - modelo",
                    mode="lines",
                    line_shape="spline",
                    line=dict(color="#43a047", width=4, dash="dash")))
figure.add_trace(go.Scatter(
                    x=date_pred,
                    y=y_pred[1],
                    name="Infectados - modelo",
                    mode="lines",
                    line_shape="spline",
                    line=dict(color="#f4511e", width=4, dash="dash")))
figure.update_layout(template='plotly_dark',
                  xaxis_title='Data', yaxis_title='Indivíduos',
                  title_text="Valores iniciais de infectados ")
# 'ggplot2', 'seaborn', 'simple_white', 'plotly',
# 'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
# 'ygridoff', 'gridon', 'none'
figure.show()



## Comparando com o último da núvem

In [None]:

import pandas_gbq
from google.oauth2 import service_account

PROJECT_ID    = "epidemicapp-280600"
PRED_TABLE_ID = "countries.last_predictions"

CREDENTIALS   = service_account.Credentials.from_service_account_file(
    './app/gkeys/epidemicapp-62d0d471b86f.json')
pandas_gbq.context.credentials = CREDENTIALS

query = """
    select I_pred as infected, R_pred as recovered, date as date
    from countries.last_predictions
    where country = "BR" 
"""

df = pandas_gbq.read_gbq(query, project_id=PROJECT_ID)
df = df.sort_values(by=["date"])

Downloading: 100%|██████████| 200/200 [00:01<00:00, 156.74rows/s]


In [None]:

fig = go.Figure()

fig.add_trace(go.Scatter(
    name="Infectados GCloud",
    x=df["date"],
    y=df["infected"]))
fig.add_trace(go.Scatter(
    name="Recuperados GCloud",
    x=df["date"],
    y=df["recovered"]))

fig.add_trace(go.Scatter(
    name="Infectados Dados mesclados",
    x=date_pred,
    y=y_pred[1],
    line=dict(dash="dash")))
fig.add_trace(go.Scatter(
    name="Recuperados Dados mesclados",
    x=date_pred,
    y=y_pred[2],
    line=dict(dash="dash")))

fig.update_layout(template='plotly_dark',
                  xaxis_title='Data', yaxis_title='Indivíduos',
                  title_text="Valores iniciais de infectados ")

fig.show()