### Objetivo do Notebook

O objetivo deste caderno é estudar o surto de COVID-19 com a ajuda da biblioteca pandas.

## Importando pacotes Python 

In [1]:
from zipfile import ZipFile
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import datetime as dt
from datetime import timedelta

In [2]:
# Load the case table directly from the zipped dataset.
# The archive member is opened exactly once and its stream is closed as soon as
# pandas has consumed it (the original opened it twice and never closed either).
# NOTE(review): `zip_file` itself is intentionally left open in case later cells
# read other members from the same archive - confirm, otherwise close it here too.
zip_file = ZipFile("./CSV/novel-corona-virus-2019-dataset.zip")
with zip_file.open('covid_19_data.csv') as arq:
    covid=pd.read_csv(arq)
covid.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


In [3]:
# Structural overview of the raw table: dimensions, missing values per column
# and the dtype pandas assigned to each column.
for _label, _value in (
    ("Tamanho / forma do conjunto de dados: ", covid.shape),
    ("Verificando valores null:\n", covid.isna().sum()),
    ("Verificando tipos de dados de cada coluna:\n", covid.dtypes),
):
    print(_label, _value)

Tamanho / forma do conjunto de dados:  (25582, 8)
Verificando valores null:
 SNo                    0
ObservationDate        0
Province/State     13288
Country/Region         0
Last Update            0
Confirmed              0
Deaths                 0
Recovered              0
dtype: int64
Verificando tipos de dados de cada coluna:
 SNo                  int64
ObservationDate     object
Province/State      object
Country/Region      object
Last Update         object
Confirmed          float64
Deaths             float64
Recovered          float64
dtype: object


In [4]:
# Convert "ObservationDate" to datetime.
# The raw values look like 01/22/2020 (month/day/year), so the format is pinned
# explicitly instead of relying on inference, which is slower and can silently
# swap day and month on ambiguous dates.
covid["ObservationDate"]=pd.to_datetime(covid["ObservationDate"],format="%m/%d/%Y")

In [5]:
# Drop the serial-number column.
# - `columns=` replaces the positional axis argument, which pandas >= 2.0 rejects.
# - errors="ignore" plus rebinding (instead of inplace=True) keeps the cell safe
#   to re-run: a second execution no longer raises KeyError.
covid=covid.drop(columns=["SNo"],errors="ignore")

In [6]:
# Total confirmed / recovered / death counts per observation date
datewise=covid.groupby("ObservationDate")[["Confirmed","Recovered","Deaths"]].sum()
# Time elapsed (as a Timedelta) between each date and the earliest date in the index
datewise=datewise.assign(**{"Days Since": lambda d: d.index-d.index.min()})

In [7]:
# Display the per-date totals together with the "Days Since" column
datewise

Unnamed: 0_level_0,Confirmed,Recovered,Deaths,Days Since
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-22,555.0,28.0,17.0,0 days
2020-01-23,653.0,30.0,18.0,1 days
2020-01-24,941.0,36.0,26.0,2 days
2020-01-25,1438.0,39.0,42.0,3 days
2020-01-26,2118.0,52.0,56.0,4 days
2020-01-27,2927.0,61.0,82.0,5 days
2020-01-28,5578.0,107.0,131.0,6 days
2020-01-29,6165.0,126.0,133.0,7 days
2020-01-30,8235.0,143.0,171.0,8 days
2020-01-31,9925.0,222.0,213.0,9 days


In [8]:
# Headline figures. "Latest" values come from the last row of the per-date
# totals, "previous" values from the row before it; averages divide the latest
# total by the number of dates (and by 24 for the hourly figures).
conf_latest=datewise["Confirmed"].iloc[-1]
rec_latest=datewise["Recovered"].iloc[-1]
deaths_latest=datewise["Deaths"].iloc[-1]
conf_previous=datewise["Confirmed"].iloc[-2]
rec_previous=datewise["Recovered"].iloc[-2]
deaths_previous=datewise["Deaths"].iloc[-2]
n_dates=datewise.shape[0]

print("Informação Básica")
print("Número total de países com propagação de doenças: ",covid["Country/Region"].unique().size)

# Totals on the most recent date
print("Número total de casos confirmados em todo o mundo: ",conf_latest)
print("Número total de casos recuperados em todo o mundo: ",rec_latest)
print("Número total de casos de mortes no mundo: ",deaths_latest)
print("Número total de casos ativos em todo o mundo: ",(conf_latest-rec_latest-deaths_latest))
print("Número total de casos encerrados no mundo: ",rec_latest+deaths_latest)

# Rounded averages per date
print("Número aproximado de casos confirmados por dia em todo o mundo: ",np.round(conf_latest/n_dates))
print("Número aproximado de casos recuperados por dia em todo o mundo: ",np.round(rec_latest/n_dates))
print("Número aproximado de casos de morte por dia em todo o mundo: ",np.round(deaths_latest/n_dates))

# Rounded averages per hour
print("Número aproximado de casos confirmados por hora em todo o mundo: ",np.round(conf_latest/((n_dates)*24)))
print("Número aproximado de casos recuperados por hora em todo o mundo: ",np.round(rec_latest/((n_dates)*24)))
print("Número aproximado de casos de morte por hora em todo o mundo: ",np.round(deaths_latest/((n_dates)*24)))

# Change between the last two dates
print("Número de casos confirmados nas últimas 24 horas: ",conf_latest-conf_previous)
print("Número de casos recuperados nas últimas 24 horas: ",rec_latest-rec_previous)
print("Número de casos de morte nas últimas 24 horas: ",deaths_latest-deaths_previous)

Informação Básica
Número total de países com propagação de doenças:  223
Número total de casos confirmados em todo o mundo:  4634068.0
Número total de casos recuperados em todo o mundo:  1693197.0
Número total de casos de mortes no mundo:  311781.0
Número total de casos ativos em todo o mundo:  2629090.0
Número total de casos encerrados no mundo:  2004978.0
Número aproximado de casos confirmados por dia em todo o mundo:  39949.0
Número aproximado de casos recuperados por dia em todo o mundo:  14597.0
Número aproximado de casos de morte por dia em todo o mundo:  2688.0
Número aproximado de casos confirmados por hora em todo o mundo:  1665.0
Número aproximado de casos recuperados por hora em todo o mundo:  608.0
Número aproximado de casos de morte por hora em todo o mundo:  112.0
Número de casos confirmados nas últimas 24 horas:  91721.0
Número de casos recuperados nas últimas 24 horas:  56130.0
Número de casos de morte nas últimas 24 horas:  4115.0


In [9]:
# Same structural overview as before, repeated after the date conversion and
# the removal of the serial-number column.
_overview={
    "Tamanho / forma do conjunto de dados: ": covid.shape,
    "Verificando valores nulos:\n": covid.isna().sum(),
    "Verificando o tipo de dados de cada coluna:\n": covid.dtypes,
}
for _title, _content in _overview.items():
    print(_title, _content)

Tamanho / forma do conjunto de dados:  (25582, 7)
Verificando valores nulos:
 ObservationDate        0
Province/State     13288
Country/Region         0
Last Update            0
Confirmed              0
Deaths                 0
Recovered              0
dtype: int64
Verificando o tipo de dados de cada coluna:
 ObservationDate    datetime64[ns]
Province/State             object
Country/Region             object
Last Update                object
Confirmed                 float64
Deaths                    float64
Recovered                 float64
dtype: object


In [10]:
# Convert "ObservationDate" to datetime format.
# NOTE(review): the dtype listing printed by the previous cell already reports this
# column as datetime64[ns], so this repeated conversion looks redundant - confirm
# and consider removing the cell.
covid["ObservationDate"]=pd.to_datetime(covid["ObservationDate"])

## Análise por data 

In [11]:
# Group the different case types by observation date
analisedata=covid.groupby(["ObservationDate"]).agg({"Confirmed":'sum',"Recovered":'sum',"Deaths":'sum'})
# Elapsed time since the first observation date, derived from this frame's own
# index. The original read `datewise.index`, which silently coupled this cell
# to another cell's variable.
analisedata["Days Since"]=analisedata.index-analisedata.index.min()

In [12]:
# Headline figures computed from `analisedata`: the last two rows of the three
# case columns are pulled out once and reused for every printed line.
_case_totals=analisedata[["Confirmed","Recovered","Deaths"]]
_latest=_case_totals.iloc[-1]
_previous=_case_totals.iloc[-2]
_n_dates=len(analisedata)
_n_hours=_n_dates*24

print("Informação básica")
print("Número total de países com propagação de doenças: ",covid["Country/Region"].unique().size)
print("Número total de casos confirmados em todo o mundo: ",_latest["Confirmed"])
print("Número total de casos recuperados em todo o mundo: ",_latest["Recovered"])
print("Número total de casos de mortes no mundo: ",_latest["Deaths"])
# Active = confirmed minus recovered minus deaths; closed = recovered plus deaths
print("Número total de casos ativos em todo o mundo: ",(_latest["Confirmed"]-_latest["Recovered"]-_latest["Deaths"]))
print("Número total de casos encerrados no mundo: ",_latest["Recovered"]+_latest["Deaths"])
# Rounded averages: latest total divided by the number of dates / hours
print("Número aproximado de casos confirmados por dia em todo o mundo: ",np.round(_latest["Confirmed"]/_n_dates))
print("Número aproximado de casos recuperados por dia em todo o mundo: ",np.round(_latest["Recovered"]/_n_dates))
print("Número aproximado de casos de morte por dia em todo o mundo: ",np.round(_latest["Deaths"]/_n_dates))
print("Número aproximado de casos confirmados por hora em todo o mundo: ",np.round(_latest["Confirmed"]/_n_hours))
print("Número aproximado de casos recuperados por hora em todo o mundo: ",np.round(_latest["Recovered"]/_n_hours))
print("Número aproximado de casos de morte por hora em todo o mundo: ",np.round(_latest["Deaths"]/_n_hours))
# Difference between the last two dates
print("Número de casos confirmados nas últimas 24 horas: ",_latest["Confirmed"]-_previous["Confirmed"])
print("Número de casos recuperados nas últimas 24 horas: ",_latest["Recovered"]-_previous["Recovered"])
print("Número de casos de morte nas últimas 24 horas: ",_latest["Deaths"]-_previous["Deaths"])

Informação básica
Número total de países com propagação de doenças:  223
Número total de casos confirmados em todo o mundo:  4634068.0
Número total de casos recuperados em todo o mundo:  1693197.0
Número total de casos de mortes no mundo:  311781.0
Número total de casos ativos em todo o mundo:  2629090.0
Número total de casos encerrados no mundo:  2004978.0
Número aproximado de casos confirmados por dia em todo o mundo:  39949.0
Número aproximado de casos recuperados por dia em todo o mundo:  14597.0
Número aproximado de casos de morte por dia em todo o mundo:  2688.0
Número aproximado de casos confirmados por hora em todo o mundo:  1665.0
Número aproximado de casos recuperados por hora em todo o mundo:  608.0
Número aproximado de casos de morte por hora em todo o mundo:  112.0
Número de casos confirmados nas últimas 24 horas:  91721.0
Número de casos recuperados nas últimas 24 horas:  56130.0
Número de casos de morte nas últimas 24 horas:  4115.0


In [13]:
# Doubling milestones: 1000, 2000, 4000, ... confirmed cases.
# For every milestone `c`, record the "Days Since" value of the last date whose
# confirmed total was still at or below `c`. The milestone is doubled until it
# is no longer below the largest confirmed total in `datewise`.
c=1000
double_days=[]
C=[]
confirmed_max=datewise["Confirmed"].max()  # loop-invariant, computed once
while True:
    # .iloc[-1] is an explicit positional lookup; the original `[...][0]` relied on
    # integer-key positional fallback on a datetime-indexed Series (deprecated in pandas 2.x)
    double_days.append(datewise.loc[datewise["Confirmed"]<=c,"Days Since"].iloc[-1])
    C.append(c)
    c=c*2
    if not c<confirmed_max:
        break

In [14]:
# One row per milestone: the case threshold and the day count recorded for it
doubling_rate=pd.DataFrame({"No. of cases":C,"Days since first Case":double_days})
# Gap between consecutive milestones; the first row has no predecessor, so its
# gap is filled with its own day count
_days_col=doubling_rate["Days since first Case"]
doubling_rate["Number of days for doubling"]=_days_col.diff().fillna(_days_col)
doubling_rate

Unnamed: 0,No. of cases,Days since first Case,Number of days for doubling
0,1000,2 days,2 days
1,2000,3 days,1 days
2,4000,5 days,2 days
3,8000,7 days,2 days
4,16000,10 days,3 days
5,32000,15 days,5 days
6,64000,22 days,7 days
7,128000,49 days,27 days
8,256000,57 days,8 days
9,512000,63 days,6 days


In [15]:
# Milestones in steps of 100,000 confirmed cases.
# For every milestone `c1`, record the "Days Since" value of the last date whose
# confirmed total was still at or below `c1`. The milestone grows by 100,000
# until it is no longer below the largest confirmed total in `analisedata`.
c1=100000
days_100k=[]
C1=[]
confirmed_max_100k=analisedata["Confirmed"].max()  # loop-invariant, computed once
while True:
    # .iloc[-1] is an explicit positional lookup; the original `[...][0]` relied on
    # integer-key positional fallback on a datetime-indexed Series (deprecated in pandas 2.x)
    days_100k.append(analisedata.loc[analisedata["Confirmed"]<=c1,"Days Since"].iloc[-1])
    C1.append(c1)
    c1=c1+100000
    if not c1<confirmed_max_100k:
        break

In [16]:
# Case totals per (country/region, observation date) pair; the result is indexed
# by both grouping keys.
agrupamento_pais=(
    covid
    .groupby(["Country/Region","ObservationDate"])[["Confirmed","Recovered","Deaths"]]
    .sum()
)

In [17]:
# Display the per-country, per-date totals
agrupamento_pais

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Recovered,Deaths
Country/Region,ObservationDate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Azerbaijan,2020-02-28,1.0,0.0,0.0
"('St. Martin',)",2020-03-10,2.0,0.0,0.0
Afghanistan,2020-02-24,1.0,0.0,0.0
Afghanistan,2020-02-25,1.0,0.0,0.0
Afghanistan,2020-02-26,1.0,0.0,0.0
Afghanistan,2020-02-27,1.0,0.0,0.0
Afghanistan,2020-02-28,1.0,0.0,0.0
Afghanistan,2020-02-29,1.0,0.0,0.0
Afghanistan,2020-03-01,1.0,0.0,0.0
Afghanistan,2020-03-02,1.0,0.0,0.0


In [18]:
# Derived columns, added in this order:
#   "Active Cases"  = Confirmed - Recovered - Deaths
#   "log_confirmed" = natural log of Confirmed
#   "log_active"    = natural log of Active Cases
# NOTE(review): np.log yields -inf for 0 and NaN for negative inputs; rows with
# zero or negative counts are not filtered here - confirm downstream handling.
agrupamento_pais=agrupamento_pais.assign(
    **{"Active Cases": lambda df: df["Confirmed"]-df["Recovered"]-df["Deaths"]},
    log_confirmed=lambda df: np.log(df["Confirmed"]),
    log_active=lambda df: np.log(df["Active Cases"]),
)

In [19]:
# Display the per-country table including the active-case and log columns
agrupamento_pais

Unnamed: 0_level_0,Unnamed: 1_level_0,Confirmed,Recovered,Deaths,Active Cases,log_confirmed,log_active
Country/Region,ObservationDate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Azerbaijan,2020-02-28,1.0,0.0,0.0,1.0,0.000000,0.000000
"('St. Martin',)",2020-03-10,2.0,0.0,0.0,2.0,0.693147,0.693147
Afghanistan,2020-02-24,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-02-25,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-02-26,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-02-27,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-02-28,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-02-29,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-03-01,1.0,0.0,0.0,1.0,0.000000,0.000000
Afghanistan,2020-03-02,1.0,0.0,0.0,1.0,0.000000,0.000000


### Análise de China, Itália, EUA, Espanha e Resto do Mundo

In [20]:
def _totals_by_date(subset):
    """Sum Confirmed, Recovered and Deaths per observation date for the given rows."""
    return subset.groupby(["ObservationDate"]).agg({"Confirmed":'sum',"Recovered":'sum',"Deaths":'sum'})

# Row subsets: four individually analysed countries plus everything else
_country=covid["Country/Region"]
china_data=covid[_country=="Mainland China"]
Italy_data=covid[_country=="Italy"]
US_data=covid[_country=="US"]
spain_data=covid[_country=="Spain"]
rest_of_world=covid[~_country.isin(["Mainland China","Italy","US","Spain"])]

# Per-date totals for each subset
analisedata_china=_totals_by_date(china_data)
analisedata_Italy=_totals_by_date(Italy_data)
analisedata_US=_totals_by_date(US_data)
analisedata_Spain=_totals_by_date(spain_data)
analisedata_restofworld=_totals_by_date(rest_of_world)

In [21]:
# Add two percentage columns to each regional per-date table:
#   "Mortality" = Deaths / Confirmed * 100
#   "Recovery"  = Recovered / Confirmed * 100
for _regional in (
    analisedata_china,
    analisedata_Italy,
    analisedata_US,
    analisedata_Spain,
    analisedata_restofworld,
):
    _regional["Mortality"]=(_regional["Deaths"]/_regional["Confirmed"])*100
    _regional["Recovery"]=(_regional["Recovered"]/_regional["Confirmed"])*100

In [45]:
# Display the per-date totals and rates for Mainland China
analisedata_china

Unnamed: 0_level_0,Confirmed,Recovered,Deaths,Mortality,Recovery
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-22,547.0,28.0,17.0,3.107861,5.118830
2020-01-23,639.0,30.0,18.0,2.816901,4.694836
2020-01-24,916.0,36.0,26.0,2.838428,3.930131
2020-01-25,1399.0,39.0,42.0,3.002144,2.787706
2020-01-26,2062.0,49.0,56.0,2.715810,2.376334
2020-01-27,2863.0,58.0,82.0,2.864129,2.025847
2020-01-28,5494.0,101.0,131.0,2.384419,1.838369
2020-01-29,6070.0,120.0,133.0,2.191104,1.976936
2020-01-30,8124.0,135.0,171.0,2.104874,1.661743
2020-01-31,9783.0,214.0,213.0,2.177246,2.187468


In [47]:
# Display the per-date totals and rates for Italy
analisedata_Italy

Unnamed: 0_level_0,Confirmed,Recovered,Deaths,Mortality,Recovery
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-31,2.0,0.0,0.0,0.000000,0.000000
2020-02-01,2.0,0.0,0.0,0.000000,0.000000
2020-02-02,2.0,0.0,0.0,0.000000,0.000000
2020-02-03,2.0,0.0,0.0,0.000000,0.000000
2020-02-04,2.0,0.0,0.0,0.000000,0.000000
2020-02-05,2.0,0.0,0.0,0.000000,0.000000
2020-02-06,2.0,0.0,0.0,0.000000,0.000000
2020-02-07,3.0,0.0,0.0,0.000000,0.000000
2020-02-08,3.0,0.0,0.0,0.000000,0.000000
2020-02-09,3.0,0.0,0.0,0.000000,0.000000


In [48]:
# Display the per-date totals and rates for all remaining countries/regions
analisedata_restofworld

Unnamed: 0_level_0,Confirmed,Recovered,Deaths,Mortality,Recovery
ObservationDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-22,7.0,0.0,0.0,0.000000,0.000000
2020-01-23,13.0,0.0,0.0,0.000000,0.000000
2020-01-24,23.0,0.0,0.0,0.000000,0.000000
2020-01-25,37.0,0.0,0.0,0.000000,0.000000
2020-01-26,51.0,3.0,0.0,0.000000,5.882353
2020-01-27,59.0,3.0,0.0,0.000000,5.084746
2020-01-28,79.0,6.0,0.0,0.000000,7.594937
2020-01-29,90.0,6.0,0.0,0.000000,6.666667
2020-01-30,106.0,8.0,0.0,0.000000,7.547170
2020-01-31,134.0,8.0,0.0,0.000000,5.970149
