# Importação

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Coletando dados

In [2]:
covid = pd.read_html("https://www.statista.com/statistics/1104709/coronavirus-deaths-worldwide-per-million-inhabitants/")[0]

In [3]:
covid.rename(columns={"Unnamed: 0":"Country"}, inplace=True)

In [4]:
covid.head(30)

Unnamed: 0,Country,Confirmed cases (absolute),Cases in last 7 days,Confirmed deaths (absolute),Deaths in last 7 days,Daily increase (# deaths),Population (in millions),Deaths per million (total),Deaths per million (last 7 days)
0,Belgium,760809,14507,21988,167,32,11.48,1914.65,14.54
1,Czechia,1184352,61100,19682,943,145,10.67,1844.66,88.38
2,Slovenia,187005,4521,3802,47,10,2.09,1820.93,22.51
3,United Kingdom¹,4132452,61159,121516,2356,442,66.83,1818.17,35.25
4,Italy,2848564,83152,96666,1779,318,60.3,1603.15,29.5
5,Portugal,800586,7757,16136,382,50,10.27,1571.27,37.2
6,USA¹,28225001,433941,503726,12669,3202,328.24,1534.63,38.6
7,Bosnia and Herzegovina,129753,2618,5051,89,17,3.3,1530.14,26.96
8,Hungary,410129,16106,14552,517,102,9.77,1489.47,52.92
9,North Macedonia,100632,2458,3098,80,13,2.08,1486.95,38.4


# Análise

In [5]:
covid.sort_values("Deaths per million (last 7 days)", ascending=False).head()

Unnamed: 0,Country,Confirmed cases (absolute),Cases in last 7 days,Confirmed deaths (absolute),Deaths in last 7 days,Daily increase (# deaths),Population (in millions),Deaths per million (total),Deaths per million (last 7 days)
19,Slovakia,298337,12918,6775,504,104,5.45,1242.19,92.41
1,Czechia,1184352,61100,19682,943,145,10.67,1844.66,88.38
8,Hungary,410129,16106,14552,517,102,9.77,1489.47,52.92
11,Moldova,180150,5995,3846,129,3846,2.66,1447.15,48.54
41,Lebanon,362850,14040,4508,302,62,6.86,657.55,44.05


In [6]:
covid[["Cases in last 7 days", "Deaths in last 7 days"]].corr()

Unnamed: 0,Cases in last 7 days,Deaths in last 7 days
Cases in last 7 days,1.0,0.941965
Deaths in last 7 days,0.941965,1.0


In [7]:
covid[["Cases in last 7 days", "Population (in millions)"]].corr()

Unnamed: 0,Cases in last 7 days,Population (in millions)
Cases in last 7 days,1.0,0.261456
Population (in millions),0.261456,1.0


# Predição

In [8]:
from sklearn import linear_model

# Dividir os dados em conjutos de treinando/treinados
covid_treinando = covid[["Country", "Cases in last 7 days", "Deaths in last 7 days"]][:152]
covid_treinados = covid[["Country", "Cases in last 7 days", "Deaths in last 7 days"]][1:31]

covid_x_treinando = covid_treinando[["Cases in last 7 days"]]
covid_x_treinados = covid_treinados[["Cases in last 7 days"]]

covid_y_treinando = covid_treinando[["Deaths in last 7 days"]]
covid_y_treinados = covid_treinados[["Deaths in last 7 days"]]

# Criando um objeto de regressão linear
rl = linear_model.LinearRegression()

# Treinar o modelo usando um conjunto de dados
rl.fit(covid_x_treinando, covid_y_treinando)

# Fazer a predição usando os dados de treinados
covid_y_pred = rl.predict(covid_x_treinados)

In [9]:
covid_treinados['Predição'] = covid_y_pred

In [10]:
covid_treinados['Predição'] = covid_treinados['Predição'].apply(lambda x: 0 if x<0 else x)

In [11]:
covid_treinados.head()

Unnamed: 0,Country,Cases in last 7 days,Deaths in last 7 days,Predição
1,Czechia,61100,943,1584.180628
2,Slovenia,4521,47,97.64548
3,United Kingdom¹,61159,2356,1585.730771
4,Italy,83152,1779,2163.566497
5,Portugal,7757,382,182.666914


In [12]:
covid_treinados.sort_values("Predição", ascending=False).head()

Unnamed: 0,Country,Cases in last 7 days,Deaths in last 7 days,Predição
6,USA¹,433941,12669,11380.063393
20,Brazil,293837,6500,7699.024133
17,France¹,119737,1887,3124.786885
4,Italy,83152,1779,2163.566497
3,United Kingdom¹,61159,2356,1585.730771


In [13]:
brazil = covid_treinados.loc[covid_treinados['Country'] == 'Brazil']
print(brazil)

   Country  Cases in last 7 days  Deaths in last 7 days     Predição
20  Brazil                293837                   6500  7699.024133


In [14]:
covid_dif_treinados = abs(covid_treinados['Deaths in last 7 days'] - covid_treinados['Predição'])

In [15]:
covid_dif_treinados.describe()

count      30.000000
mean      393.713438
std       733.823419
min         4.789954
25%        41.467043
50%       114.217632
75%       368.701229
max      3723.276684
dtype: float64