# COVID-19 - Previsões

- Link Kaggle: https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset

# Importação das bibliotecas

In [None]:
import numpy as np 
import matplotlib.pyplot as plt 
import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
import seaborn as sns

# Exploração das bases de dados

## Covid-19

In [None]:
# Load the COVID-19 observation table from the local datasets folder.
covid_csv_path = './datasets/covid_19_data.csv'
covid = pd.read_csv(covid_csv_path)

In [None]:
covid.shape

In [None]:
covid.head()

In [None]:
# Keep only the rows whose Country/Region is Brazil, then echo the subset's
# (rows, columns) shape.
is_brazil = covid['Country/Region'].eq('Brazil')
df_brasil = covid[is_brazil]
df_brasil.shape

In [None]:
df_brasil.head()

In [None]:
# Confirmed cases in Brazil per observation date.
# x/y are passed by keyword: recent seaborn releases only accept `data`
# positionally, so the positional form fails there, while the keyword form
# works on old and new versions alike.
# The trailing semicolon suppresses the Axes repr in the notebook output.
sns.lineplot(x='ObservationDate', y='Confirmed', data=df_brasil);

In [None]:
# Worldwide totals per observation date: sum the three counters over every
# row that shares the same ObservationDate.
# NOTE(review): ObservationDate is not parsed as a date when the CSV is read,
# so the groups are presumably ordered as strings — confirm that this matches
# chronological order for the date range in the file.
count_columns = ['Confirmed', 'Deaths', 'Recovered']
df_sum = (
    covid.groupby('ObservationDate')[count_columns]
    .sum()
    .reset_index()
)

In [None]:
df_sum.head()

In [None]:
df_sum.tail()

In [None]:
# Stacked area chart of the worldwide totals, one layer per counter.
series_names = ['Confirmed', 'Deaths', 'Recovered']
stacked_series = [df_sum[name] for name in series_names]
plt.stackplot(df_sum['ObservationDate'], stacked_series, labels=series_names)
plt.legend(loc='upper left')

In [None]:
# Plot pairwise relationships between the columns of the raw observation table.
sns.pairplot(covid)

## Covid-19 line_list_data

In [None]:
# Load the per-patient line-list table from the local datasets folder.
line_list_csv_path = './datasets/COVID19_line_list_data.csv'
line_list_df = pd.read_csv(line_list_csv_path)

In [None]:
line_list_df.shape

In [None]:
line_list_df.head(2)

In [None]:
# Distribution of the `age` column of the line-list data.
# NOTE(review): seaborn has deprecated `distplot` in favour of
# `histplot`/`displot` — confirm the installed version still provides it.
sns.distplot(line_list_df['age'])

# Previsões de mortes por COVID-19

## Preparação da base de dados

- Preparação dos dados baseada em: https://www.kaggle.com/chaudharijay2000/prediction-of-death-and-confirmed-cases-covid-19

In [None]:
# Load the deaths time-series table from the local datasets folder.
deaths_csv_path = './datasets/time_series_covid_19_deaths.csv'
deaths_df = pd.read_csv(deaths_csv_path)

In [None]:
deaths_df.shape

In [None]:
deaths_df.head()

In [None]:
deaths_df.tail()

In [None]:
deaths_df[deaths_df['Country/Region'] == 'Brazil']

In [None]:
# Column labels of the deaths table; echoed so the layout can be inspected
# before slicing.
columns = deaths_df.columns
columns

In [None]:
# Drop the four leading columns and keep everything from the fifth label
# through the last one.
# NOTE(review): presumably the dropped columns are location metadata and the
# remaining ones are one column per date — confirm against the CSV header.
first_kept_label = columns[4]
deaths_df = deaths_df.loc[:, first_kept_label:]

In [None]:
deaths_df.head()

In [None]:
deaths_df.keys()

In [None]:
len(deaths_df.keys())

In [None]:
deaths_df['1/22/20'].sum()

In [None]:
deaths_df['3/26/20'].sum()

In [None]:
# One total per remaining column: sum each column over all rows, keeping the
# column order of the table.
dates = deaths_df.keys()
y = [deaths_df[day].sum() for day in dates]

In [None]:
print(y)

In [None]:
len(y)

In [None]:
type(y)

In [None]:
# Turn the per-date totals into a 2-D column vector (one row per date, a
# single column) for use as the regression target.
death_totals = np.array(y)
y = death_totals.reshape((-1, 1))

In [None]:
y.shape

In [None]:
print(y)

In [None]:
# Feature matrix: the day index 0..len(dates)-1 as a single column.
n_days = len(dates)
X = np.arange(n_days).reshape((n_days, 1))

In [None]:
X.shape

In [None]:
print(X)

In [None]:
# Day indices covering every observed date plus 10 further days, as a single
# column; used later as the prediction range.
horizon_len = len(dates) + 10
forecast = np.arange(horizon_len).reshape((horizon_len, 1))

In [None]:
forecast.shape

In [None]:
print(forecast)

In [None]:
# Hold out the last 15% of the days as the test set. Shuffling is disabled so
# the split keeps the original day order (train = earlier days, test = later).
split_options = {'test_size': 0.15, 'shuffle': False}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
X_test

In [None]:
y_test

In [None]:
X_train.shape

In [None]:
y_train.shape

In [None]:
X_test.shape

In [None]:
y_test.shape

## Regressão polinomial

### Construção do modelo

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Expand the day index into polynomial terms up to degree 4. The transformer
# is fitted on the training days only and then applied to both splits.
poly = PolynomialFeatures(degree=4)
poly.fit(X_train)
X_train_poly = poly.transform(X_train)
X_test_poly = poly.transform(X_test)

In [None]:
X_train_poly.shape

In [None]:
X_test_poly.shape

In [None]:
X_test_poly

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares on the polynomial features; `fit` returns the
# estimator itself, which is echoed as the cell output.
regressor = LinearRegression().fit(X_train_poly, y_train)
regressor

### Previsões

In [None]:
# Predict the totals for the held-out test days and echo the predictions.
poly_test_pred = regressor.predict(X_test_poly)
poly_test_pred

In [None]:
y_test

In [None]:
# Dashed line: model predictions for the test days; solid line: actual totals.
# Both are plotted against their position within the test split.
plt.plot(poly_test_pred, linestyle = 'dashed')
plt.plot(y_test)

In [None]:
# Error metrics on the held-out test days.
# Arguments follow scikit-learn's (y_true, y_pred) order.
mae = mean_absolute_error(y_test, poly_test_pred)
mse = mean_squared_error(y_test, poly_test_pred)
print('MAE:', mae)
print('MSE:', mse)
# RMSE is the square root of the MSE (the original took the root of the MAE).
print('RMSE:', np.sqrt(mse))

In [None]:
forecast.shape

In [None]:
# Expand every day index in `forecast` with the already-fitted polynomial
# transformer, then echo the resulting shape.
# NOTE: despite its name, `X_train_all` holds features for the whole forecast
# range (observed days plus the extra ones), not a training split.
X_train_all = poly.transform(forecast)
X_train_all.shape

In [None]:
# Predict a total for every row of `X_train_all` and echo how many
# predictions were produced.
poly_pred_all = regressor.predict(X_train_all)
len(poly_pred_all)

In [None]:
# Observed totals (red) against the model's curve over the full forecast
# range (dashed). The last 10 rows of `forecast` have no observation, so
# they are dropped from the x-values of the observed series.
observed_days = forecast[:-10]
plt.plot(observed_days, y, color='red', label='Death cases')
plt.plot(forecast, poly_pred_all, linestyle='dashed', label='Predictions')
plt.title('Deaths of COVID-19')
plt.xlabel('Days since 1/22/2020')
plt.ylabel('Number of deaths')
plt.legend();