In [1]:
import numpy as np
import pandas as pd
import scipy.optimize as opt
import plotly.graph_objects as go
import plotly.express as px
%matplotlib inline

In [2]:
import os
import datetime as dt

In [3]:
import seaborn as sns
sns.set(palette='Set2')

In [4]:
from plotly.offline import iplot
import plotly.io as pio
pio.renderers.default = 'notebook_connected'

## Загружаем csv

In [5]:
path = os.listdir('csse_covid_19_daily_reports')
print(path)

['02-25-2020.csv', '03-25-2020.csv', '03-30-2020.csv', '03-17-2020.csv', '02-01-2020.csv', '02-23-2020.csv', '03-05-2020.csv', '01-31-2020.csv', '02-22-2020.csv', '02-13-2020.csv', '03-11-2020.csv', '01-30-2020.csv', '01-22-2020.csv', '02-14-2020.csv', '02-20-2020.csv', '03-02-2020.csv', '03-26-2020.csv', '02-26-2020.csv', '01-29-2020.csv', '03-10-2020.csv', '02-05-2020.csv', '03-21-2020.csv', '02-06-2020.csv', '03-14-2020.csv', '02-24-2020.csv', '03-01-2020.csv', '03-28-2020.csv', '03-13-2020.csv', '02-11-2020.csv', '03-09-2020.csv', '02-09-2020.csv', '03-07-2020.csv', '01-27-2020.csv', '01-28-2020.csv', '03-22-2020.csv', '02-04-2020.csv', '03-15-2020.csv', '03-29-2020.csv', '02-08-2020.csv', '02-27-2020.csv', '02-02-2020.csv', '03-19-2020.csv', '01-23-2020.csv', '03-03-2020.csv', '03-20-2020.csv', '03-27-2020.csv', '02-12-2020.csv', '02-07-2020.csv', '02-17-2020.csv', '03-06-2020.csv', '03-04-2020.csv', '01-26-2020.csv', '02-10-2020.csv', '02-29-2020.csv', '02-15-2020.csv', '03-18-20

In [6]:
corona_every_day = []
for file in path:
    print(file)
    corona = pd.read_csv('csse_covid_19_daily_reports/' + file)
    corona.rename(columns={'Country/Region': 'Country_Region'}, inplace=True)
    corona['Date'] = dt.datetime.strptime(file[0 : -4], '%m-%d-%Y').date()
    corona = corona[['Country_Region', 'Confirmed', 'Deaths', 'Recovered', 'Date']]
    corona_every_day.append(corona)

02-25-2020.csv
03-25-2020.csv
03-30-2020.csv
03-17-2020.csv
02-01-2020.csv
02-23-2020.csv
03-05-2020.csv
01-31-2020.csv
02-22-2020.csv
02-13-2020.csv
03-11-2020.csv
01-30-2020.csv
01-22-2020.csv
02-14-2020.csv
02-20-2020.csv
03-02-2020.csv
03-26-2020.csv
02-26-2020.csv
01-29-2020.csv
03-10-2020.csv
02-05-2020.csv
03-21-2020.csv
02-06-2020.csv
03-14-2020.csv
02-24-2020.csv
03-01-2020.csv
03-28-2020.csv
03-13-2020.csv
02-11-2020.csv
03-09-2020.csv
02-09-2020.csv
03-07-2020.csv
01-27-2020.csv
01-28-2020.csv
03-22-2020.csv
02-04-2020.csv
03-15-2020.csv
03-29-2020.csv
02-08-2020.csv
02-27-2020.csv
02-02-2020.csv
03-19-2020.csv
01-23-2020.csv
03-03-2020.csv
03-20-2020.csv
03-27-2020.csv
02-12-2020.csv
02-07-2020.csv
02-17-2020.csv
03-06-2020.csv
03-04-2020.csv
01-26-2020.csv
02-10-2020.csv
02-29-2020.csv
02-15-2020.csv
03-18-2020.csv
02-28-2020.csv
03-31-2020.csv
03-16-2020.csv
01-24-2020.csv
03-23-2020.csv
01-25-2020.csv
02-19-2020.csv
02-21-2020.csv
03-08-2020.csv
02-18-2020.csv
02-16-2020

## Создаем таблицу по России

In [7]:
Russia = pd.DataFrame(columns = ['Country_Region', 'Confirmed', 'Deaths', 'Recovered'])

for table in corona_every_day:
    russia = table[table['Country_Region'] == 'Russia']
    Russia = Russia.append(russia)
Russia


Unnamed: 0,Country_Region,Confirmed,Deaths,Recovered,Date
61,Russia,2,0.0,2.0,2020-02-25
3380,Russia,658,3.0,29.0,2020-03-25
3398,Russia,1836,9.0,66.0,2020-03-30
83,Russia,114,0.0,8.0,2020-03-17
47,Russia,2,0.0,0.0,2020-02-01
57,Russia,2,0.0,2.0,2020-02-23
95,Russia,4,0.0,2.0,2020-03-05
51,Russia,2,,,2020-01-31
57,Russia,2,0.0,2.0,2020-02-22
53,Russia,2,0.0,2.0,2020-02-13


In [8]:
Russia = Russia.fillna(0)

In [9]:
Russia = Russia.sort_values('Date')

In [10]:
dates = Russia['Date'] #array of dates
confirmed = Russia['Confirmed'] #array of confirmed
n_groups = confirmed.count() #amount of days
index = np.arange(n_groups)

## Аппроксимируем экспонентой

In [11]:
def exp(x, A, B, C, D):
    return A * np.exp(B * x + C) + D

popt, pcov = opt.curve_fit(exp, index, confirmed,  (1e3, 1e-2, 1., -1e1), maxfev=10**6) # approximate to exp
A, B, C, D = popt

#### Видим, что данные действительно хорошо аппроксимируются экспонентой

In [12]:
fig = go.Figure()

fig.add_trace(go.Bar(x=dates, y=confirmed, name="Подтвержденные", marker_color='orange'))

fig.add_trace(go.Scatter(x=dates, y=exp(index, A, B, C, D), name="Экспонента", marker_color='green'))


fig.update_layout(
    title_text="Коронавирус в России",
    title_font_size=20,
    xaxis_title="Дата",
    yaxis_title="Число зараженных",
)


iplot(fig)

## Попробуем предсказать на 15 дней

In [13]:
new_index = np.arange(n_groups - 1, n_groups + 15)

predict_dates = []
predict_confirmed = exp(new_index[1:], A, B, C, D)

for i in range(16):
    predict_dates.append(dates.iloc[-1] + dt.timedelta(days=i))

In [14]:
fig.add_trace(go.Bar(x=predict_dates[1:], y=predict_confirmed, name="Аппроксимация", marker_color='red'))

fig.add_trace(go.Scatter(x=predict_dates, y=exp(new_index, A, B, C, D), name="Экспонента", mode='lines', marker_color='green'))