<a href="https://colab.research.google.com/github/DorotaJanosz/machine-learning-bootcamp/blob/master/unsupervised/05_case_studies/02_coronavirus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Import bibliotek

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Seed NumPy's global random generator with a fixed value.
np.random.seed(42)

### Wczytanie danych

In [2]:
# The dataset covers 22 Jan 2020 through 17 Feb 2020.
url = 'https://storage.googleapis.com/esmartdata-courses-files/ml-course/coronavirus.csv'
# Parse both timestamp columns into datetimes while reading.
data = pd.read_csv(
    filepath_or_buffer=url,
    parse_dates=['Date', 'Last Update'],
)
data.head()

Unnamed: 0,Sno,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,2020-01-22 12:00:00,Anhui,China,2020-01-22 12:00:00,1.0,0.0,0.0
1,2,2020-01-22 12:00:00,Beijing,China,2020-01-22 12:00:00,14.0,0.0,0.0
2,3,2020-01-22 12:00:00,Chongqing,China,2020-01-22 12:00:00,6.0,0.0,0.0
3,4,2020-01-22 12:00:00,Fujian,China,2020-01-22 12:00:00,1.0,0.0,0.0
4,5,2020-01-22 12:00:00,Gansu,China,2020-01-22 12:00:00,0.0,0.0,0.0


### Eksploracja i przygotowanie danych

In [3]:
# Print the frame summary: column dtypes, non-null counts and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1719 entries, 0 to 1718
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Sno             1719 non-null   int64         
 1   Date            1719 non-null   datetime64[ns]
 2   Province/State  1257 non-null   object        
 3   Country         1719 non-null   object        
 4   Last Update     1719 non-null   datetime64[ns]
 5   Confirmed       1719 non-null   float64       
 6   Deaths          1719 non-null   float64       
 7   Recovered       1719 non-null   float64       
dtypes: datetime64[ns](2), float64(3), int64(1), object(2)
memory usage: 107.6+ KB


In [7]:
# Count missing values in each column.
data.isna().sum()

Sno                 0
Date                0
Province/State    462
Country             0
Last Update         0
Confirmed           0
Deaths              0
Recovered           0
dtype: int64

In [11]:
# Rows with no Province/State fall back to the value of Country.
data['Province/State'] = data['Province/State'].fillna(data['Country'])
# Re-count missing values to confirm the column is now complete.
data.isna().sum()

Sno               0
Date              0
Province/State    0
Country           0
Last Update       0
Confirmed         0
Deaths            0
Recovered         0
dtype: int64

In [13]:
# Number of rows recorded per country; display the ten most frequent.
rows_per_country = data['Country'].value_counts()
rows_per_country.nlargest(10).reset_index()

Unnamed: 0,index,Country
0,Mainland China,801
1,US,188
2,Australia,84
3,Canada,59
4,China,34
5,South Korea,27
6,Japan,27
7,Thailand,27
8,Hong Kong,26
9,Singapore,26


In [15]:
# Fold the 'Mainland China' label into 'China' so both are counted together.
data['Country'] = data['Country'].replace('Mainland China', 'China')
merged_country_counts = data['Country'].value_counts()
merged_country_counts.nlargest(10).reset_index()

Unnamed: 0,index,Country
0,China,835
1,US,188
2,Australia,84
3,Canada,59
4,South Korea,27
5,Japan,27
6,Thailand,27
7,Vietnam,26
8,Hong Kong,26
9,Singapore,26


In [16]:
# ISO 3166-1 alpha-3 codes keyed by country name. Looking codes up by name keeps
# every row paired with its own code even if the row order, the tie-breaking or
# the number of returned countries changes; a positional list would silently
# mislabel rows or raise on a length mismatch.
# NOTE(review): the names after 'Singapore' are inferred from the codes in the
# original positional list - confirm the spelling against data['Country'].
iso_alpha_by_country = {
    'China': 'CHN',
    'US': 'USA',
    'Australia': 'AUS',
    'Canada': 'CAN',
    'Japan': 'JPN',
    'South Korea': 'KOR',
    'Thailand': 'THA',
    'Hong Kong': 'HKG',
    # Left without a code on purpose to match the original list.
    # NOTE(review): Macau's alpha-3 code is 'MAC' - confirm whether it should be plotted.
    'Macau': np.nan,
    'Singapore': 'SGP',
    'Taiwan': 'TWN',
    'Vietnam': 'VNM',
    'France': 'FRA',
    'Malaysia': 'MYS',
    'Nepal': 'NPL',
}

# Row count per country for the 15 most frequent countries.
tmp = data['Country'].value_counts().nlargest(15).reset_index()
tmp.columns = ['Country', 'Count']
# Order by count (descending), breaking ties alphabetically by country name.
tmp = tmp.sort_values(by=['Count', 'Country'], ascending=[False, True])
# Countries missing from the mapping get NaN instead of a wrong code.
tmp['iso_alpha'] = tmp['Country'].map(iso_alpha_by_country)
tmp

Unnamed: 0,Country,Count,iso_alpha
0,China,835,CHN
1,US,188,USA
2,Australia,84,AUS
3,Canada,59,CAN
5,Japan,27,JPN
4,South Korea,27,KOR
6,Thailand,27,THA
8,Hong Kong,26,HKG
11,Macau,26,
9,Singapore,26,SGP


In [35]:
# World map: bubble size and colour both encode the `Count` column of `tmp`.
# NOTE(review): `Count` is the number of rows per country (from value_counts),
# while the title reads "number of cases" - confirm the wording is intended.
world_map_options = dict(
    locations='iso_alpha',
    size='Count',
    size_max=40,
    color='Count',
    template='plotly_dark',
    projection='natural earth',
    width=950,
    height=500,
    color_continuous_scale='reds',
    text='Country',
    title='Liczba przypadków koronawirusa - top 15',
)
px.scatter_geo(tmp, **world_map_options)

In [36]:
# Same bubble map as the world view, restricted to the 'asia' scope.
asia_map_options = dict(
    locations='iso_alpha',
    size='Count',
    size_max=40,
    color='Count',
    template='plotly_dark',
    projection='natural earth',
    scope='asia',
    width=950,
    height=500,
    color_continuous_scale='reds',
    text='Country',
    title='Liczba przypadków koronawirusa - top 15',
)
px.scatter_geo(tmp, **asia_map_options)

In [39]:
# Bar chart of the `Count` column per country.
# Title typo fixed ("koronawizura" -> "koronawirusa") to match the map titles.
px.bar(
    tmp,
    x='Country',
    y='Count',
    width=950,
    height=500,
    title='Liczba przypadków koronawirusa w rozbiciu na kraje',
)

In [42]:
# Same bar chart with the China row filtered out.
# Title typo fixed ("koronawizura" -> "koronawirusa") to match the map titles.
px.bar(
    tmp.query("Country != 'China'"),
    x='Country',
    y='Count',
    width=950,
    height=500,
    title='Liczba przypadków koronawirusa w rozbiciu na kraje (bez Chin)',
)

In [49]:
# Sum the three case columns over all rows that share a calendar date.
case_columns = ['Confirmed', 'Deaths', 'Recovered']
tmp = (
    data
    .groupby(data['Date'].dt.date)[case_columns]
    .sum()
    .reset_index()
)
tmp

Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,2020-01-22,555.0,0.0,0.0
1,2020-01-23,653.0,18.0,30.0
2,2020-01-24,941.0,26.0,36.0
3,2020-01-25,2019.0,56.0,49.0
4,2020-01-26,2794.0,80.0,54.0
5,2020-01-27,4473.0,107.0,63.0
6,2020-01-28,6057.0,132.0,110.0
7,2020-01-29,7783.0,170.0,133.0
8,2020-01-30,9776.0,213.0,187.0
9,2020-01-31,11374.0,259.0,252.0


In [52]:
# One line-and-marker trace per case column, all sharing the date axis.
fig = go.Figure()

for series_name in ('Confirmed', 'Deaths', 'Recovered'):
    fig.add_trace(
        go.Scatter(
            x=tmp['Date'],
            y=tmp[series_name],
            mode='markers+lines',
            name=series_name,
        )
    )

fig.update_layout(template='plotly_dark', width=950, title='Koronawirus (22.01.2020 - 17.02.2020)')
fig.show()

In [57]:
# Keep only the date and confirmed-count columns, renamed to `ds` / `y`.
data_confirmed = tmp[['Date', 'Confirmed']].rename(
    columns={'Date': 'ds', 'Confirmed': 'y'}
)
data_confirmed.head()

Unnamed: 0,ds,y
0,2020-01-22,555.0
1,2020-01-23,653.0
2,2020-01-24,941.0
3,2020-01-25,2019.0
4,2020-01-26,2794.0


In [61]:
# Area chart of the confirmed-case series.
fig = go.Figure()

trace = go.Scatter(x=data_confirmed['ds'], y=data_confirmed['y'], mode='markers+lines', name='Confirmed', fill='tozeroy')
fig.add_trace(trace)
# The end date in the title was "12.02"; the loading-cell comment and the
# previous figure's title both give the data range as 22.01 - 17.02.
fig.update_layout(title='Liczba potwierdzonych przypadków (22.01 - 17.02)', template='plotly_dark', width=950)

fig.show()

### Budowa modelu

In [63]:
# The library was published as `fbprophet` before v1.0 and as `prophet` from
# v1.0 on; try the current name first and fall back to the legacy one so the
# cell runs with either install.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

# Fit the model with daily, weekly and yearly seasonality all switched off.
model = Prophet(daily_seasonality=False, weekly_seasonality=False, yearly_seasonality=False)
model.fit(data_confirmed)

INFO:fbprophet:n_changepoints greater than number of observations. Using 20.


<fbprophet.forecaster.Prophet at 0x7ff1a2ba5a90>

In [65]:
# Same package rename as for the model class: prefer `prophet.plot`, fall back
# to the legacy `fbprophet.plot`.
try:
    from prophet.plot import plot_plotly
except ImportError:
    from fbprophet.plot import plot_plotly

# Prediction: build a frame extended by 7 daily periods and predict on it.
future = model.make_future_dataframe(periods=7, freq='D')
forecast = model.predict(future)

plot_plotly(model, forecast)