<a href="https://colab.research.google.com/github/Paulina9555/045_coronavirus/blob/main/045_coronavirus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Spis treści:
1. [Import bibliotek](#0)
2. [Wczytanie danych](#1)
3. [Eksploracja i przygotowanie danych](#2)
4. [Budowa modelu](#3)

### <a name='0'></a> Import bibliotek

In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

np.random.seed(42)

### <a name='1'></a> Wczytanie danych

In [2]:
url = 'https://storage.googleapis.com/esmartdata-courses-files/ml-course/coronavirus.csv'
data = pd.read_csv(url, parse_dates=['Date', 'Last Update'])
data.head()

Unnamed: 0,Sno,Date,Province/State,Country,Last Update,Confirmed,Deaths,Recovered
0,1,2020-01-22 12:00:00,Anhui,China,01/22/2020 12:00:00,1.0,0.0,0.0
1,2,2020-01-22 12:00:00,Beijing,China,01/22/2020 12:00:00,14.0,0.0,0.0
2,3,2020-01-22 12:00:00,Chongqing,China,01/22/2020 12:00:00,6.0,0.0,0.0
3,4,2020-01-22 12:00:00,Fujian,China,01/22/2020 12:00:00,1.0,0.0,0.0
4,5,2020-01-22 12:00:00,Gansu,China,01/22/2020 12:00:00,0.0,0.0,0.0


### <a name='2'></a> Eksploracja i przygotowanie danych

In [3]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1719 entries, 0 to 1718
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Sno             1719 non-null   int64         
 1   Date            1719 non-null   datetime64[ns]
 2   Province/State  1257 non-null   object        
 3   Country         1719 non-null   object        
 4   Last Update     1719 non-null   object        
 5   Confirmed       1719 non-null   float64       
 6   Deaths          1719 non-null   float64       
 7   Recovered       1719 non-null   float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(3)
memory usage: 107.6+ KB


In [4]:
data.isnull().sum()

Unnamed: 0,0
Sno,0
Date,0
Province/State,462
Country,0
Last Update,0
Confirmed,0
Deaths,0
Recovered,0


In [5]:
data['Province/State'] = np.where(data['Province/State'].isnull(), data['Country'], data['Province/State'])
data.isnull().sum()

Unnamed: 0,0
Sno,0
Date,0
Province/State,0
Country,0
Last Update,0
Confirmed,0
Deaths,0
Recovered,0


In [6]:
data['Country'].value_counts().nlargest(10)

Unnamed: 0_level_0,count
Country,Unnamed: 1_level_1
Mainland China,801
US,188
Australia,84
Canada,59
China,34
Thailand,27
South Korea,27
Japan,27
Taiwan,26
Hong Kong,26


In [7]:
data['Country'] = np.where(data['Country'] == 'Mainland China', 'China', data['Country'])
data['Country'].value_counts().nlargest(10)

Unnamed: 0_level_0,count
Country,Unnamed: 1_level_1
China,835
US,188
Australia,84
Canada,59
Thailand,27
South Korea,27
Japan,27
Taiwan,26
Hong Kong,26
Singapore,26


In [8]:
tmp = data['Country'].value_counts().nlargest(15).reset_index()
tmp.columns = ['Country', 'Count']
tmp = tmp.sort_values(by=['Count', 'Country'], ascending=[False, True])
tmp['iso_alpha'] = ['CHN', 'USA', 'AUS', 'CAN', 'JPN', 'KOR', 'THA', 'HKG', np.nan, 'SPG', 'TWN', 'VNM', 'FRA', 'MYS', 'NPL']
tmp

Unnamed: 0,Country,Count,iso_alpha
0,China,835,CHN
1,US,188,USA
2,Australia,84,AUS
3,Canada,59,CAN
6,Japan,27,JPN
5,South Korea,27,KOR
4,Thailand,27,THA
8,Hong Kong,26,HKG
11,Macau,26,
9,Singapore,26,SPG


In [10]:
px.scatter_geo(tmp, locations='iso_alpha', size='Count', size_max = 40, template='plotly_dark', color='Count', text = 'Country', projection = 'natural earth',
               color_continuous_scale = 'reds', width=950, title='Liczba przypadków koronawirusa na świecie - TOP 15')

In [12]:
px.scatter_geo(tmp, locations='iso_alpha', size='Count', size_max = 40, template='plotly_dark', color='Count', text = 'Country', projection = 'natural earth',
               color_continuous_scale = 'reds', scope='asia', width=950, title='Liczba przypadków koronawirusa w Azji - TOP 15')

In [13]:
px.bar(tmp, template='plotly_dark', y='Count', x = 'Country',
       color_continuous_scale = ['#42f5c8'], width=950, title='Liczba przypadków koronawirusa w rozbiciu na kraje')

In [14]:
px.bar(tmp.query("Country != 'China'"), template='plotly_dark', y='Count', x = 'Country',
       color_continuous_scale = ['#42f5c8'], width=950, title='Liczba przypadków koronawirusa w rozbiciu na kraje (poza Chinami)')

In [16]:
tmp = data.groupby(by=data['Date'].dt.date)[['Confirmed', 'Deaths', 'Recovered']].sum().reset_index()
tmp

Unnamed: 0,Date,Confirmed,Deaths,Recovered
0,2020-01-22,555.0,0.0,0.0
1,2020-01-23,653.0,18.0,30.0
2,2020-01-24,941.0,26.0,36.0
3,2020-01-25,2019.0,56.0,49.0
4,2020-01-26,2794.0,80.0,54.0
5,2020-01-27,4473.0,107.0,63.0
6,2020-01-28,6057.0,132.0,110.0
7,2020-01-29,7783.0,170.0,133.0
8,2020-01-30,9776.0,213.0,187.0
9,2020-01-31,11374.0,259.0,252.0


In [18]:
fig = go.Figure()

trace1 = go.Scatter(x=tmp['Date'], y=tmp['Confirmed'], mode='markers+lines', name='Confirmed')
trace2 = go.Scatter(x=tmp['Date'], y=tmp['Deaths'], mode='markers+lines', name='Deaths')
trace3 = go.Scatter(x=tmp['Date'], y=tmp['Recovered'], mode='markers+lines', name='Recovered')

fig.add_trace(trace1)
fig.add_trace(trace2)
fig.add_trace(trace3)

fig.update_layout(template='plotly_dark', width=950, title='Koronawirus (22.01-17.02.2020)')

In [19]:
data_confirmed = tmp[['Date', 'Confirmed']]
data_confirmed.columns = ['ds', 'y']
data_confirmed.head()

Unnamed: 0,ds,y
0,2020-01-22,555.0
1,2020-01-23,653.0
2,2020-01-24,941.0
3,2020-01-25,2019.0
4,2020-01-26,2794.0


In [20]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=data_confirmed['ds'], y=data_confirmed['y'], mode='markers+lines', name='Confirmed', fill='tozeroy'))
fig.update_layout(template='plotly_dark', width=950, title='Liczba potwierdzonych przypadków (22.01-17.02.2020)')

### <a name='3'></a> Budowa modelu

In [28]:
from prophet import Prophet
from prophet.plot import plot_plotly

model = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=False)
model.fit(data_confirmed)

future = model.make_future_dataframe(periods=7, freq='D')
forcast = model.predict(future)
plot_plotly(model, forcast)

INFO:prophet:n_changepoints greater than number of observations. Using 20.
DEBUG:cmdstanpy:input tempfile: /tmp/tmplkch7h8s/_n64_wb5.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmplkch7h8s/vsr7efdu.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.12/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=73409', 'data', 'file=/tmp/tmplkch7h8s/_n64_wb5.json', 'init=/tmp/tmplkch7h8s/vsr7efdu.json', 'output', 'file=/tmp/tmplkch7h8s/prophet_modelrns5u_gs/prophet_model-20251004191220.csv', 'method=optimize', 'algorithm=newton', 'iter=10000']
19:12:20 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
19:12:20 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
