In [None]:
!pip install pywaffle
!pip install circlify
!pip install calmap
!pip install -U kaleido
!pip install pytrends
!pip install pyramid-arima

In [None]:
# Render a "Hide/Unhide code" button that toggles every code-input area,
# so the notebook can be presented with results only.
from IPython.display import HTML

toggle_markup = '''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Hide/Unhide code"></form>'''

# The HTML object has to be the last expression of the cell to be displayed.
HTML(toggle_markup)

# Temat prezentacji: Jak Data Science może pomóc w walce z pandemią? Analiza trendów i zależności na podstawie danych dotyczących COVID-19

# Agenda<br>
<font size="4">
1. Wprowadzenie<br>
2. Przedstawienie danych<br>
3. Analiza zachorowań (Świat / Polska)<br>
4. Analiza szczepień w Polsce i w innych krajach<br>
5. Google Trends a koronawirus<br>
6. Koronawirus a bogactwo i szczęście narodów :)<br>
7. Modelowanie i prognoza nowych zachorowań<br>
8. Q&A</font>

# 1. Wprowadzenie

### Kilka słów o NatWest

![NatWest](https://upload.wikimedia.org/wikipedia/commons/8/87/Natwest-logo.jpg)

<font size="4">National Westminster Bank jest częścią grupy NatWest Group, która jest jedną z największych instytucji finansowych w Wielkiej Brytanii.<br>
    <br>
**[Informacje o NatWest](https://www.natwestgroup.com/who-we-are/worldwide-locations/rbs-international-branch-network/poland/nwg-global-hub-europe-pl.html)**
    <br>
    <br>
**[Otwarte rekrutacje](https://jobs.natwestgroup.com/search/jobs)**<br></font>

![DataScience](https://www.kdnuggets.com/wp-content/uploads/Fig1-Abisiga-top-10-lists-data-science.jpg)

### COVID-19
<font size="4">COVID-19 (od ang. coronavirus disease 2019) – ostra choroba zakaźna układu oddechowego wywołana zakażeniem wirusem SARS-CoV-2. Została po raz pierwszy rozpoznana i opisana w listopadzie 2019, w środkowych Chinach (miasto Wuhan, w prowincji Hubei) podczas serii zachorowań zapoczątkowujących pandemię tej choroby.
Na dzień 31 października 2021 zidentyfikowano **259 milionów przypadków** i ponad **5 milionów śmierci**.<br></font>

![Covid19](https://upload.wikimedia.org/wikipedia/commons/thumb/9/96/3D_medical_animation_coronavirus_structure.jpg/1200px-3D_medical_animation_coronavirus_structure.jpg)
Image credits: [Wikimedia.org](http://commons.wikimedia.org/)

# 2. Przedstawienie danych

In [None]:
# Data analysis
import numpy as np
import pandas as pd

# Display floats with a thousands separator and two decimal places
pd.options.display.float_format = '{:,.2f}'.format

# Visualization
import calmap
import circlify
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pywaffle import Waffle
import seaborn as sns

# Dates, text wrapping and random numbers
import datetime as dt
from datetime import timedelta
from textwrap import wrap
import random

# Switch off all warnings (this also hides deprecation warnings raised by the libraries above)
import warnings
warnings.filterwarnings('ignore')


# Statistical modelling
from sklearn.metrics import mean_squared_error
# Register the standalone `six` and `joblib` packages under their former
# `sklearn.externals` module names before pyramid is imported.
# NOTE(review): presumably pyramid-arima still imports them from sklearn.externals,
# which newer scikit-learn releases no longer provide - confirm.
import six
import sys
sys.modules['sklearn.externals.six'] = six
import joblib
sys.modules['sklearn.externals.joblib'] = joblib
from pyramid.arima import auto_arima

### Import danych

Dane dostępne:
* https://ourworldindata.org/ 
* https://github.com/owid/covid-19-data/tree/master/public/data
* https://www.kaggle.com/unsdsn/world-happiness

In [None]:
# Read the four CSV inputs; the files are looked up relative to the working directory.
(
    world_data,
    vaccinations_data,
    vaccinations_per_manu,
    whr_2019,
) = (
    pd.read_csv(csv_name)
    for csv_name in (
        'owid-covid-data_nov.csv',
        'country_vaccinations_nov.csv',
        'country_vaccinations_by_manufacturer_nov.csv',
        'whr_2019.csv',
    )
)

In [None]:
# Report the number of rows and columns of every loaded table.
for table_name, table in (
    ('world_data', world_data),
    ('vaccinations_data', vaccinations_data),
    ('vaccinations_per_manu', vaccinations_per_manu),
    ('world_happiness_report_2019', whr_2019),
):
    n_rows, n_cols = table.shape
    print(f'Wymiary tabeli {table_name} to: {n_rows} wierszy i {n_cols} kolumn')

![alt text](https://dylancastillo.co/content/images/2021/07/logo_article.jpg)

### Procesowanie danych

In [None]:
# Parse the 'date' column (YYYY-MM-DD strings) of each table into datetime values.
for frame in (world_data, vaccinations_data, vaccinations_per_manu):
    frame['date'] = pd.to_datetime(frame['date'], format='%Y-%m-%d', utc=False)

In [None]:
# Keep only rows dated on or before 30 November 2021.
cutoff = '2021-11-30'
world_data = world_data.loc[world_data['date'] <= cutoff]
vaccinations_data = vaccinations_data.loc[vaccinations_data['date'] <= cutoff]
vaccinations_per_manu = vaccinations_per_manu.loc[vaccinations_per_manu['date'] <= cutoff]

In [None]:
# Index every table by its 'date' column (the column itself is kept as well).
for frame in (world_data, vaccinations_data, vaccinations_per_manu):
    frame.index = frame['date']

In [None]:
# Express people_vaccinated and people_fully_vaccinated as a percentage of population.
# `assign` returns a new frame instead of writing columns into the date-filtered
# slice, which avoids pandas' chained-assignment (SettingWithCopy) hazard and
# keeps the cell safe to re-run.
world_data = world_data.assign(
    people_vaccinated_per=lambda d: d['people_vaccinated'] / d['population'] * 100,
    people_fully_vaccinated_per=lambda d: d['people_fully_vaccinated'] / d['population'] * 100,
)

In [None]:
# Rows of world_data that belong to Poland.
is_poland = world_data['location'] == 'Poland'
pl_stats = world_data.loc[is_poland]

In [None]:
# Per-manufacturer vaccination rows that belong to Poland.
is_poland_manu = vaccinations_per_manu['location'] == 'Poland'
pl_vac_per_manu = vaccinations_per_manu.loc[is_poland_manu]

#### Worldwide Covid data

In [None]:
# Preview the first rows of the worldwide COVID table.
world_data.head()

#### Vaccinations per country

In [None]:
# Preview the first rows of the per-country vaccination table.
vaccinations_data.head()

#### Vaccinations per manufacturer

In [None]:
# Preview the first rows of the per-manufacturer vaccination table.
vaccinations_per_manu.head()

#### World Happiness Report 2019

In [None]:
# Preview the first rows of the World Happiness Report 2019 table.
whr_2019.head()

# 3. Analiza zachorowań (Świat / Polska)

## Świat

In [None]:
# Colour scheme shared by the charts below
dark_pl = '#003078'  # dark blue, e.g. the deaths line and the second-dose squares
pl = '#1d70b8'       # highlight blue

blue = '#2800D7'     # accent for the main series (Poland, moving average)
others = 'lightgray'  # non-highlighted elements
background_color = "#fafafa"  # figure / axes / paper background


# Date stamp printed in the "as on ..." footers of the graphs
data_date = dt.date(2021, 11, 30)

In [None]:
# Snapshot of the last day in the data set (30 November 2021), restricted to rows
# that have a continent.
# NOTE(review): rows without a continent are presumably aggregates such as
# "World" or income groups - confirm against the OWID data description.
# dropna() is chained so that a new frame is produced instead of mutating a
# slice of world_data in place.
world_data_latest = (
    world_data[world_data['date'] == '2021-11-30']
    .dropna(subset=['continent'])
)

In [None]:
# Plotly choropleth: total confirmed cases per country on the latest day.
fig = go.Figure(data=go.Choropleth(
    locations=world_data_latest['location'],
    locationmode='country names',
    z=world_data_latest['total_cases'],
    hovertemplate='%{z:,}<br>%{location}<extra></extra>',
    colorscale='Blues',
    marker_line_color='darkgray',
    marker_line_width=0.5,
    colorbar_title='Total<br>Cases',
))

fig.update_layout(
    title_text='<b>World total cases</b>',
    title_x=0.5,
    # `title_font` replaces the deprecated layout-level `titlefont` attribute.
    title_font={'size': 30},
    paper_bgcolor=background_color,
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular',
    ),
)

fig.show(renderer='notebook')

In [None]:
# Plotly choropleth on a globe: total deaths per country on the latest day.
fig = go.Figure(data=go.Choropleth(
    locations=world_data_latest['location'],
    locationmode='country names',
    z=world_data_latest['total_deaths'],
    hovertemplate='%{z:,}<br>%{location}<extra></extra>',
    colorscale='Blues',
    marker_line_color='darkgray',
    marker_line_width=0.5,
    # The colour bar shows deaths here, not cases.
    colorbar_title='Total<br>Deaths',
))

fig.update_layout(
    title_text='<b>World total deaths</b>',
    title_x=0.5,
    # `title_font` replaces the deprecated layout-level `titlefont` attribute.
    title_font={'size': 30},
    paper_bgcolor=background_color,
    width=950,
    height=1000,
    geo=dict(
        showframe=False,
        showcoastlines=False,
        # Set the globe projection directly instead of configuring an
        # equirectangular map and overriding it afterwards.
        projection_type='orthographic',
    ),
)
fig.show(renderer='notebook')

In [None]:
# Interactive choropleth with one trace per metric. Three rows of buttons switch
# the displayed metric, the colour scale and the direction of the colour scale.
fig = go.Figure()

# Button label -> values plotted for that metric.
df_dict = {
    "Confirmed per mln": world_data_latest['total_cases_per_million'],
    "Deaths per mln": world_data_latest['total_deaths_per_million'],
    "New cases per million": world_data_latest['new_cases_per_million']
}
for position, (filter_name, metric_values) in enumerate(df_dict.items()):
    # Show the first metric from the start: its button is the one highlighted
    # as active by default, so the map should not open empty.
    is_default = position == 0
    fig.add_trace(go.Choropleth(
        locations=world_data_latest['location'],
        locationmode='country names',
        z=metric_values,
        hovertemplate='%{z:,.0f}<br>%{location}<extra></extra>',
        colorscale='Blues',
        marker_line_color='darkgray',
        marker_line_width=0.5,
        colorbar_title=filter_name,
        showscale=is_default,
        visible=is_default,
    ))

fig.update_layout(
    title_text='<b># of cases</b>',
    title_font_family='Arial',
    title_x=1,
    # `title_font_size` replaces the deprecated layout-level `titlefont` attribute.
    title_font_size=35,
    paper_bgcolor=background_color,
    width=950,
    height=650,
    autosize=False,
    margin=dict(t=100, b=0, l=0, r=0),
)

# Vertical positions (paper coordinates) of the three button rows
button_layer_1_height = 1.2
button_layer_2_height = 1.14
button_layer_3_height = 1.06

# Metric buttons: `update` takes [trace changes, layout changes]. `showscale` is
# a trace attribute, so it belongs to the first dict; only the selected trace is
# visible and shows its colour bar.
metric_buttons = [
    dict(
        label=selected,
        method='update',
        args=[
            {'visible': [selected == candidate for candidate in df_dict],
             'showscale': [selected == candidate for candidate in df_dict]},
            {'title.text': selected},
        ],
    )
    for selected in df_dict
]

colorscale_names = ['Viridis', 'Cividis', 'Blues', 'Greens']
colorscale_buttons = [
    dict(label=scale_name, method='restyle', args=['colorscale', scale_name])
    for scale_name in colorscale_names
]

reversescale_buttons = [
    dict(label=str(reverse), method='restyle', args=['reversescale', reverse])
    for reverse in (False, True)
]

fig.update_layout(
    updatemenus=[
        dict(
            buttons=metric_buttons,
            type='buttons',
            direction='right',
            showactive=True,
            x=0.05,
            xanchor='left',
            y=button_layer_1_height,
            yanchor='top',
        ),
        dict(
            buttons=colorscale_buttons,
            type='buttons',
            direction='right',
            pad={'r': 10, 't': 10},
            showactive=True,
            # The traces start with the 'Blues' scale, so highlight that button.
            active=colorscale_names.index('Blues'),
            x=0.05,
            xanchor='left',
            y=button_layer_2_height,
            yanchor='top',
        ),
        dict(
            buttons=reversescale_buttons,
            type='buttons',
            direction='right',
            pad={'r': 10, 't': 10},
            showactive=True,
            x=0.08,
            xanchor='left',
            y=button_layer_3_height,
            yanchor='top',
        ),
    ]
)

# Captions to the left of each button row
fig.update_layout(
    annotations=[
        dict(text="Type", x=0, xref="paper", y=button_layer_1_height-0.01, yref="paper",
             align="left", showarrow=False),
        dict(text="Color", x=0, xref="paper", y=button_layer_2_height-0.03, yref="paper",
             align="left", showarrow=False),
        dict(text="Reverse<br>Colorscale", x=0, xref="paper", y=button_layer_3_height-0.02,
             yref="paper", align="left", showarrow=False)
    ])
fig.show(renderer='notebook')

In [None]:
# Plotly treemap: World -> continent -> country, sized and coloured by total
# cases per million on the latest day.
fig = px.treemap(
    world_data_latest,
    path=[px.Constant("World"), 'continent', 'location'],
    values='total_cases_per_million',
    color='total_cases_per_million',
    color_continuous_scale='RdBu',
    labels={"total_cases_per_million": "Total cases per mln"},
    width=1000,
    height=600,
)
fig.update_layout(
    title_text="<b>Countries share in total cases per mln</b>",
    # `title_font` replaces the deprecated layout-level `titlefont` attribute.
    title_font={'size': 30},
    margin=dict(t=50, l=25, r=25, b=25),
)
# Hover text: tile name and its value with a thousands separator.
fig.data[0].hovertemplate = '%{label}<br>%{value:,.0f}'
fig.show(renderer='notebook')

In [None]:
# Copy of world_data for the animation below: the 'date' column is turned into
# strings (plotly animation frames need str values) and rows are ordered by the index.
world_data_clean = (
    world_data
    .assign(date=world_data['date'].astype(str))
    .sort_index()
)

In [None]:
# Animated Plotly choropleth: stringency index per country, one frame per date.
fig = px.choropleth(
    world_data_clean,
    locations='location',
    locationmode='country names',
    color='stringency_index',
    animation_frame='date',
    color_continuous_scale='Reds',  # colour palette
    range_color=[0, 100],  # fixed colour range so frames are comparable
    labels={"stringency_index": "Stringency index", "date": "Date", "location": "Location"},
)
fig.update_layout(
    title_text="<b>Stringency index in time</b>",
    # `title_font` replaces the deprecated layout-level `titlefont` attribute.
    title_font={'size': 30},
    paper_bgcolor=background_color,
    title_x=0.5,
    geo=dict(showframe=False, showcoastlines=False),
)
# Shorten the frame and transition durations used by the first (play) button.
fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 1
fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 0.1
fig.update_geos(projection_type="equirectangular", visible=True, resolution=110)
fig.show(renderer='notebook')

## Polska

In [None]:
# Matplotlib chart for Poland: daily new cases, their 7-day moving average and
# daily deaths, with two boxes showing the cumulative totals.
poland = world_data.loc[world_data.location == 'Poland']
daily_case = poland.dropna(subset=['new_cases'])
daily_case_smooth = poland.dropna(subset=['new_cases_smoothed'])
pl_death = poland.dropna(subset=['new_deaths'])
pl_death = pl_death.loc[pl_death.new_deaths > 0]


## form the figure
fig, ax = plt.subplots(figsize=(12,9), facecolor=background_color)
ax.plot(daily_case.date, daily_case.new_cases, color='slategrey', alpha=0.6, label='Daily Case')
# The moving average is drawn from the frame filtered on the smoothed column,
# so missing smoothed values do not leave gaps in the line.
ax.plot(daily_case_smooth.date, daily_case_smooth.new_cases_smoothed, color=blue, label='7-day Moving Avg')
ax.plot(pl_death.date, pl_death.new_deaths, color=dark_pl, label='Death')
ax.set_xlabel('Date')
ax.set_ylabel('Infection')
ax.legend(facecolor=background_color, framealpha=0, loc='upper right', bbox_to_anchor=(0.95, 1))

## add patches to show the total confirmed cases and deaths
## (bottom edge of the box, caption, column holding the running total, text colour)
summary_boxes = [
    (0.7, 'Total Cases', 'total_cases', blue),
    (0.6, 'Total Deaths', 'total_deaths', dark_pl),
]
for box_bottom, caption, column, text_color in summary_boxes:
    fig.patches.extend([plt.Rectangle((0.71, box_bottom), 0.16, 0.08,
                                      fill=True, color='grey', alpha=0.1, zorder=1000,
                                      transform=fig.transFigure, figure=fig)])
    fig.text(0.715, box_bottom + 0.06, caption, fontfamily='monospace', fontsize=10)
    fig.text(0.715, box_bottom + 0.015, f"{int(poland[column].max()):,d}",
             fontfamily='monospace', color=text_color, fontsize=25)


## title and subtitle and date
fig.text(0.055,1.02,'Daily Updates in Poland',fontsize=40,fontweight='bold')
fig.text(0.055,0.92,'After 3 waves of infections, the 4th one is in progress.\nWhy the 1st one is not visible?',color='gray',fontsize=20)
fig.text(0.055,0.02,f'as on {data_date}',color='gray',fontsize=13, style='italic')

# Removing axis and spines
for s in ['top', 'right', 'bottom', 'left']:
    ax.spines[s].set_visible(False)

# remove tick marks on both axes (tick labels stay)
ax.tick_params(axis='both', which='both', length=0)

ax.set_facecolor(background_color)
# plt.savefig('daily_updates.png', dpi=300,bbox_inches = "tight")
plt.show()


In [None]:
# Cumulative infections per million over time: every location as a faint grey
# line, Poland highlighted, with a marker on Poland's latest value.
poland = world_data[world_data.location == 'Poland']

## form the figure
fig, ax = plt.subplots(figsize=(12,9), facecolor=background_color)
# Pass the axes explicitly instead of relying on pyplot's "current axes".
sns.lineplot(data=world_data, x='date', y=world_data['total_cases_per_million']/1000,
             hue='location', palette='Greys_r', alpha=0.1, legend=False, ax=ax)
sns.lineplot(data=poland, x='date', y=poland['total_cases_per_million']/1000,
             label='Poland', color=blue, legend=False, ax=ax)

# Zero-length line whose only purpose is to add a 'World' entry to the legend.
# .iloc is used because the Series is indexed by date, so [0] would be a
# positional fallback lookup, which pandas has deprecated.
dummy_date = world_data['date'].iloc[0]
ax.hlines(0, xmin=dummy_date, xmax=dummy_date, label='World', color='silver')
ax.set_xlabel('Date', fontsize=11)
ax.set_ylabel("Cumulative Infection per Million (000s)", fontsize=11)

## plot a point at Poland's last observation
pl_per_mil = poland['total_cases_per_million'].iloc[-1]/1000
pl_date = poland['date'].iloc[-1]
ax.scatter(y=pl_per_mil, x=pl_date, s=100, color=blue, edgecolor='white', linewidths=1, zorder=20)
ax.legend(facecolor=background_color, framealpha=0, loc='upper right', bbox_to_anchor=(0.95, 1))

## title and subtitle and date
fig.text(0.055,0.98,"Infections per Million",fontsize=40,fontweight='bold')
fig.text(0.055,0.90,"Not great not terrible\ncomparing to rest of the World.",color='gray',fontsize=20)
fig.text(0.055,0.05,f'as on {data_date}',color='gray',fontsize=13, style='italic')

# Hide all spines (the colour previously set on the hidden left/bottom spines had no visible effect)
for s in ['top', 'right', 'bottom', 'left']:
    ax.spines[s].set_visible(False)

## remove tick marks on x and y axis
ax.tick_params(axis='both', which='both', length=0)
ax.xaxis.set_tick_params(labelsize=11)
fig.set_facecolor(background_color)
ax.set_facecolor(background_color)
# plt.savefig('covid_per_million.png',dpi=300,bbox_inches='tight')
plt.show()

# 4. Analiza szczepień w Polsce i w innych krajach

In [None]:
# Reshape Poland's per-manufacturer rows to wide form: one row per date,
# one column per vaccine, cells holding total_vaccinations.
pl_vac_per_manu_pivot = pl_vac_per_manu.pivot(index='date', columns='vaccine', values='total_vaccinations')

In [None]:
# Horizontal bar chart: price of one dose per manufacturer (labelled in dollars),
# with the cheapest vaccine highlighted.
Manufacturers = 'Oxford/AstraZeneca', 'Johnson & Johnson', 'Sinovac', 'Moderna', 'Pfizer/BioNTech'
cst_vac = pd.DataFrame({
    'Cost Per Dose': [7, 10, 27, 37, 38],
    'Provider': list(Manufacturers),
})

# Rows of the cheapest and the most expensive vaccine. The frame has a default
# RangeIndex, so the index label equals the bar position on the y-axis.
cheapest_row = int(cst_vac['Cost Per Dose'].idxmin())
priciest_row = int(cst_vac['Cost Per Dose'].idxmax())

fig = plt.figure(figsize=(10, 4), dpi=150, facecolor=background_color)
gs = fig.add_gridspec(1, 1)
gs.update(wspace=0, hspace=0)
ax0 = fig.add_subplot(gs[0, 0])
ax0.set_facecolor(background_color)
for s in ["right", "top", "bottom", "left"]:
    ax0.spines[s].set_visible(False)

ax0.tick_params(axis="both", which="both", left=False, bottom=False, labelbottom=False)

ax0.text(0, -1, 'How much does a dose of each vaccine cost?', color='black', fontsize=15, ha='left', va='bottom', weight='bold')
ax0.text(0, -0.93, 'Oxford/AstraZeneca and Johnson & Johnson are the most affordable vaccines.',
         color='gray', fontsize=12, ha='left', va='top')

color_map = [others] * len(cst_vac)
color_map[cheapest_row] = pl  # color highlight
# Background-coloured bars, 10% longer than the real ones, are drawn first;
# presumably to widen the x-range so the price labels fit next to the bars.
sns.barplot(ax=ax0, y=cst_vac['Provider'], x=cst_vac['Cost Per Dose']*1.1, color=background_color, zorder=2, orient='h')
sns.barplot(ax=ax0, y=cst_vac['Provider'], x=cst_vac['Cost Per Dose'], palette=color_map, zorder=2, orient='h')

ax0.set_xlabel("")
ax0.set_ylabel("")
ax0.grid(False)
ax0.tick_params(labelsize=5)
ax0.set_yticklabels([])

# One provider label (inside the bar end) and one price label (right of the bar)
# per row; each label is drawn exactly once.
for row, (provider, cost) in enumerate(zip(cst_vac['Provider'], cst_vac['Cost Per Dose'])):
    if row == cheapest_row:
        provider_text = provider.replace('/', '/\n')  # break the long name over two lines
        price_color, price_offset = pl, 2
    elif row == priciest_row:
        provider_text = provider
        price_color, price_offset = '#9b1b30', 3
    else:
        provider_text = provider
        price_color, price_offset = others, 3

    ax0.annotate(provider_text,
                 xy=(cost - 0.1, row),
                 va='center', ha='right', fontweight='light', fontfamily='serif',
                 fontsize=8, color='black', rotation=0)
    ax0.annotate(f"${cost}",
                 xy=(cost + price_offset, row),
                 va='center', ha='right', fontweight='light', fontfamily='serif',
                 fontsize=15, color=price_color, rotation=0)

fig.text(0.12, 0.1, 'based on www.statista.com (March 2021)', color='gray', fontfamily='serif', fontsize=5, style='italic');
# plt.savefig('vaccine_cost.png', dpi=300, bbox_inches = "tight")

In [None]:
# Population taken from the last row of Poland's data.
pl_population = pl_stats['population'].iloc[-1]
# Cost of one dose for every inhabitant, per manufacturer.
# NOTE(review): the column is named "mean Country Population" but holds the cost
# for Poland's population - consider renaming together with the cells that read it.
cst_vac['Cost for mean Country Population'] = cst_vac['Cost Per Dose']*pl_population

In [None]:
# Pack one circle per manufacturer into the unit circle; the values passed in
# are the total costs computed for the population.
population_costs = cst_vac['Cost for mean Country Population'].tolist()
unit_enclosure = circlify.Circle(x=0, y=0, r=1)
circles = circlify.circlify(
    population_costs,
    show_enclosure=False,
    target_enclosure=unit_enclosure,
)

In [None]:
# Packed-circle chart: cost of buying one dose per inhabitant of Poland from
# each manufacturer, with Oxford/AstraZeneca highlighted.
fig = plt.figure(figsize=(7, 7), dpi=150, facecolor=background_color)
gs = fig.add_gridspec(1, 1)
gs.update(wspace=0, hspace=0)
ax = fig.add_subplot(gs[0, 0])
ax.set_facecolor(background_color)

# Title
ax.text(-1,1.3,'How much does it cost to purchase\nenough doses for PL?',fontsize=15,fontweight='bold')
ax.text(-1,1.15,'Taking into consideration over 37mln population - that is used here.\nVariation in cost is staggering - and this does not include logistical or manufacturing costs.',color='gray',fontsize=10)

# Remove axes (this hides the spines, ticks and labels)
ax.axis('off')

# Find axis boundaries: the largest extent of any circle from the origin
lim = max(
    max(
        abs(circle.x) + circle.r,
        abs(circle.y) + circle.r,
    )
    for circle in circles
)
ax.set_xlim(-lim, lim)
ax.set_ylim(-lim, lim)

# list of labels
labels = cst_vac['Provider']
costs = cst_vac['Cost for mean Country Population']

# Draw every circle with its provider name and cost.
# NOTE(review): zip() assumes circlify returns the circles in the same order as
# the rows of cst_vac (which are sorted by ascending cost) - confirm against
# circlify's documented output order.
for circle, label, cost in zip(circles, labels, costs):
    x, y, r = circle
    if label == 'Oxford/AstraZeneca':
        ax.add_patch(plt.Circle((x, y), r, alpha=0.9, linewidth=1, facecolor=pl, edgecolor="black"))
        ax.annotate('Oxford/\nAstraZeneca', (x, y+0.05), va='center', ha='center', color='white', fontsize=8)
    else:
        ax.add_patch(plt.Circle((x, y), r, alpha=0.5, linewidth=0.3, facecolor=others, edgecolor="black"))
        ax.annotate(label, (x, y+0.05), va='center', ha='center', fontsize=8, alpha=0.5)
    # A separate loop variable keeps the `costs` Series intact after the loop.
    ax.annotate(f"${round(cost):,}", (x, y-0.03), va='center', ha='center', fontsize=8)
# plt.savefig('circles.png', dpi=300, bbox_inches = "tight")

In [None]:
# Area chart of cumulative doses administered in Poland per manufacturer.
fig, ax = plt.subplots(figsize=(16,10))

# (pivot column, alpha); None keeps matplotlib's default opacity. Layers are
# drawn in this order, so later ones lie on top.
area_layers = [
    ('Johnson&Johnson', None),
    ('Moderna', 0.8),
    ('Oxford/AstraZeneca', 0.5),
    ('Pfizer/BioNTech', 0.3),
]
for vaccine, layer_alpha in area_layers:
    ax.fill_between(x=pl_vac_per_manu_pivot.index, y1=pl_vac_per_manu_pivot[vaccine],
                    color='#1d70b8', alpha=layer_alpha)

# Totals on the last date. .iloc is used because the pivot is indexed by date,
# so [-1] would be a positional fallback lookup, which pandas has deprecated.
pfizer_tot = pl_vac_per_manu_pivot['Pfizer/BioNTech'].iloc[-1]
moderna_tot = pl_vac_per_manu_pivot['Moderna'].iloc[-1]
az_tot = pl_vac_per_manu_pivot['Oxford/AstraZeneca'].iloc[-1]
jj_tot = pl_vac_per_manu_pivot['Johnson&Johnson'].iloc[-1]

## annotate the graph: (total, display name, y position in figure pixels, font size)
dose_notes = [
    (pfizer_tot, 'Pfizer', 300, 30),
    (az_tot, 'Astra Zeneca', 160, 20),
    (moderna_tot, 'Moderna', 130, 15),
    (jj_tot, 'Johnson&Johnson', 100, 15),
]
for total, display_name, y_pixels, font_size in dose_notes:
    # int() truncates to whole millions.
    ax.annotate(f"{int(total/1000000)}mln {display_name} doses",
                xy=(700, y_pixels), xycoords='figure pixels', va='center', ha='center',
                fontsize=font_size, fontweight='light', fontfamily='serif', color='white')

# y axis formatter: show tick values in millions
scale_y = 1e6
ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/scale_y))
ax.yaxis.set_major_formatter(ticks_y)
ax.set_ylabel('Doses in millions', fontsize=12)

## set x axis labels and font
ax.set_xlabel('Date', fontsize=12)

## title and subtitle and date
fig.text(0.145,0.9,'What type of vaccine?',fontsize=40,fontweight='bold')
# int() makes the ',d' format work when the pivot holds floats; the manufacturer
# name is spelled with '/' (the former '\B' was an invalid escape sequence).
fig.text(0.145,0.82,f'With over {int(pfizer_tot):,d} doses administered, Pfizer/BioNTech\ndominated the vaccine market in Poland.',color='gray',fontsize=20)
fig.text(0.1,0.05,f'as on {data_date}',color='gray',fontsize=10, style='italic')

fig.set_facecolor(background_color)
ax.set_facecolor(background_color)
# plt.savefig('vacine_type.png', dpi=300,bbox_inches = "tight")

In [None]:
# Waffle chart on a 10x10 grid: second dose, first dose only, and not yet vaccinated.
first_dose_pct = 53
second_dose_pct = 52
waffle_values = {
    'Second Dose %': second_dose_pct,
    'First Dose %': (first_dose_pct - second_dose_pct),
    'Awaiting %': 47,
}

fig = plt.figure(
    FigureClass=Waffle,
    figsize=(5, 5),
    dpi=150,
    facecolor=background_color,
    rows=10,
    columns=10,
    vertical=True,
    colors=[dark_pl, pl, others],
    values=waffle_values,
)

# Headline and subtitle sit above the grid in figure coordinates.
fig.text(0.035, 1.09, 'PL Vaccination uptake', fontsize=20, fontweight='bold')
fig.text(0.035, 1.01,
         'Nearly 55% of the PL adult population\nhas received the first dose of the vaccine.',
         color='gray', fontsize=10)

# Hand-built legend entries for the two vaccinated categories only.
first_patch = mpatches.Patch(color=pl, label='First dose %')
second_patch = mpatches.Patch(color=dark_pl, label='Second dose %')
legend_handles = [first_patch, second_patch]

plt.legend(handles=legend_handles, bbox_to_anchor=(0.5, -0.12), loc='lower center', ncol=2,
           facecolor=background_color, framealpha=0);
fig.text(0.035, 0.01, f'as on {data_date}', color='gray', fontsize=5, style='italic');
# plt.savefig('vaccine_ratio.png', dpi=300, bbox_inches = "tight")

In [None]:
# Keep only the rows of pl_stats that report a value for people_vaccinated_per.
has_vaccinated_share = pl_stats['people_vaccinated_per'].notna()
pl_stats_weekly = pl_stats.loc[has_vaccinated_share]

In [None]:
# Overlaid bar chart: population share with at least one dose (lighter colour)
# and fully vaccinated (darker colour), drawn on the same axes.
fig = plt.figure(figsize=(10, 4), dpi=150,facecolor=background_color)
gs = fig.add_gridspec(1, 1)
gs.update(wspace=0, hspace=0)
ax0 = fig.add_subplot(gs[0, 0])
ax0.set_facecolor(background_color)
for s in ["right", "top"]:
    ax0.spines[s].set_visible(False)

# Pass the target axes explicitly instead of relying on pyplot's current axes.
sns.barplot(x='date', y="people_vaccinated_per", data=pl_stats_weekly, color=pl, ax=ax0);
sns.barplot(x='date', y="people_fully_vaccinated_per", data=pl_stats_weekly, color=dark_pl, ax=ax0);

Xstart, Xend = ax0.get_xlim()
Ystart, Yend = ax0.get_ylim()

ax0.set_xlabel('Date', fontsize=8)
# The bar plot is categorical: bar i sits at x == i. Labels set through
# set_xticklabels are assigned by tick position, so thinning the ticks afterwards
# (as locator_params(nbins=8) did) left the first few dates attached to the wrong
# bars. Thin positions and labels together with the same step instead.
# assumes one bar per row of pl_stats_weekly and a datetime-like index — TODO confirm
date_labels = [x.strftime("%Y-%m") for x in pl_stats_weekly.index]
max_ticks = 8
tick_step = max(1, (len(date_labels) + max_ticks - 1) // max_ticks)
ax0.set_xticks(list(range(0, len(date_labels), tick_step)))
ax0.set_xticklabels(date_labels[::tick_step])
ax0.set_ylabel("Population Vaccinated (%)",loc='top', fontsize=8)

ax0.set_ylim([0,Yend])
# Title and subtitle are anchored above the plotting area, in data coordinates.
ax0.text(Xstart,Yend+(Yend*0.15), 'The PL initially focused on administering first doses', fontsize=15, ha='left', va='bottom', weight='bold')
ax0.text(Xstart,Yend+(Yend*0.13), 'In PL, there are ~4 weeks between doses depending on vaccine\nThough due to lack of vaccines in mid 2021 first dosage patients had priority', color='gray', fontsize=10, ha='left', va='top')
fig.text(0.1,-0.1,f'as on {data_date}',color='gray',fontsize=5, style='italic');
# plt.savefig('vaccine_progress.png', dpi=300, bbox_inches = "tight")

In [None]:
# Rows of world_data that report a value for people_vaccinated_per.
world_vac = world_data.dropna(subset = ['people_vaccinated_per'])

## timeseries of vaccination progress - pl vs regional countries
countries = ['Spain', 'Germany', 'Czechia', 'Hungary','United Kingdom', 'France', 'Latvia']

## time series: comparison countries in grey, Poland highlighted
fig, ax = plt.subplots(figsize=(12,9), facecolor=background_color)
sns.lineplot(data=world_vac[world_vac.location.isin(countries)], x='date', y='people_vaccinated_per',
             hue='location', palette='gray', alpha=0.8, legend=False, ax=ax)
sns.lineplot(data=world_vac[world_vac.location=='Poland'], x='date', y='people_vaccinated_per',
             color=pl, legend=False, ax=ax)
ax.set_xlabel('Date',fontsize=11)
ax.set_ylabel("Cumulative Vaccination Progress (%)", fontsize=11)

## plot the dot at the end of the line (latest date and highest share per country)
world_ = world_vac[world_vac.location.isin(countries+['Poland'])].groupby('location',as_index=False).agg({'date':'max','people_vaccinated_per':'max'})
sns.scatterplot(data=world_.loc[world_.location!='Poland'], x='date', y='people_vaccinated_per',
                s=100, hue='location', palette='gray', legend=False, ax=ax)
sns.scatterplot(data=world_.loc[world_.location=='Poland'], x='date', y='people_vaccinated_per',
                s=180, color=pl, legend=False, ax=ax)

# All labels share one x position, 10 days right of the latest date; compute it once.
label_x = world_['date'].max() + timedelta(days=10)
for row in world_.itertuples(index=False):
    ax.annotate(f'{row.location} ({round(row.people_vaccinated_per, 2)}%)',
                (label_x, row.people_vaccinated_per))

## title and subtitle and date
fig.text(0.055,1.0,"Vaccination Progress, Regionwise",fontsize=40,fontweight='bold')
fig.text(0.055,0.95,"How does Poland's vaccination progress compare to other countries in the region?",color='gray',fontsize=20)
fig.text(0.055,0.05,f'as on {data_date}',color='gray',fontsize=13, style='italic')

# removing axis and spines and set spine color
for s in ['top', 'right']:
    ax.spines[s].set_visible(False)
ax.spines['left'].set_color('#3b3b3b')
ax.spines['bottom'].set_color('#3b3b3b')

## remove ticks on x and y axis
ax.tick_params(axis=u'both', which=u'both',length=0)
ax.xaxis.set_tick_params(labelsize=11)
fig.set_facecolor(background_color)
ax.set_facecolor(background_color)
# plt.savefig('vac_progress_region.png',dpi=300,bbox_inches='tight')
plt.show()


In [None]:
## vaccination rate vs rest of the world
# Highest reported share per location for both vaccination measures.
world_latest = world_data.groupby(['location', 'iso_code'],as_index=False).agg({'people_vaccinated_per':'max','people_fully_vaccinated_per':'max'})

## sort the dataframe by vac. rate (ascending, so the best location is the LAST row)
world_latest = world_latest.dropna(subset=['people_vaccinated_per','people_fully_vaccinated_per']).sort_values('people_vaccinated_per', ascending=True).reset_index(drop=True)
top = 10
world_latest_top = world_latest.tail(top)
world_latest_pl = world_latest.loc[world_latest.location == 'Poland'].reset_index()
# Rank counted from the top: the last row (index len-1) is rank 1, matching the
# numbering of the top-10 labels below. The former `len + 1 - index` placed
# Poland one position too low.
pl_row = world_latest.index[world_latest.location == 'Poland'][0]
pl_position = len(world_latest) - pl_row

# "<rank> <location>" labels for the top rows, built once and reused by bars and annotations.
top_labels = [f'{rank} {name}' for rank, name in zip(range(len(world_latest_top), 0, -1), world_latest_top.location)]
pl_label = f'{pl_position} Poland'

## form the figure: top 10 in the upper panel, Poland alone in the thin lower panel
fig, (ax1,ax2) = plt.subplots(2,1, figsize=(12,7), sharex=True, gridspec_kw={'height_ratios': [top, 0.7]})
ax1.barh(y=top_labels, width=world_latest_top.people_vaccinated_per, color='lightgrey', height=0.7)
ax1.barh(y=top_labels, width=world_latest_top.people_fully_vaccinated_per, color='slategrey', alpha=0.7, height=0.7)
ax2.barh(y=pl_label, width=world_latest_pl.people_vaccinated_per, color=pl, alpha=0.5, height=0.3)
ax2.barh(y=pl_label, width=world_latest_pl.people_fully_vaccinated_per, color=dark_pl, alpha=0.7, height=0.3)
ax2.set_xlabel("Vaccination Progress (%)", fontsize=11)
fig.tight_layout()

## annotate the values: first-dose share right of the bar, fully-vaccinated share inside it
for label, share in zip(top_labels, world_latest_top.people_vaccinated_per):
    ax1.annotate(f'{round(share,1)}%', xy=(share+1, label), fontsize=10)
for label, share in zip(top_labels, world_latest_top.people_fully_vaccinated_per):
    ax1.annotate(f'{round(share,1)}%', xy=(share/2, label), fontsize=10, color='white')

pl_vaccinated = world_latest_pl.people_vaccinated_per[0]
pl_fully_vaccinated = world_latest_pl.people_fully_vaccinated_per[0]
ax2.annotate(f'{round(pl_vaccinated,1)}%', xy=(pl_vaccinated+1, pl_label), fontsize=10, color='black')
ax2.annotate(f'{round(pl_fully_vaccinated,1)}%', xy=(pl_fully_vaccinated/2, pl_label), fontsize=10, color='white')

## customize legend with mpatches
first_patch = mpatches.Patch(color='slategrey', label='Fully Vaccinated')
second_patch = mpatches.Patch(color='lightgrey', label='Vaccinated')
plt.legend(handles=[first_patch,second_patch],bbox_to_anchor=(0.87,0.45),loc='lower center',ncol=1,
          facecolor=background_color, framealpha=0)

## title and subtitle and date
fig.text(0.035,1.06,"Vaccination Progress",fontsize=40,fontweight='bold')
fig.text(0.035,1.01,"How does Poland's vaccination progress compare to the rest of the world?",color='gray',fontsize=20)
fig.text(0.035,-0.05,f'as on {data_date}',color='gray',fontsize=13, style='italic')
# Ellipsis between the two panels hints at the ranks that are not drawn.
fig.text(0.5,0.17,'...',color='lightgrey',fontsize=30, fontweight='bold')

# removing axis and spines
for s in ['top', 'right', 'bottom', 'left']:
    ax1.spines[s].set_visible(False)
    ax2.spines[s].set_visible(False)

## remove ticks on x and y axis
ax1.tick_params(axis=u'both', which=u'both',length=0)
ax2.tick_params(axis=u'both', which=u'both',length=0)
ax2.xaxis.set_tick_params(labelsize=11)
fig.set_facecolor(background_color)
ax1.set_facecolor(background_color)
ax2.set_facecolor(background_color)
# plt.savefig('vac_progress_barh.png', dpi=300,bbox_inches = "tight")
plt.show()

![Mapa](https://www.spc.int/sites/default/files/styles/max_1300x1300/public/2018-06/Pitcairn%20-%20pacific%20community%20SPC%20copryrights.jpg?itok=MedPGRUz)

![Pitcairn Islands](https://www.visitpitcairn.pn/files/slide_image_8382.jpg)

<font size="4">Jako zawodowi analitycy musimy potwierdzić, czy nasze dane są prawidłowe. Z informacji znalezionych w Internecie wynika, że Gibraltar jest pierwszym krajem na świecie, który zaszczepił całą dorosłą populację. Ale to przecież nie składa się na 120%. Dodatkowe procenty wynikają z tego, że szczepione są tam również osoby niebędące rezydentami tego kraju. Gibraltar ma sporą populację pracowników z innych krajów, np. z Hiszpanii. Zaszczepione zostały wszystkie osoby powyżej 16. roku życia, niezależnie od ich statusu.</font><br>
[Źródło](https://www.news.com.au/world/coronavirus/global/most-vaxxed-country-jabbed-119-per-cent-of-its-eligible-population/news-story/28dbeed5fbb5a821cdf24f4a25036280)<br>
<font size="4">Niestety z uwagi na duży wzrost liczby przypadków w tym kraju rząd przewiduje odwołanie wszystkich oficjalnych uroczystości związanych ze Świętami Bożego Narodzenia.</font><br>
[Źródło](https://www.standard.co.uk/news/world/christmas-cancelled-gibraltar-vaccinations-b966816.html)<br>
   

![Gibraltar](https://www.gospanews.net/en/wp-content/uploads/2021/11/GIBRALTAR-CHRISTMAS.jpg)

In [None]:
# Calendar heatmap of the smoothed daily vaccinations in Poland from 2021-01-01 on.
# calendarplot receives its own figure settings through fig_kws, so no separate
# plt.figure() call is made here: it only added an extra, empty grey figure.
vaccinations_2021 = pl_stats.loc[pl_stats['date']>='2021-01-01', 'new_vaccinations_smoothed']
calmap.calendarplot(vaccinations_2021, fig_kws={'figsize': (12,9), 'facecolor': background_color},
                    yearlabels=False, fillcolor='whitesmoke',
                    yearlabel_kws={'color':'black', 'fontsize':14})
# Title and subtitle are placed in the data coordinates of the current axes.
plt.text(-1,10,'Vaccination Heatmap', fontsize=30, fontweight='bold')
plt.text(-1,8.3,'How the vaccination program is progressing in 2021 for PL.', fontsize=20, color='gray');
# plt.savefig('vac_heatmap.png', dpi=300, bbox_inches = "tight")

In [None]:
## which weekday is more popular for vaccination

# Calendar order of the weekdays. Reindexing by name replaces the former
# hard-coded [4,0,5,6,3,1,2], which silently depended on the alphabetical order of
# the groups, on all seven days being present and on an English locale.
WEEKDAY_ORDER = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

## weekday name of every row (day_name() returns English names by default)
pl_stats['weekday'] = pd.to_datetime(pl_stats['date']).dt.day_name()
# Mean daily doses per weekday, Monday first; the 0..6 row index is named 'order'.
weekday_vac = (
    pl_stats.groupby('weekday')
    .agg({'new_vaccinations': 'mean'})
    .reindex(WEEKDAY_ORDER)
    .rename_axis('weekday')
    .reset_index()
    .rename_axis('order')
)
daily_max = weekday_vac[weekday_vac['new_vaccinations']==weekday_vac['new_vaccinations'].max()]
# Scalars for the annotation: int() on a one-element Series is deprecated and
# annotate expects plain coordinates rather than Series.
peak_day = daily_max['weekday'].iloc[0]
peak_doses = daily_max['new_vaccinations'].iloc[0]

# form the figure: lollipop chart, series reversed before plotting
fig, ax = plt.subplots(figsize=(12,6))
ax.hlines(y=weekday_vac.weekday[::-1], xmin=0, xmax=weekday_vac['new_vaccinations'][::-1], color='grey', linewidth=3, alpha=0.4)
ax.scatter(y=weekday_vac.weekday[::-1], x=weekday_vac['new_vaccinations'][::-1], s=200, color=dark_pl)
ax.scatter(y=daily_max.weekday, x=daily_max['new_vaccinations'], s=900, color=pl)
plt.yticks(fontname = "serif",fontsize=12)
plt.xticks(fontname = "serif",fontsize=12)

## xlabel & adjust position
ax.set_xlabel('Avg. Daily Dose',fontfamily='serif', fontsize=12)
ax.xaxis.set_label_coords(0.45, -0.055)

## highest: value in thousands written inside the large marker
ax.annotate(f'{int(round(peak_doses,-3)/1000)}K', xy=(peak_doses, peak_day),
            color='white', fontsize=10, fontfamily='serif', va='center', ha='center', zorder=20)

## title and subtitle and date
fig.text(0.035,1.05,'The best day for a vaccine in PL?',fontsize=40,fontweight='bold')
fig.text(0.035,0.93,'You can expect a very crowded vaccination centre \nin the middle of the week.',color='gray',fontsize=20)
fig.text(0.035,0.01,f'as on {data_date}',color='gray',fontsize=13, style='italic')

# Removing axis and spines
for s in ['top', 'right', 'bottom', 'left']:
    ax.spines[s].set_visible(False)

## remove ticks on x and y axis
ax.tick_params(axis=u'both', which=u'both',length=0)

fig.set_facecolor(background_color)
ax.set_facecolor(background_color)
# plt.savefig('best_day.png', dpi=300,bbox_inches = "tight")

In [None]:
# Per-manufacturer vaccination rows for the two aggregate locations plotted below.
manu_location = vaccinations_per_manu['location']
eu_vac = vaccinations_per_manu.loc[manu_location.eq('European Union')]
us_vac = vaccinations_per_manu.loc[manu_location.eq('United States')]

In [None]:
# Interactive line chart: total vaccinations in the EU over time, one line per vaccine.
fig = px.line(eu_vac, x="date", y="total_vaccinations", color="vaccine", custom_data=['vaccine'],
              labels={"total_vaccinations":"Total vaccinations", "date":"Date"},
              color_discrete_sequence=px.colors.sequential.ice, width=900, height=500)
# Hover shows the thousands-separated value; the vaccine name goes in the secondary box.
fig.update_traces(mode="lines", hovertemplate='%{y:,}<br><extra>%{customdata[0]}</extra>')
fig.update_layout(
    hovermode="x",
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>Vaccination progress in EU,  by manufacturer</b>',
               x=0.5, font=dict(size=30, color='black')),
    paper_bgcolor=background_color,
    showlegend=False,
    plot_bgcolor='white')
fig.show(renderer='notebook')

In [None]:
# Interactive line chart: total vaccinations in the USA over time, one line per vaccine.
fig = px.line(us_vac, x="date", y="total_vaccinations", color="vaccine", custom_data=['vaccine'],
              labels={"total_vaccinations":"Total vaccinations", "date":"Date"},
              color_discrete_sequence=px.colors.sequential.ice, width=900, height=500)
# Hover shows the thousands-separated value; the vaccine name goes in the secondary box.
fig.update_traces(mode="lines", hovertemplate='%{y:,}<br><extra>%{customdata[0]}</extra>')
fig.update_layout(
    hovermode="x",
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>Vaccination progress in USA, by manufacturer</b>',
               x=0.5, font=dict(size=30, color='black')),
    paper_bgcolor=background_color,
    showlegend=False,
    plot_bgcolor='white')
fig.show(renderer='notebook')

# 5. Google Trends a koronawirus

In [None]:
from pytrends.request import TrendReq

In [None]:
# Search terms queried in Google Trends, one request per term.
# The first entry is fetched on its own and seeds the result frame.
keywords = [
    'covid',
    'SARS-CoV-2',
    'pandemia COVID-19',
    'objawy',
    'utrata wechu',
    'utrata smaku',
    'dusznosci',
    'kwarantanna',
    'szczepionka na COVID-19',
    'szczepionka',
    'koronawirus',
    'l4',
    'zwolnienie',
    'chore dziecko',
    'zwolnienie na dziecko',
    'przypadki',
    'wakacje koronawirus',
    'wakacje bez testow',
]

In [None]:
# Query window and region shared by every Google Trends request in this cell.
TRENDS_TIMEFRAME = '2020-03-15 2021-11-30'
TRENDS_GEO = 'PL'


def fetch_keyword_interest(client, keyword, timeframe=TRENDS_TIMEFRAME, geo=TRENDS_GEO):
    """Return the interest-over-time frame for one keyword, without 'isPartial'.

    Parameters
    ----------
    client : TrendReq
        Session used to send the request.
    keyword : str
        Single search term; it becomes the name of the value column.
    timeframe : str
        '<start date> <end date>' window passed to build_payload.
    geo : str
        Region code passed to build_payload.

    Returns
    -------
    pandas.DataFrame
        Frame returned by interest_over_time() with the 'isPartial' column
        removed when present. May be empty; an empty response has no
        'isPartial' column, which is why the drop tolerates its absence.
    """
    client.build_payload(kw_list=[keyword], cat=0, timeframe=timeframe, geo=geo)
    interest = pd.DataFrame(client.interest_over_time())
    return interest.drop(columns='isPartial', errors='ignore')


pytrend = TrendReq(hl='en-GB', tz=360)
trends_start, trends_end = TRENDS_TIMEFRAME.split()

# One request per keyword; frames are combined once at the end instead of
# merging inside the loop.
trend_frames = []
for keyword in keywords:
    print(f'Sprawdzam w Google Trends "{keyword}" dla dat miedzy {trends_start} a {trends_end}')
    interest = fetch_keyword_interest(pytrend, keyword)
    if interest.empty:
        # Previously an empty response crashed on the missing 'isPartial' column.
        print(f'Brak danych Google Trends dla "{keyword}" - pomijam')
        continue
    trend_frames.append(interest)

# Inner join on the index keeps only the dates present for every keyword,
# like the former chain of index-on-index merges.
df_result = pd.concat(trend_frames, axis=1, join='inner')
    

In [None]:
# Save the fetched Google Trends frame to disk so a cached copy exists.
df_result.to_csv('google_trends_new.csv')
# Fallback when Google Trends cannot be reached - load the cached copy instead
# (the first CSV column holds the index written by to_csv above):
# df_result = pd.read_csv('google_trends_new.csv', index_col=0, parse_dates=True)
df_result.head()

In [None]:
# Align the Polish statistics with the Google Trends series on their shared index;
# only index values present in both frames are kept.
pl_trends_new = pl_stats.merge(df_result, how='inner', left_index=True, right_index=True)

In [None]:
# Two series on one chart: new cases on the primary y-axis and search interest
# for "covid" on the secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(x=pl_trends_new.index, y=pl_trends_new['new_cases'], name="New cases",
                    line_shape='linear', line=dict(color="navy")))
fig.add_trace(go.Scatter(x=pl_trends_new.index, y=pl_trends_new['covid'], name="Search words: Covid",
                    line_shape='linear', line=dict(color="darkgray")), secondary_y=True)

fig.update_traces(mode="lines", hovertemplate='%{y:,}<br>')
fig.update_layout(
    hovermode="x",
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>New cases vs. covid keyword</b>',
               x=0.5, font=dict(size=30, color='black')),
    paper_bgcolor=background_color,
    showlegend=False,
    plot_bgcolor='white')

fig.show(renderer='notebook')

In [None]:
# Two series on one chart: reproduction rate on the primary y-axis and search
# interest for "objawy" on the secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(x=pl_trends_new.index, y=pl_trends_new['reproduction_rate'], name="Reproduction rate",
                    line_shape='linear', line=dict(color="navy")))
fig.add_trace(go.Scatter(x=pl_trends_new.index, y=pl_trends_new['objawy'], name="Search words: Objawy",
                    line_shape='linear', line=dict(color="darkgray")), secondary_y=True)
fig.update_traces(mode="lines", hovertemplate='%{y:,}<br>')
fig.update_layout(
    hovermode="x",
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>Reproduction rate vs. objawy keyword</b>',
               x=0.5, font=dict(size=30, color='black')),
    paper_bgcolor=background_color,
    showlegend=False,
    plot_bgcolor='white')

fig.show(renderer='notebook')

In [None]:
# Two series on one chart: smoothed new vaccinations on the primary y-axis and
# search interest for "wakacje koronawirus" on the secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(go.Scatter(x=pl_trends_new.index, y=pl_trends_new['new_vaccinations_smoothed'], name="New vaccinations smoothed",
                    line_shape='linear', line=dict(color="navy")))
fig.add_trace(go.Scatter(x=pl_trends_new.index, y=pl_trends_new['wakacje koronawirus'], name="Search words: Wakacje koronawirus",
                    line_shape='linear', line=dict(color="darkgray")), secondary_y=True)
fig.update_traces(mode="lines", hovertemplate='%{y:,}<br>')
fig.update_layout(
    hovermode="x",
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>New vaccinations vs. wakacje koronawirus keyword</b>',
               x=0.5, font=dict(size=30, color='black')),
    paper_bgcolor=background_color,
    showlegend=False,
    plot_bgcolor='white')

fig.show(renderer='notebook')

# 6. Koronawirus a bogactwo i szczęście narodów :)

In [None]:
# Deaths divided by cases per row; stored on world_data_latest itself.
world_data_latest['death_rate'] = world_data_latest['total_deaths'].div(world_data_latest['total_cases'])

# Vaccination rows for the fixed snapshot date.
is_snapshot_day = vaccinations_data['date'] == '2021-11-26'
vaccinations_data_latest = vaccinations_data[is_snapshot_day]

# Join both sources on iso_code; clashing column names get _vacc / _world suffixes.
# The natural log of GDP per capita is added as an extra column.
total_df = pd.merge(
    vaccinations_data_latest,
    world_data_latest,
    on='iso_code',
    suffixes=('_vacc', '_world'),
).assign(log_gdp_per_capita=lambda merged: np.log(merged['gdp_per_capita']))

In [None]:
# Scatter of people vaccinated per hundred against log GDP per capita, coloured by continent.
fig = px.scatter(total_df, y='people_vaccinated_per_hundred_vacc', x='log_gdp_per_capita',
                 color='continent', hover_data=['location'],
                 labels={"people_vaccinated_per_hundred_vacc":"People vaccinated per hundred",
                         "log_gdp_per_capita":"Log GDP per Capita", "continent":"Continent", "location":"Location"})
fig.update_layout(
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>People vaccinated per hundred vs. Log GDP per Capita</b>',
               x=0.5, font=dict(size=30, color='black')),
    paper_bgcolor=background_color,
    plot_bgcolor='white')
fig.show(renderer='notebook')

In [None]:
# Scatter of people vaccinated per hundred against the human development index, coloured by continent.
fig = px.scatter(total_df, y='people_vaccinated_per_hundred_vacc', x='human_development_index',
                 color='continent', hover_data=['location'],
                 labels={"people_vaccinated_per_hundred_vacc":"People vaccinated per hundred",
                         "human_development_index":"Human development Index", "continent":"Continent", "location":"Location"})
fig.update_layout(
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>People vaccinated per hundred vs. Human development index</b>',
               x=0.5, font=dict(size=25, color='black')),
    paper_bgcolor=background_color,
    plot_bgcolor='white')
fig.show(renderer='notebook')

In [None]:
# Scatter of total deaths per million against median age, coloured by continent.
fig = px.scatter(total_df, y='total_deaths_per_million', x='median_age',
                 color='continent', hover_data=['location'],
                 labels={"total_deaths_per_million":"Total Deaths per Million",
                         "median_age":"Median Age", "continent":"Continent", "location":"Location"})
fig.update_layout(
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>Total deaths per mln vs. Median age</b>',
               x=0.5, font=dict(size=30, color='black')),
    paper_bgcolor=background_color,
    plot_bgcolor='white')
fig.show(renderer='notebook')

In [None]:
# Attach the World Happiness Report 2019 columns by matching the country name
# ('location' on our side, 'Country' in the report).
total_df_ext = pd.merge(total_df, whr_2019, left_on='location', right_on='Country')

In [None]:
# Scatter of people vaccinated per hundred against the report's
# 'Trust (Government Corruption)' column, coloured by continent.
fig = px.scatter(total_df_ext, y='people_vaccinated_per_hundred_vacc', x='Trust (Government Corruption)',
                 color='continent', hover_data=['location'],
                 labels={"people_vaccinated_per_hundred_vacc":"People vaccinated per hundred", "continent":"Continent", "location":"Location"})
fig.update_layout(
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>People vaccinated per hundred vs. Trust to government</b>',
               x=0.5, font=dict(size=30, color='black')),
    paper_bgcolor=background_color,
    plot_bgcolor='white')
fig.show(renderer='notebook')

In [None]:
# Scatter with a fitted regression line: happiness score vs. people vaccinated per hundred.
fig, ax = plt.subplots(figsize=(15,7))
# Draw on the axes created above explicitly rather than on pyplot's current axes.
sns.regplot(x="Happiness Score", y="people_vaccinated_per_hundred_vacc",
            data=total_df_ext, ax=ax,
            scatter_kws={"color": "darkgray"}, line_kws={"color": "navy"})
# Title spelling corrected ("Hapiness" -> "Happiness").
ax.set_title('People vaccinated per hundred vs. Happiness score', fontsize=30)
ax.set_ylabel('People vaccinated per hundred')
plt.show()

In [None]:
# Scatter with a fitted regression line: GDP-per-capita component vs. happiness score.
fig, ax = plt.subplots(figsize=(15,7))
# Draw on the axes created above explicitly rather than on pyplot's current axes.
sns.regplot(x="Economy (GDP per Capita)", y="Happiness Score", data=total_df_ext, ax=ax,
            scatter_kws={"color": "darkgray"}, line_kws={"color": "navy"})
# Title spelling corrected ("Hapiness" -> "Happiness").
ax.set_title('Happiness score vs. GDP per capita', fontsize=30)

plt.show()

# 7. Modelowanie i prognoza nowych zachorowań

In [None]:
## form the figure: daily new cases as a single line
fig, ax = plt.subplots(figsize=(12,9), facecolor=background_color)
# The line needs a label: ax.legend() below had no labelled artist to show, so it
# only produced a warning and an empty legend.
ax.plot(daily_case.date, daily_case.new_cases, color='navy', alpha=0.6, label='New cases')

ax.set_xlabel('Date')
ax.set_ylabel('Infection')
ax.legend(facecolor=background_color, framealpha=0, loc='upper right', bbox_to_anchor=(0.95, 1))


## title and subtitle and date
fig.text(0.055,1.02,'New cases in Poland',fontsize=40,fontweight='bold')

# Removing axis and spines
for s in ['top', 'right', 'bottom', 'left']:
    ax.spines[s].set_visible(False)

# remove tick marks on both axes (tick labels stay)
ax.tick_params(axis=u'both', which=u'both',length=0)

ax.set_facecolor(background_color)

plt.show()


In [None]:
# Frame of new cases in Poland: missing values dropped, index turned into a column.
# dropna() returns a new Series. The former dropna(inplace=True) acted on a column
# pulled out of pl_stats, which can trigger SettingWithCopy warnings and, depending
# on the pandas version, leave pl_stats holding a shortened cached column.
pl_series_new_cases = pl_stats['new_cases'].dropna().reset_index()

In [None]:
# Keep an untouched copy of the dates; a later cell overwrites 'date' with day offsets.
original_dates = pl_series_new_cases['date'].copy()
pl_series_new_cases['date_orig'] = original_dates

In [None]:
# Replace 'date' with the number of whole days since the first observation.
# The offsets come from the untouched 'date_orig' column, so re-running this cell
# gives the same result; the former in-place toordinal mapping failed on a second
# run because 'date' already held integers.
# assumes 'date_orig' is datetime64 — TODO confirm
calendar_days = pl_series_new_cases['date_orig'].dt.normalize()
pl_series_new_cases['date'] = (calendar_days - calendar_days.iloc[0]).dt.days

## SARIMA model - long forecast (30 day)
#### SARIMA = Seasonal Autoregressive Integrated Moving Average

In [None]:
# Hold out the final 30 rows for validation; everything before them is training data.
holdout_start = pl_series_new_cases.shape[0] - 30
train_ml = pl_series_new_cases.iloc[:holdout_start]
valid_ml = pl_series_new_cases.iloc[holdout_start:]

In [None]:
# Search seasonal ARIMA orders (seasonal period m=7) on the training series.
# auto_arima returns the selected model already fitted on the series it was given,
# so the former extra model_sarima.fit(...) call only repeated the estimation.
model_sarima = auto_arima(
    train_ml["new_cases"],
    start_p=0, start_q=0, max_p=3, max_q=3,
    m=7, seasonal=True,
    stepwise=False,  # search the order grid instead of using the stepwise heuristic
    trace=True, error_action='ignore', suppress_warnings=True,
)
# Last expression: display the selected model in the cell output.
model_sarima

In [None]:
# Forecast as many steps as there are validation rows and report the RMSE.
forecast_horizon = len(valid_ml)
prediction_sarima = model_sarima.predict(forecast_horizon)
rmse_sarima = np.sqrt(mean_squared_error(valid_ml['new_cases'], prediction_sarima))
print("Root Mean Square Error for 30-day SARIMA Model: ", rmse_sarima)

In [None]:
# Training data, held-out validation data and the SARIMA forecast on one time axis.
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_ml['date_orig'], y=train_ml["new_cases"],
                         mode='lines', name="Train Data for Confirmed Cases", line=dict(color="navy")))
fig.add_trace(go.Scatter(x=valid_ml['date_orig'], y=valid_ml['new_cases'],
                         mode='lines', name="Validation Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=valid_ml['date_orig'], y=prediction_sarima,
                         mode='lines', name="Prediction of Confirmed Cases"))
fig.update_layout(
    xaxis_title="Date",
    yaxis_title="Confirmed Cases",
    legend=dict(x=0, y=1, traceorder="normal"),
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>Confirmed Cases SARIMA Model 30-day Prediction</b>',
               x=0.5, font=dict(size=25, color='black')),
    paper_bgcolor=background_color,
    plot_bgcolor='white')
fig.show(renderer='notebook')

## SARIMA model - short forecast (7 day)

In [None]:
# Hold out the final 7 rows for validation; everything before them is training data.
holdout_start = pl_series_new_cases.shape[0] - 7
train_ml = pl_series_new_cases.iloc[:holdout_start]
valid_ml = pl_series_new_cases.iloc[holdout_start:]

In [None]:
# Search seasonal ARIMA orders (seasonal period m=7) on the longer training series.
# auto_arima returns the selected model already fitted on the series it was given,
# so the former extra model_sarima.fit(...) call only repeated the estimation.
model_sarima = auto_arima(
    train_ml["new_cases"],
    start_p=0, start_q=0, max_p=3, max_q=3,
    m=7, seasonal=True,
    stepwise=False,  # search the order grid instead of using the stepwise heuristic
    trace=True, error_action='ignore', suppress_warnings=True,
)
# Last expression: display the selected model in the cell output.
model_sarima

In [None]:
# Forecast as many steps as there are validation rows and report the RMSE.
forecast_horizon = len(valid_ml)
prediction_sarima = model_sarima.predict(forecast_horizon)
rmse_sarima = np.sqrt(mean_squared_error(valid_ml['new_cases'], prediction_sarima))
print("Root Mean Square Error for 7-day SARIMA Model: ", rmse_sarima)

In [None]:
# Training data, held-out validation data and the SARIMA forecast on one time axis.
fig = go.Figure()
fig.add_trace(go.Scatter(x=train_ml['date_orig'], y=train_ml["new_cases"],
                         mode='lines', name="Train Data for Confirmed Cases", line=dict(color="navy")))
fig.add_trace(go.Scatter(x=valid_ml['date_orig'], y=valid_ml['new_cases'],
                         mode='lines', name="Validation Data for Confirmed Cases"))
fig.add_trace(go.Scatter(x=valid_ml['date_orig'], y=prediction_sarima,
                         mode='lines', name="Prediction of Confirmed Cases"))
fig.update_layout(
    xaxis_title="Date",
    yaxis_title="Confirmed Cases",
    legend=dict(x=0, y=1, traceorder="normal"),
    # Title settings live under `title`; the flat `titlefont` attribute is deprecated
    # and the bold tag was never closed.
    title=dict(text='<b>Confirmed Cases SARIMA Model 7-day Prediction</b>',
               x=0.5, font=dict(size=25, color='black')),
    paper_bgcolor=background_color,
    plot_bgcolor='white')
fig.show(renderer='notebook')

# 8. Q&A

![Thank You](https://tse3.mm.bing.net/th?id=OIP.7p-gmqQqDuoFYpQze14AOQHaDu&pid=Api)

# 9. Z czego korzystaliśmy

https://www.kaggle.com/sandeep2812/covid19-case-study-dailyupdated-hopking-university<br>
https://www.kaggle.com/jaimebecerraguerrero/is-a-vaccinated-country-a-happy-one-data-analysis<br>
https://www.kaggle.com/pranjalverma08/covid-19-in-depth-analysis<br>
https://www.kaggle.com/jhnyc3/hong-kong-covid-19-data-visualization<br>
https://www.kaggle.com/imdevskp/covid-19-analysis-visualization-comparisons#Bubble-Plot<br>
https://www.kaggle.com/neelkudu28/covid-19-visualizations-predictions-forecasting/notebook<br>
https://www.kaggle.com/jaimebecerraguerrero/is-a-vaccinated-country-a-happy-one-data-analysis<br>
https://plotly.com/<br>
https://matplotlib.org/<br>
https://trends.google.com/trends/