In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from plotly import subplots
import plotly.figure_factory as ff
import matplotlib.pyplot as plt

from pandas_profiling import ProfileReport
import seaborn as sns
from sklearn import metrics
from scipy import stats

from copy import deepcopy

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Shared colour palette (hex strings) reused by the charts in this notebook.
primary_blue = "#496595"
primary_blue2 = "#85a1c1"
primary_blue3 = "#3f4d63"
primary_grey = "#c6ccd8"
primary_grey2 = "#696969"
primary_black = "#202022"
primary_bgcolor = "#f4f0ea"  # applied as axes / figure background colour by later cells

# Third entry of Plotly's built-in qualitative colour sequence.
primary_green = px.colors.qualitative.Plotly[2]

In [None]:
# Load the country-level vaccination progress table and preview the first rows.
# NOTE(review): this path is relative ('../input/...') while other cells read from the
# absolute '/kaggle/input/...' mount — keep the two conventions in sync.
df = pd.read_csv('../input/covid-world-vaccination-progress/country_vaccinations.csv')
df.head()

In [None]:
#Unique dates
print(df.date.unique())
print(df.date.nunique())

In [None]:
print(df.country.unique())
print(df.country.nunique())

In [None]:
np.around(df.describe())

In [None]:
df.dtypes

In [None]:
df.isna().sum()

In [None]:
df.shape

In [None]:
report = ProfileReport(df)

In [None]:
report

<h2>Data cleaning</h2>

In [None]:
df[df['iso_code'].isnull()]

In [None]:
df.columns

In [None]:
# daily_vaccinations carries the same information with fewer missing values, so the
# raw column is redundant. errors='ignore' keeps this cell safe to re-run after the
# column has already been removed (a plain drop would raise KeyError the second time).
df = df.drop(columns='daily_vaccinations_raw', errors='ignore')

In [None]:
df.date

In [None]:
# Parse the date strings into timestamps, then order the rows chronologically.
df = df.assign(date=pd.to_datetime(df['date'])).sort_values(by='date')

In [None]:
df.dtypes

In [None]:
# Convert the timestamps back to 'YYYY-MM-DD' text; the gap-filling cell further down
# matches dates with string operations.
df['date'] = df['date'].dt.strftime('%Y-%m-%d')

In [None]:
df['date']

In [None]:
# Every distinct date string present in the data, in order of first appearance.
uniques = df['date'].unique()
uniques

In [None]:
# df.loc accesses a group of rows and columns by label(s) or a boolean array.
# Assigning to df.loc[len(df)] appends a new row at the end of the DataFrame
# (as long as the label len(df) is not already present in the index).

In [None]:
# Make sure every country (iso_code) has one row for every date in `uniques`, so the
# animated maps get a frame per country per day. Missing combinations are added as
# rows that carry only iso_code and date; every other column is left empty.
#
# The (iso_code, date) pairs already present are collected once into a set, so each
# membership test is O(1) instead of a string scan over the whole frame, and the new
# rows are appended with a single concat instead of growing the frame row by row.
existing_pairs = set(zip(df['iso_code'], df['date']))

missing_rows = []
# dropna(): a missing iso_code never compares equal to anything, so it would
# otherwise generate a phantom row for every single date.
for iso_code in df['iso_code'].dropna().unique():
    for inc_date in uniques:
        if (iso_code, inc_date) not in existing_pairs:
            missing_rows.append({'iso_code': iso_code, 'date': inc_date})

if missing_rows:
    # Columns absent from the filler frame are filled with NaN by concat, so this no
    # longer depends on the exact number of columns in `df`. The index is rebuilt as
    # a plain 0..n-1 range.
    df = pd.concat([df, pd.DataFrame(missing_rows)], ignore_index=True)

In [None]:
# Re-parse the date strings so that all rows sort chronologically.
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values('date', ascending=True)

# NOTE(review): 'MM-DD-YYYY' text does not sort or compare chronologically across
# years, so any later string-based sort/filter on `date` is only safe within one year.
df['date'] = df['date'].dt.strftime('%m-%d-%Y')

df.head()

In [None]:
# NOTE(review): scratch expression — it only evaluates because `iso_code` and
# `inc_date` are still bound from an earlier loop; candidate for removal.
[None, iso_code, inc_date] + 11 * [None]

<h2>Data visualizations - Country</h2>

In [None]:
# Static "headline numbers" banner drawn with matplotlib text on an empty axes.
plt.rcParams['figure.dpi'] = 300
fig = plt.figure(figsize=(5, 0.8), facecolor='#f6f5f5')
gs = fig.add_gridspec(1, 1)
gs.update(wspace=0, hspace=0)

background_color = "#f6f5f5"

ax0 = fig.add_subplot(gs[0, 0])
ax0.set_facecolor(primary_bgcolor)
# Hide the frame and the ticks so that only the text remains visible.
for side in ("top", "right", 'left', 'bottom'):
    ax0.spines[side].set_visible(False)
ax0.set_xticks([])
ax0.set_yticks([])

ax0.grid(which='major', axis='y', zorder=0, color='#EEEEEE')

# Heading and sub-heading in the top-left corner.
ax0.text(-0.12, 0.8, 'General Overview', color=primary_black, fontsize=8, ha='left', weight='bold', va='bottom')
ax0.text(-0.12, 0.79, 'A quick glance of world vaccination progress', color='#292929', fontsize=6, ha='left', va='top')

# (x position, big number, caption) for each headline figure; the values are hard-coded.
headline_figures = [
    (0, '150', 'country with\nVaccination Program'),
    (0.2, '10', 'vaccines\nused'),
    (0.4, '475', 'million of\ntotal vaccinations'),
    (0.65, '269', 'million of\npeople vaccinated'),
    (0.9, '103', 'million of people\nfully vaccinated'),
]
for x_pos, big_number, caption in headline_figures:
    ax0.text(x_pos, 0, big_number, color=primary_blue, fontsize=20, ha='center', weight='bold', va='bottom')
    ax0.text(x_pos, 0, caption, color=primary_grey2, fontsize=6, ha='center', va='top', weight='bold')

plt.show()

In [None]:
df_copy = df.copy()

In [None]:
df_copy.isna().sum()

In [None]:
# Carry each country's last known values forward in row order (the frame was sorted by
# date earlier), then put 0 wherever a country has no earlier value at all.
#
# Changes versus the per-country loop this replaces:
#   * GroupBy.ffill() replaces the deprecated fillna(method='ffill') and fills all
#     countries in one pass.
#   * `country` is additionally back-filled within each iso_code so that frames
#     before a country's first report show its name in hover labels instead of 0.
# Rows without an iso_code are left untouched, as before.
has_iso = df_copy['iso_code'].notna()
value_cols = [col for col in df_copy.columns if col != 'iso_code']

filled = df_copy.groupby('iso_code', sort=False)[value_cols].ffill()
filled['country'] = filled['country'].groupby(df_copy['iso_code'], sort=False).bfill()

df_copy.loc[has_iso, value_cols] = filled.loc[has_iso].fillna(0)

In [None]:
df_copy.isna().sum()

In [None]:
df_copy.head()

In [None]:
# Animated world map of total vaccinations per country, one frame per date.
total_vaccinations_map = dict(
    locations="iso_code",
    color="total_vaccinations",
    hover_name="country",
    animation_frame="date",
    color_continuous_scale='viridis',
    projection="natural earth",
    range_color=[0, 5000000],
    title='<span style="font-size:36px; font-family:Times New Roman">Number of vaccinations per country</span>',
)
fig = px.choropleth(df_copy, **total_vaccinations_map)
fig.show()

In [None]:
# Animated world map of daily vaccinations per country, one frame per date.
daily_vaccinations_map = dict(
    locations="iso_code",
    color="daily_vaccinations",
    hover_name="country",
    animation_frame="date",
    color_continuous_scale='viridis',
    projection="natural earth",
    range_color=[0, 5000000],
    title='<span style="font-size:36px; font-family:Times New Roman">Daily vaccinations per country</span>',
)
fig = px.choropleth(df_copy, **daily_vaccinations_map)
fig.show()

<h2>Top vaccine laboratories</h2>

In [None]:
dff = df.copy()
dff.columns

In [None]:
dff.head()

In [None]:
# Keep only rows that name at least one vaccine, then take the column-wise maximum
# for every (iso_code, vaccines) pair.
has_vaccine_info = dff['vaccines'].notna()
dff = dff[has_vaccine_info].groupby(['iso_code', 'vaccines']).max()
dff

In [None]:
# Turn the group keys back into columns and split the comma-separated vaccine string
# into a list of trimmed names per row.
dff = dff.reset_index()
dff['vaccine_split'] = dff['vaccines'].map(
    lambda joined: [name.strip() for name in joined.split(',')]
)
dff.head()

In [None]:
# pd.get_dummies suits columns that hold a single categorical value per cell;
# cells that list several values need a multi-label encoder instead.

In [None]:
dff.shape

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Encode the per-row vaccine lists as one 0/1 indicator column per vaccine name.
one_hot = MultiLabelBinarizer()
data = one_hot.fit_transform(dff['vaccine_split'])
vac_names = one_hot.classes_
vac_countries = dff['country']

# Index by country first, then move the country back into a regular column.
final_vac_df = pd.DataFrame(data, index=vac_countries, columns=vac_names).reset_index()
final_vac_df.head()

In [None]:
final_vac_df.shape

In [None]:
vac_names

In [None]:
# Column-wise sum of the indicator columns (one total per vaccine name), ascending.
df_country = final_vac_df[vac_names].sum(axis=0).sort_values()
df_country

In [None]:
# Horizontal bar chart of the per-vaccine totals in `df_country` (sorted ascending, so
# the largest value is the last bar).
#
# Colour tiers are derived from the number of bars instead of assuming exactly ten:
# the two largest bars get primary_blue, the next four primary_blue2, and everything
# else primary_grey. With ten bars this gives the same 4 / 4 / 2 split as before.
n_bars = len(df_country)
n_top = min(2, n_bars)
n_mid = min(4, n_bars - n_top)
n_rest = n_bars - n_top - n_mid
colors = [primary_grey] * n_rest + [primary_blue2] * n_mid + [primary_blue] * n_top

fig = go.Figure(go.Bar(
                x = df_country.values,
                y = df_country.index,
                orientation = 'h'))
fig.update_traces(
                marker_color = colors,
                marker_line_color = primary_black,
                marker_line_width = 1.5,
                opacity = 0.6)
# Last expression of the cell: update_layout returns the figure, which the notebook renders.
fig.update_layout(
    title='Top vaccines distributed')

<h2>Country wise</h2>

In [None]:
df_regions = pd.read_csv('/kaggle/input/countries-iso-codes-continent-flags-url/countries_continents_codes_flags_url.csv')
df_regions.head()

In [None]:
# Attach region and flag URL to each vaccination row by matching iso_code to alpha-3
# (merge's default inner join: rows without a matching code are dropped).
df_merge = df.merge(df_regions[['alpha-3', 'region', 'image_url']], left_on='iso_code', right_on='alpha-3')
world_summary_df = pd.read_csv('/kaggle/input/covid19-global-dataset/worldometer_coronavirus_summary_data.csv')
# Left join on the country name keeps every vaccination row.
# NOTE(review): this assumes both sources spell country names identically — verify.
full_df = df_merge.merge(world_summary_df, on='country', how='left')
full_df.head()

In [None]:
full_df.columns

In [None]:
# Add deaths / confirmed / recovered as a share of population to both frames.
for frame in (full_df, world_summary_df):
    for metric in ('deaths', 'confirmed', 'recovered'):
        frame[f'total_{metric}_ratio'] = frame[f'total_{metric}'] / frame['population']

In [None]:
def get_multi_line_title(title: str, subtitle: str) -> str:
    """Build the HTML markup for a two-line chart title.

    Args:
        title: Text of the main title line.
        subtitle: Text shown on the second line, wrapped in a ``<sub>`` tag.

    Returns:
        A single ``<span>`` string (32px, Times New Roman) holding the title, a
        line break and the subtitle.
    """
    markup = f'<span style="font-size:32px; font-family:Times New Roman">{title}<br><sub>{subtitle}</sub></span>'
    return markup

`xanchor` and `yanchor` specify which point of an image (or annotation) the `x` and `y` coordinates refer to. Annotations and layout images accept `x`, `y`, `xanchor`, `yanchor`, and so on. For example, suppose you set:

xanchor='left'
x=2
This means that the left side of your image will be positioned at x=2. If `xanchor` were set to `center`, the center of the image would be at x=2 instead.

By default, text annotations have xref and yref set to "x" and "y", respectively, meaning that their x/y coordinates are with respect to the axes of the plot. This means that panning the plot will cause the annotations to move. Setting xref and/or yref to "paper" will cause the x and y attributes to be interpreted in paper coordinates.

In [None]:
def bar_plot(data, xcolumn, ycolumn, title, colors, ylabel="Count", n=None):
    """Show a vertical bar chart of ``ycolumn`` per ``xcolumn``, largest first.

    Rows with a missing ``ycolumn`` value are dropped. When ``data`` has an
    ``image_url`` column, each bar gets the image at that URL drawn just above it.

    Args:
        data: DataFrame containing ``xcolumn`` and ``ycolumn`` (and optionally
            ``image_url``).
        xcolumn: Column used for the bar categories (x axis).
        ycolumn: Column used for the bar heights and the bar colours.
        title: Figure title (plain text or HTML markup).
        colors: Plotly colorscale applied to the bars.
        ylabel: Label for the y axis and for the value in the hover box.
        n: If given, only the ``n`` largest rows are plotted.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    hovertemplate = '<br><b>%{x}</b>' + f'<br><b>{ylabel}: </b>' + '%{y}<br><extra></extra>'

    # Sort by the plotted value and drop rows where it is missing.
    data = data.sort_values(ycolumn, ascending=False).dropna(subset=[ycolumn])
    if n is not None:
        data = data.iloc[:n]  # keep the n largest values

    # hoverinfo='skip' was removed: it excludes the trace from hover handling, which
    # defeats the purpose of the custom hovertemplate built above.
    fig = go.Figure(go.Bar(
        x=data[xcolumn],
        y=data[ycolumn],
        hovertemplate=hovertemplate,
        marker=dict(
            color=data[ycolumn],
            colorscale=colors)))

    max_y_val = data[ycolumn].max()

    # Flags are optional: frames without an image_url column are plotted without them.
    if 'image_url' in data.columns:
        flag_urls = data['image_url']
    else:
        flag_urls = [None] * len(data)

    for country, flag_url, ppl_vac in zip(data[xcolumn], flag_urls, data[ycolumn]):
        # Skip missing / non-string URLs (e.g. NaN left by a merge).
        if not flag_url or not isinstance(flag_url, str):
            continue
        fig.add_layout_image(
            dict(
                source=flag_url,
                x=country,
                # Place the image slightly above its bar; sizes are in data units.
                y=ppl_vac + 0.05 * max_y_val,
                sizex=0.5,
                sizey=0.08 * max_y_val,
                xanchor="center",
                yanchor="bottom",
                sizing="stretch",
                xref='x',
                yref='y'))

    # Leave headroom above the tallest bar for the images. With no rows left the
    # maximum is NaN, so the axis range is left to Plotly's defaults.
    if not data.empty:
        fig.update_yaxes(range=[0, max_y_val + 0.15 * max_y_val])

    # Without n there is no "Top N" to announce (previously rendered as "Top  <name>").
    xaxis_title = f"Top {n} {xcolumn.title()}" if n is not None else xcolumn.title()

    fig.update_layout(
        title=title,
        xaxis_title=xaxis_title,
        yaxis_title=ylabel,
        plot_bgcolor='rgba(0,0,0,0)',
        hovermode="x",
    )

    fig.show()

In [None]:
# One row per country: the record with the highest people_vaccinated value.
d_ff = (
    full_df
    .sort_values(by='people_vaccinated', ascending=False)
    .drop_duplicates(subset=['country'], keep='first', ignore_index=True)
)
d_ff.head()

In [None]:
# Top 5 countries by number of people vaccinated.
title = get_multi_line_title(
    title="People Vaccinated",
    subtitle="Individuals who received the first dose of the vaccine",
)
bar_plot(data=d_ff, xcolumn='country', ycolumn="people_vaccinated", title=title, colors="Blugrn", n=5)

In [None]:
# One row per country: the record with the highest people_vaccinated_per_hundred.
tdf = (
    full_df.copy()
    .sort_values(by='people_vaccinated_per_hundred', ascending=False)
    .drop_duplicates(subset=['country'], keep='first', ignore_index=True)
)

In [None]:
# Top 10 countries by people vaccinated per hundred.
title = get_multi_line_title(
    title="People Vaccinated per Hundred",
    subtitle="Percent of individuals who received the first dose of the vaccine",
)
bar_plot(data=tdf, xcolumn='country', ycolumn="people_vaccinated_per_hundred", title=title,
         colors="Blugrn", ylabel='Percent', n=10)

In [None]:
# Top 10 countries by total vaccinations: keep each country's highest record.
tdf = (
    full_df.copy()
    .sort_values(by='total_vaccinations', ascending=False)
    .drop_duplicates(subset=['country'], keep='first', ignore_index=True)
)
title = get_multi_line_title(
    title="Total Vaccinations",
    subtitle="Total number of vaccinations by country",
)
bar_plot(data=tdf, xcolumn='country', ycolumn="total_vaccinations", title=title, colors="Purp", n=10)

In [None]:
# Top 10 countries by total vaccinations per hundred: keep each country's highest record.
tdf = full_df.copy()
tdf = tdf.sort_values('total_vaccinations_per_hundred', ascending=False).\
    drop_duplicates(subset=['country'], keep='first', ignore_index=True)
title = get_multi_line_title("Total Vaccinations per Hundred", "Ratio between vaccination number and total population up to the date in the country;")
# The plotted value is a per-hundred ratio, so the default "Count" axis/hover label
# would be misleading; pass an explicit label as the other per-hundred chart does.
bar_plot(tdf, 'country', "total_vaccinations_per_hundred", title, "Purp", n=10, ylabel='Vaccinations per hundred')

In [None]:
# NOTE(review): this reads the same CSV path that an earlier cell already loaded into
# df_regions; the reload looks redundant.
df_regions = pd.read_csv('/kaggle/input/countries-iso-codes-continent-flags-url/countries_continents_codes_flags_url.csv')
df_regions.head()

In [None]:
# Attach region and flag URL to each vaccination row, then keep, for every country,
# the record with the highest people_vaccinated_per_hundred.
region_columns = df_regions[['alpha-3', 'region', 'image_url']]
tdf = df.merge(region_columns, left_on='iso_code', right_on='alpha-3')
continents_tdf = (
    tdf
    .sort_values(by='people_vaccinated_per_hundred', ascending=False)
    .drop_duplicates(subset=['country'], keep='first', ignore_index=True)
)

continents_tdf.head()

In [None]:
continents_tdf.region.unique()

In [None]:
# Bar chart with one trace per continent (its five countries with the highest
# people_vaccinated_per_hundred) and a row of buttons that shows one trace at a time.
continents = ['Europe', 'Oceania', 'Americas', 'Africa', 'Asia']
default_continent = continents[0]  # trace shown initially; matches active=0 below
fig = go.Figure()

for continent in continents:
    temp_df = continents_tdf[continents_tdf['region'] == continent]
    temp_df = temp_df.sort_values('people_vaccinated_per_hundred', ascending=False).iloc[:5]

    fig.add_trace(
        go.Bar(
            y=temp_df['people_vaccinated_per_hundred'],
            x=temp_df['country'],
            name=continent,
            marker={'color': temp_df['people_vaccinated_per_hundred'], 'colorscale': 'tealgrn'},
            visible=(continent == default_continent)))

# One button per continent. The visibility mask has exactly one entry per trace; the
# previous hand-computed mask ([False]*(2-i+1)) was sized for four traces, not five.
buttons = [
    dict(
        label=continent,
        method="update",
        args=[{"visible": [other == continent for other in continents]},
              {'title': f'Top 5 countries for {continent}'}])
    for continent in continents
]

fig.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            active=0,
            x=1.57,
            y=1.2,
            buttons=buttons)
    ])

# The initial title matches the initially visible trace and the button titles
# (it previously read "Top 5 publishers per region").
fig.update_layout(
    title_text=f"Top 5 countries for {default_continent}",
    xaxis_domain=[0.05, 1.0])

fig.show()

In [None]:
world_summary_df.head()

In [None]:
# One row per country, ordered by deaths-to-population ratio (highest first).
df_1 = (
    world_summary_df
    .sort_values(by='total_deaths_ratio', ascending=False)
    .drop_duplicates(subset=['country'], keep='first', ignore_index=True)
)
df_1.head()

In [None]:
# Drop countries without a deaths ratio; the ratio is used as marker size in the map below.
df_1 = df_1.dropna(subset=['total_deaths_ratio'])

In [None]:
# Bubble map: one marker per country, sized by deaths ratio and coloured by continent.
fig = px.scatter_geo(
    df_1,
    locations='country',
    locationmode='country names',  # match the locations against country names
    color='continent',
    hover_name='country',
    size='total_deaths_ratio',
)

geo_style = dict(showframe=False, showcoastlines=False, projection_type='equirectangular')
font_style = dict(family='TimesNewRoman', size=18, color='Black')

# Last expression of the cell: update_layout returns the figure, which the notebook renders.
fig.update_layout(
    title_text='Deaths ratio by country',
    legend_orientation='h',
    geo=geo_style,
    font=font_style,
)

In [None]:
# One row per country, ordered by confirmed-cases-to-population ratio (highest first).
df_2 = (
    world_summary_df
    .sort_values(by='total_confirmed_ratio', ascending=False)
    .drop_duplicates(subset=['country'], keep='first', ignore_index=True)
)
df_2.head()

In [None]:
# Drop countries without a confirmed-cases ratio.
df_2 = df_2.dropna(subset=['total_confirmed_ratio'])

In [None]:
# Bubble map: one marker per country, sized by confirmed-cases ratio and coloured by
# continent.
#
# Plot df_2, the frame prepared for this metric (rows with a missing
# total_confirmed_ratio removed). The previous version passed df_1, which was only
# cleaned for total_deaths_ratio, so missing values could reach the `size` argument.
fig = px.scatter_geo(
    df_2,
    locations='country',
    color='continent',
    locationmode='country names',
    hover_name='country',
    size='total_confirmed_ratio'
)

# Last expression of the cell: update_layout returns the figure, which the notebook renders.
fig.update_layout(
    title_text='Total confirmed ratio by country<br><sub>Total confirmed cases divided by population</sub>',
    legend_orientation='h',
    geo=dict(
        showframe=False,
        showcoastlines=False,
        projection_type='equirectangular'),
    font=dict(
        family='TimesNewRoman',
        size=18,
        color='Black'
    )
)

<h2>World summaries</h2>
    

In [None]:
world_summary_df.head()

In [None]:
world_summary_df.describe()

In [None]:
# Pairwise correlation of the world-summary features, with the upper triangle
# (diagonal included) blanked out so that each pair appears only once.
# Names: `corrl` (letter l) is the full matrix, `corr1` (digit 1) the masked copy.
corrl = world_summary_df.drop(columns=['country', 'continent'])

# Restrict to numeric/boolean columns explicitly: recent pandas versions raise on
# non-numeric columns in .corr() instead of silently skipping them.
corrl = corrl.select_dtypes(include=['number', 'bool']).corr()

# np.bool was a deprecated alias of the builtin and was removed in NumPy 1.24;
# plain `bool` works on every NumPy version.
mask = np.triu(np.ones_like(corrl, dtype=bool))
corr1 = corrl.mask(mask)

In [None]:
corrl

In [None]:
mask

In [None]:
# Annotated heatmap of the masked correlation matrix (values rounded to 2 decimals).
fig = ff.create_annotated_heatmap(
    z = corr1.to_numpy().round(2),
    x = list(corr1.index.values),
    y = list(corr1.columns.values),
    xgap=3,
    ygap=3,
    colorscale='blugrn',
    colorbar_thickness=30,
    colorbar_ticklen=3
)

fig.update_layout(
    title_text='<span style="font-size:32px; font-family:Times New Roman">Features Correlation Matrix</span>',
    font_family="Serif",
    # title_font replaces the deprecated `titlefont` layout attribute, which newer
    # Plotly releases no longer accept.
    title_font={'size': 24},
    width=800, height=700,
    xaxis={'side': 'bottom'},
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange='reversed',  # first row at the top, like a printed matrix
    paper_bgcolor=primary_bgcolor,
    plot_bgcolor=primary_bgcolor,
    margin=dict(l=70, r=70, t=70, b=70, pad=1),
)
fig.update_xaxes(
    ticklabelposition="outside bottom",
)

fig.show()

<h2>Confirmed cases</h2>

In [None]:
# Reload the country/region lookup and attach region and flag URL to each vaccination row.
regions_df = pd.read_csv('/kaggle/input/countries-iso-codes-continent-flags-url/countries_continents_codes_flags_url.csv')
# NOTE(review): this rebinds `full_df`; the frame built here has no world-summary
# columns, so the name no longer refers to the earlier merged frame.
full_df = df.merge(regions_df[['alpha-3', 'region', 'image_url']], left_on='iso_code', right_on='alpha-3')

full_df.head()

In [None]:
# Sum the three vaccination measures for every (region, date) pair.
summed_measures = dict.fromkeys(
    ['people_vaccinated', 'daily_vaccinations', 'people_vaccinated_per_hundred'],
    'sum',
)
vac_df1 = full_df.groupby(['region', 'date']).agg(summed_measures)

In [None]:
vac_df1

In [None]:
# Turn the (region, date) index back into columns and order the rows by date.
# NOTE(review): if `date` still holds 'MM-DD-YYYY' text here, this is a text sort and
# only matches calendar order within a single year — confirm.
vac_df1 = vac_df1.reset_index().sort_values('date')

In [None]:
# Keep rows strictly between 3 January 2021 and 27 April 2021.
# `date` holds 'MM-DD-YYYY' text at this point, so comparing the strings directly is
# lexicographic and would let other years through (e.g. '02-15-2022' sorts between
# the two bounds). Parse to timestamps for the comparison only; the column itself is
# left unchanged.
parsed_dates = pd.to_datetime(vac_df1['date'], format='%m-%d-%Y')
in_window = (parsed_dates > pd.Timestamp('2021-01-03')) & (parsed_dates < pd.Timestamp('2021-04-27'))
vac_df1 = vac_df1[in_window]
vac_df1.head()

In [None]:
# One filled line per region showing people_vaccinated over time.
fig = go.Figure()
for region in vac_df1['region'].unique():
    region_rows = vac_df1[vac_df1['region'] == region]
    region_trace = go.Scatter(
        x=region_rows['date'],
        y=region_rows['people_vaccinated'],
        fill='tozeroy',
        mode='lines',
        name=region,  # legend entry
    )
    fig.add_traces(region_trace)

title_font = dict(family='Serif', size=18, color='black')
fig.update_layout(
    title_text='People vaccinated over time <br><sub>Total number of persons vaccinated between 03/01 and 27/04</sub>',
    font=title_font,
)
fig.show()

In [None]:
# Stacked area chart of people_vaccinated_per_hundred over time, one band per region.
area_font = dict(family='Serif', size=18, color='black')
fig = px.area(
    vac_df1,
    x='date',
    y='people_vaccinated_per_hundred',
    color='region',
)
fig.update_layout(title_text='People vaccinated per hundred over time', font=area_font)
fig.show()

<h2>Mortality rates</h2>
   

In [None]:
# Load the worldometer summary table.
# NOTE(review): the file name matches the CSV loaded earlier into world_summary_df
# (via the absolute /kaggle/input path) — presumably the same data; confirm and reuse.
covid_cum = pd.read_csv('../input/covid19-global-dataset/worldometer_coronavirus_summary_data.csv')
covid_cum.head()

In [None]:
covid_cum.country.unique()

In [None]:
# Horizontal bars: total deaths per 1M population for a hand-picked set of countries.
countries = ['Spain', 'France', 'Germany', 'Turkey', 'UK', 'Italy']
selected = covid_cum['country'].isin(countries)
europe_df = covid_cum[selected].sort_values('total_deaths_per_1m_population')

deaths_bar = go.Bar(
    x=europe_df['total_deaths_per_1m_population'],
    y=europe_df['country'],
    orientation='h',
    marker={'color': europe_df['total_deaths_per_1m_population'], 'colorscale': 'tealgrn'},
)
fig = go.Figure(deaths_bar, layout=go.Layout(height=400, width=700))

fig.update_traces(marker_line_color='rgb(76,12,123)', marker_line_width=1, opacity=0.8)

fig.update_layout(
    title_text="<span style='font-size:30px; font-family:Serif'>Mortality rate - Some European countries</span>",
    xaxis_title='Total deaths per 1m population',
)
fig.show()

In [None]:
# Horizontal bars: total confirmed cases per 1M population for a hand-picked set of
# countries.
countries= ['Spain', 'France', 'Germany', 'Turkey', 'UK', 'Italy']
europe_df = covid_cum[covid_cum['country'].isin(countries)].sort_values('total_cases_per_1m_population')

fig = go.Figure(go.Bar(
    x=europe_df['total_cases_per_1m_population'],
    y=europe_df['country'],
    orientation='h',
    marker={'color': europe_df['total_cases_per_1m_population'], 'colorscale': 'tealgrn'}),
    layout=go.Layout(height=400, width=700))

fig.update_traces(
    marker_line_color='rgb(76,12,123)',
    marker_line_width=1,
    opacity=0.8)

# The chart shows cases, not deaths: the title previously repeated "Mortality rate"
# from the deaths chart.
fig.update_layout(
    title_text="<span style='font-size:30px; font-family:Serif'>Confirmed cases - Some European countries</span>",
    xaxis_title='Total cases per 1m population',
)
fig.show()