In [None]:
#!pip install pandas matplotlib plotly==5.3.1

In [None]:
import pandas as pd
import math
from PIL import ImageColor
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json
# Load the three input files; all paths are relative to the notebook's
# working directory.
owid_covid_data = pd.read_csv('owid_covid_data_reduce.csv')

# Read the GeoJSON through a context manager so the file handle is closed as
# soon as parsing finishes (the bare json.load(open(...)) left it open), and
# decode explicitly as UTF-8 instead of relying on the platform default.
with open('countries.geojson', encoding='utf-8') as geojson_file:
    world_geo = json.load(geojson_file)

countries_coord = pd.read_csv('countries_codes_and_coordinates.csv')

In [None]:
# Keep only the rows that have a continent value.
# NOTE(review): presumably the rows without a continent are aggregate
# entries rather than countries — confirm against the dataset.
# .copy() makes the filtered frame independent of the frame read from disk,
# because a later cell modifies it in place (set_index / reset_index).
owid_covid_data = owid_covid_data[owid_covid_data['continent'].notna()].copy()

# Coordinates are cast to float so they can be used as map positions later.
countries_coord['Latitude (average)'] = countries_coord['Latitude (average)'].astype(float)
countries_coord['Longitude (average)'] = countries_coord['Longitude (average)'].astype(float)

# The preview must be the last expression of the cell to be displayed; it
# used to sit on the first line, where its result was silently discarded.
owid_covid_data.head()

In [None]:
# The eight regions that every chart below compares.
observe_regions = [
    'Southern Europe',
    'Western Europe',
    'East Asia',
    'South East Asia',
    'North America',
    'Australia and New Zealand',
    'Middle East',
    'South Asia',
]

# Select the rows of those regions by label on a temporary 'region' index.
# .loc with a list returns the rows grouped in the order of the list and
# raises KeyError if a listed region is absent from the data.
observe_regions_data = (
    owid_covid_data
    .set_index('region')
    .loc[observe_regions, :]
    .reset_index()
)

# Same index round trip on the full frame: 'region' ends up as the first
# column and the row index is renumbered from 0.
owid_covid_data = owid_covid_data.set_index('region').reset_index()

*Geographic map of people fully vaccinated*

In [None]:

# One row per country code: the last non-null value of each column within
# the country's rows, with anything still missing replaced by 0.0.
# NOTE(review): "last" is only the latest value if the rows are in
# chronological order — confirm against the input file.
people_group_vaccinated = (
    owid_covid_data[['iso_code', 'people_fully_vaccinated_per_hundred', 'total_deaths']]
    .groupby('iso_code')
    .last()
    .reset_index()
    .fillna(0.0)
)




# Short column names for the coordinate table; the country-code column gets
# the same name ('iso_code') as in the COVID data.
coord_column_aliases = {
    'Latitude (average)': 'lat',
    'Longitude (average)': 'long',
    'Alpha-3 code': 'iso_code',
}
countries_coord = countries_coord.rename(columns=coord_column_aliases)

# Remove surrounding spaces from the codes so they can match the COVID data.
countries_coord['iso_code'] = countries_coord['iso_code'].str.strip(' ')

# Lookup table indexed by iso_code with columns 'lat' and 'long'; if a code
# occurs more than once, the last non-null value of each column is kept.
countries_coord_cut = (
    countries_coord[['lat', 'long', 'iso_code']]
    .groupby('iso_code')
    .last()
)


def getcoord(x, type):
    """Look up one coordinate of a country in ``countries_coord_cut``.

    Parameters
    ----------
    x : str
        Country code used as row label (the table is indexed by iso_code).
    type : str
        Column to read, 'lat' or 'long'. The name shadows the builtin but is
        kept so existing keyword calls keep working.

    Returns
    -------
    The stored coordinate, or the string 'NOCOOR' when the code (or column)
    is not present in the lookup table.
    """
    try:
        return countries_coord_cut.loc[x, type]
    except KeyError:
        # Only a missing label means "no coordinates"; any other error is a
        # real problem and is no longer swallowed by a bare except.
        return 'NOCOOR'


people_group_vaccinated['lat'] = people_group_vaccinated['iso_code'].apply(lambda code: getcoord(code, 'lat'))
people_group_vaccinated['long'] = people_group_vaccinated['iso_code'].apply(lambda code: getcoord(code, 'long'))

# Drop the countries without coordinates. The explicit copy makes the result
# an independent frame, so the column assignments below do not write into a
# slice of the previous one.
people_group_vaccinated = people_group_vaccinated[people_group_vaccinated['lat'] != 'NOCOOR'].copy()

# With the sentinel rows gone the coordinates are purely numeric again;
# restore a float dtype (the sentinel may have turned the columns to object).
people_group_vaccinated['lat'] = people_group_vaccinated['lat'].astype(float)
people_group_vaccinated['long'] = people_group_vaccinated['long'].astype(float)

# Bubble size: total deaths scaled so the largest value maps to 80. The
# maximum is computed once instead of once per row inside apply().
max_total_deaths = people_group_vaccinated['total_deaths'].max()
people_group_vaccinated['size'] = 80 * people_group_vaccinated['total_deaths'] / max_total_deaths



# Choropleth layer: each country is coloured by its fully-vaccinated value,
# with the colour scale running from 0 to the largest value in the data.
# Countries are matched to GeoJSON features through properties.ISO_A3.
vaccinated_column = 'people_fully_vaccinated_per_hundred'
colour_scale_max = people_group_vaccinated[vaccinated_column].max()

fig = px.choropleth_mapbox(
    people_group_vaccinated,
    geojson=world_geo,
    featureidkey="properties.ISO_A3",
    locations='iso_code',
    color=vaccinated_column,
    color_continuous_scale="Viridis",
    range_color=(0, colour_scale_max),
    labels={vaccinated_column: 'People fully <br>vaccinated per 100'},
    mapbox_style="carto-positron",
    center={"lat": 40, "lon": 0},
    zoom=0.6,
    opacity=0.5,
)

# Bubble layer: one labelled marker per country at its coordinates, sized by
# the precomputed 'size' column.
fig.add_scattermapbox(
    lat=people_group_vaccinated['lat'],
    lon=people_group_vaccinated['long'],
    text=people_group_vaccinated['iso_code'],
    mode='markers+text',
    marker=dict(
        size=people_group_vaccinated['size'],
        color='rgb(255, 51, 0)',
    ),
    opacity=0.6,
)


# Transparent backgrounds and dark-grey text, then display and export.
map_text_colour = '#333333'
map_background = 'rgba(0,0,0,0)'

fig.update_layout(
    title=dict(text='Geographic map of people fully vaccinated and bubble deaths ratio'),
    margin=dict(r=0, t=40, l=0, b=0),
    height=550,
    width=850,
    paper_bgcolor=map_background,
    plot_bgcolor=map_background,
    legend_title_font_color=map_text_colour,
    legend_font_color=map_text_colour,
    title_font_color=map_text_colour,
    font_color=map_text_colour,
)
fig.show()

# Export as an HTML fragment without the plotly.js bundle: the page that
# embeds it has to load plotly.js itself. The target directory must exist.
fig.write_html(
    "./elementHTML_export/geomap.html",
    include_plotlyjs=False,
    full_html=False,
)


*The bar plot comparing people fully vaccinated, people vaccinated, and total vaccinations per hundred across world regions, as of 2/11/2021*

In [None]:
# Two chained groupbys: first take each country's last non-null value
# (the original note said "max", but the code uses last()), then average
# the countries of each region.
vaccination_columns = [
    'people_fully_vaccinated_per_hundred',
    'people_vaccinated_per_hundred',
    'total_vaccinations_per_hundred',
]
vaccinated = (
    observe_regions_data[['region', 'location'] + vaccination_columns]
    .groupby('location')
    .last()
    .groupby('region')
    .mean()
    .reset_index()
)

# Regions ordered by fully-vaccinated value (highest first), with the
# columns renamed to the labels shown in the chart.
region8vaccinated = (
    vaccinated
    .sort_values('people_fully_vaccinated_per_hundred', ascending=False)
    .rename(columns={
        'people_fully_vaccinated_per_hundred': 'People fully vaccinated per hundred',
        'people_vaccinated_per_hundred': 'People vaccinated per hundred',
        'total_vaccinations_per_hundred': 'Total vaccinations per hundred',
        'region': 'Regions',
    })
)

# Long format: one row per (region, criterion) pair for the grouped bars.
region8vaccinated_sns = region8vaccinated.melt(
    id_vars=['Regions'],
    value_vars=[
        'People fully vaccinated per hundred',
        'People vaccinated per hundred',
        'Total vaccinations per hundred',
    ],
    var_name='Criteria',
    value_name='Percent',
)


# Grouped bars: one cluster per region, one bar per criterion.
fig = px.bar(
    region8vaccinated_sns,
    x="Regions",
    y='Percent',
    color='Criteria',
    barmode='group',
    width=800,
    height=600,
)

# Transparent backgrounds with dark-grey text and grid lines.
bar_grey = '#333333'
bar_background = 'rgba(0,0,0,0)'
fig.update_layout(
    title_text='The bar plot comparing per hundred people who are fully <br>     and vaccinated throughout world regions  2/11/2021',
    paper_bgcolor=bar_background,
    plot_bgcolor=bar_background,
    yaxis={'gridcolor': bar_grey},
    xaxis={'gridcolor': bar_grey},
    legend_title_font_color=bar_grey,
    title_font_color=bar_grey,
    legend_font_color=bar_grey,
    font_color=bar_grey,
)
fig.show()

# HTML fragment without plotly.js; the embedding page must provide it.
fig.write_html(
    "./elementHTML_export/barchart.html",
    include_plotlyjs=False,
    full_html=False,
)

*The line plot shows the vaccination rate per hundred people in each region from the beginning of 2020 to the end of 2021*


In [None]:
# Collapse every date to its month, stored as a 'YYYY-MM' string. This
# overwrites the 'date' column of observe_regions_data, and the later cells
# that group by 'date' work on these monthly labels.
observe_regions_data['date'] = (
    pd.to_datetime(observe_regions_data['date'])
    .dt.to_period('M')
    .astype(str)
)

# Region names in order of first appearance in the data.
region = observe_regions_data['region'].unique()

# Monthly mean of every numeric column per region.
# - The per-region loop that used to build this frame first was dead code:
#   its result was overwritten immediately by this groupby, so it is removed.
# - numeric_only=True is explicit because the frame also holds text columns
#   (e.g. 'location'); pandas >= 2.0 raises on them instead of silently
#   dropping them as older versions did.
area_time_sr = (
    observe_regions_data
    .groupby(['date', 'region'])
    .mean(numeric_only=True)
    .reset_index()
    .rename(columns={
        'total_vaccinations_per_hundred': 'Total vaccinations per hundred',
        'date': 'Date',
        'region': 'Regions',
        'new_deaths': 'New Deaths',
        'new_cases': 'New Cases',
    })
)


# Three stacked panels sharing one colour per region:
#   row 1 - total vaccinations per hundred
#   row 2 - new deaths as a percentage of the region's own peak month
#   row 3 - new cases as a percentage of the region's own peak month
fig = make_subplots(rows=3, cols=1)

line_palette = px.colors.qualitative.Plotly

# The loop used to run over [''] — a single empty region name that matches
# no rows — so every trace was empty. Iterate over the observed regions.
for idc, reg in enumerate(observe_regions):
    # Independent copy: the columns below are overwritten, which must not
    # write into a slice of area_time_sr.
    df_ref = area_time_sr[area_time_sr['Regions'] == reg].copy()

    # Scale to 0-100 relative to the region's maximum; the maximum is
    # computed once per column rather than once per row.
    df_ref['New Deaths'] = 100 * df_ref['New Deaths'] / df_ref['New Deaths'].max()
    df_ref['New Cases'] = 100 * df_ref['New Cases'] / df_ref['New Cases'].max()

    # Wrap around the palette so a longer region list cannot raise IndexError.
    region_colour = line_palette[idc % len(line_palette)]

    # Only the first panel contributes a legend entry; legendgroup ties the
    # three traces of a region together so they toggle as one.
    panels = (
        (1, 'Total vaccinations per hundred', True),
        (2, 'New Deaths', False),
        (3, 'New Cases', False),
    )
    for panel_row, value_column, in_legend in panels:
        fig.add_trace(
            go.Scatter(
                x=df_ref['Date'],
                y=df_ref[value_column],
                marker=dict(color=region_colour),
                showlegend=in_legend,
                legendgroup=reg,
                name=reg,
            ),
            row=panel_row, col=1,
        )


# Transparent backgrounds, dark-grey text and grid lines on all three panels.
line_grey = '#333333'
line_background = 'rgba(0,0,0,0)'

fig.update_layout(
    paper_bgcolor=line_background,
    plot_bgcolor=line_background,
    width=850,
    height=650,
    title_text="Line chart of the number of vaccines injected per 100 people, the number <br>of deaths, the number of infections",
    legend_title_font_color=line_grey,
    title_font_color=line_grey,
    font_color=line_grey,
)

# One y-axis title per panel, top to bottom.
panel_y_titles = [
    "Total vaccinations per hundred",
    "New Deaths/max",
    "New Cases/max",
]
for panel_row, y_title in enumerate(panel_y_titles, start=1):
    fig.update_yaxes(
        title_text=y_title,
        gridcolor=line_grey,
        row=panel_row,
        col=1,
        title_font_color=line_grey,
    )

for panel_row in (1, 2, 3):
    fig.update_xaxes(gridcolor=line_grey, row=panel_row, col=1)

fig.show()

# HTML fragment without plotly.js; the embedding page must provide it.
fig.write_html(
    "./elementHTML_export/linechart.html",
    include_plotlyjs=False,
    full_html=False,
)


*The polar plot comparing parameters between regions*


In [None]:

# Last non-null value of each indicator per country ...
ability_indicators = [
    'stringency_index',
    'human_development_index',
    'gdp_per_capita',
    'hospital_beds_per_thousand',
]
Ability8_base_location = (
    observe_regions_data[['region', 'location'] + ability_indicators]
    .groupby('location')
    .last()
)

# ... averaged over the countries of each region.
Ability8_base_region = Ability8_base_location.groupby('region').mean()

# Express every indicator as a percentage of the largest regional value, so
# the best region scores 100 on each axis of the radar chart.
for indicator in ('gdp_per_capita', 'stringency_index',
                  'human_development_index', 'hospital_beds_per_thousand'):
    scale_to_percent = 100 / Ability8_base_region[indicator].max()
    Ability8_base_region[indicator] *= scale_to_percent

# Not the last expression of the cell, so this preview produces no output.
Ability8_base_region.head(8)

# One closed polygon per region: the indicator names are the angular axes
# and the normalised scores are the radii.
figgo = go.Figure()

axis_names = Ability8_base_region.columns.tolist()

for idc, er in enumerate(Ability8_base_region.index):
    outline_colour = px.colors.qualitative.Plotly[idc]
    # Same colour for the fill, converted from hex to RGB so a low alpha
    # can be applied.
    red, green, blue = ImageColor.getcolor(outline_colour, "RGB")
    figgo.add_trace(
        go.Scatterpolar(
            r=Ability8_base_region.loc[er].tolist(),
            theta=axis_names,
            line=dict(color=outline_colour),
            fill='toself',
            fillcolor=f"rgba({red}, {green}, {blue}, 0.09)",
            name=er,
        )
    )

# Fixed 0-100 radial range (the scores are percentages of the maximum),
# transparent backgrounds and dark-grey text.
radar_grey = '#333333'
radar_background = 'rgba(0,0,0,0)'

figgo.update_layout(
    polar={'radialaxis': {'visible': True, 'range': [0, 100]}},
    showlegend=True,
    title={'text': 'The polar plot comparing parameters between regions'},
    height=600,
    width=820,
    paper_bgcolor=radar_background,
    plot_bgcolor=radar_background,
    yaxis={'gridcolor': radar_grey},
    xaxis={'gridcolor': radar_grey},
    font_color=radar_grey,
)

figgo.show()

# HTML fragment without plotly.js; the embedding page must provide it.
figgo.write_html(
    "./elementHTML_export/radarchart.html",
    include_plotlyjs=False,
    full_html=False,
)


*Pie charts of each region's share of population, COVID-19 cases, deaths, and vaccinations*

In [None]:

# 2 x 2 grid of 'domain' cells, the subplot type pie traces require.
fig = make_subplots(
    rows=2,
    cols=2,
    specs=[[{'type': 'domain'} for _ in range(2)] for _ in range(2)],
)

# Regional totals: last non-null value per country, summed per region.
c4date = (
    observe_regions_data[['region', 'location', 'population', 'total_cases',
                          'total_deaths', 'total_vaccinations']]
    .groupby('location')
    .last()
    .groupby('region')
    .sum()
)

# Region names label the slices of every pie.
labels = c4date.index

fig.add_trace(
    go.Pie(labels=labels, values=c4date['population'], name="Share Population"),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(labels=labels, values=c4date['total_cases'].tolist(), name="Share Cases"),
    row=1, col=2,
)
fig.add_trace(
    go.Pie(labels=labels, values=c4date['total_deaths'].tolist(), name="Share Deaths"),
    row=2, col=1,
)
fig.add_trace(
    go.Pie(labels=labels, values=c4date['total_vaccinations'].tolist(), name="Share Vaccinations"),
    row=2, col=2,
)

# Turn the pies into donuts and show label, percentage and trace name on hover.
fig.update_traces(hole=.4, hoverinfo="label+percent+name")

# Captions placed by hand in the centre of each donut (paper coordinates).
donut_captions = [
    ('Population', 0.165, 0.805),
    ('Case', 0.807, 0.805),
    ('Death', 0.18, 0.19),
    ('Vaccination', 0.834, 0.19),
]

fig.update_layout(
    title_text="The pie plot of the rate of population, covid cases, dead, vaccinations, among regions",
    margin=dict(r=5, t=40, l=5, b=5),
    height=550,
    width=800,
    annotations=[
        dict(text=caption, x=x_pos, y=y_pos, font_size=13, showarrow=False)
        for caption, x_pos, y_pos in donut_captions
    ],
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    font_color='#333333',
)
fig.show()

# HTML fragment without plotly.js; the embedding page must provide it.
fig.write_html(
    "./elementHTML_export/piechart.html",
    include_plotlyjs=False,
    full_html=False,
)

  *RELATION BETWEEN VACCINATION, NEW CASES, AND NEW DEATHS*

In [None]:
# Heat map input: people_vaccinated, people_fully_vaccinated, new_cases,
# new_deaths, stringency_index, new_vaccinations, hospital_beds_per_thousand.
relation5_el = observe_regions_data[[
    'people_vaccinated', 'people_fully_vaccinated',
    'new_cases', 'new_deaths', 'location', 'date',
    'stringency_index', 'new_vaccinations', 'hospital_beds_per_thousand',
]]

# Average every indicator per 'date' label over all selected rows.
# numeric_only=True states explicitly that the text column 'location' is
# excluded from the mean; pandas >= 2.0 raises on it instead of silently
# dropping it as older versions did.
relation5_el = relation5_el.groupby('date').mean(numeric_only=True)

# Pairwise correlation between the averaged series (DataFrame.corr default
# method). The mid-cell .head() preview was removed: it displayed nothing.
corr_relation5_el = relation5_el.corr()

# Render the correlation matrix as a heat map.
fig = px.imshow(
    corr_relation5_el,
    title="Heatmap checks the correlation between factors together",
    width=600,
)

# Transparent backgrounds and dark-grey text.
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    font_color='#333333',
)

fig.show()

# HTML fragment without plotly.js; the embedding page must provide it.
fig.write_html(
    "./elementHTML_export/heatmap.html",
    include_plotlyjs=False,
    full_html=False,
)

*BOX PLOT NEW VACCINATIONS*

In [None]:

# Reuses the 'date' column of observe_regions_data as left by the earlier
# line-chart cell, so that cell has to run first.
# NOTE(review): that cell turns 'date' into monthly labels, which makes the
# means below monthly — confirm this is the intended granularity.
box_value_columns = ['new_vaccinations', 'new_deaths', 'new_cases']

erea = observe_regions_data['region'].unique()

container = []
for er in erea:
    in_region = observe_regions_data['region'] == er
    # Select the needed columns BEFORE averaging: mean() over the whole frame
    # also hit the text columns, which pandas >= 2.0 rejects with an error.
    df_erea = (
        observe_regions_data.loc[in_region, ['date'] + box_value_columns]
        .groupby('date')
        .mean()
        .reset_index()
    )
    df_erea['region'] = er
    container.append(df_erea[['region'] + box_value_columns])

# Rows where any of the three measures is missing are dropped entirely.
boxerea = pd.concat(container, axis=0).dropna()

# Long format: one row per (region, criterion) observation.
boxerea_melt = boxerea.melt(
    id_vars=['region'],
    value_vars=box_value_columns,
    var_name='Criteria',
    value_name='count',
)

# log10 scale for readability; values <= 1 (including zeros and negatives)
# are mapped to 0 so the logarithm is never taken of a non-positive number.
boxerea_melt['count'] = boxerea_melt['count'].apply(lambda x: math.log10(x) if x > 1 else 0)

boxerea_melt = boxerea_melt.rename(columns={'region': 'Regions'})

# One group of boxes per criterion, one box per region inside each group.
fig = px.box(
    boxerea_melt,
    x='Criteria',
    y="count",
    color="Regions",
    labels={"count": "count log 10"},
    height=550,
    width=600,
)

# The width set here (750) replaces the 600 passed to px.box above.
box_grey = '#333333'
fig.update_layout(
    title_text='The Box Plot of the number of new vaccines,new deaths <br> new cases according to the log10 of the regions',
    margin=dict(r=0, t=40, l=0, b=0),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    yaxis={'gridcolor': box_grey},
    xaxis={'gridcolor': box_grey},
    width=750,
    font_color=box_grey,
)
fig.show()

# HTML fragment without plotly.js; the embedding page must provide it.
fig.write_html(
    "./elementHTML_export/boxplot.html",
    include_plotlyjs=False,
    full_html=False,
)


# SHARE OF VACCINATIONS BY SUNBURST

In [None]:
# One row per country: rows with any missing value among the selected
# columns are discarded first, then the last remaining row of each country
# is kept. 'total_deaths' is renamed to the label shown on the colour bar.
df_sunburst_plot = (
    observe_regions_data[['region', 'location', 'total_vaccinations', 'total_deaths']]
    .dropna()
    .groupby('location')
    .last()
    .reset_index()
    .rename(columns={'total_deaths': 'Total deaths'})
)

# Inner ring = regions, outer ring = countries; slice size is the total
# number of vaccinations and slice colour is the total number of deaths.
fig = px.sunburst(
    df_sunburst_plot,
    path=['region', 'location'],
    values='total_vaccinations',
    color='Total deaths',
    color_continuous_scale='aggrnyl',
    hover_name='location',
    height=550,
    width=600,
)

fig.update_layout(
    title_text='The sunburst plot shows the share of vaccines given <br>and deaths by regions as of November 2, 2021',
    margin=dict(r=0, t=100, l=3, b=3),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    font_color='#333333',
)

fig.show()

# HTML fragment without plotly.js; the embedding page must provide it.
fig.write_html(
    "./elementHTML_export/sunburst.html",
    include_plotlyjs=False,
    full_html=False,
)