In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))


/kaggle/input/covid-world-vaccination-progress/country_vaccinations_by_manufacturer.csv
/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv


In [2]:
df=pd.read_csv("/kaggle/input/covid-world-vaccination-progress/country_vaccinations_by_manufacturer.csv")
df.head()

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Austria,2021-01-08,Johnson&Johnson,0
1,Austria,2021-01-08,Moderna,0
2,Austria,2021-01-08,Oxford/AstraZeneca,0
3,Austria,2021-01-08,Pfizer/BioNTech,31020
4,Austria,2021-01-15,Johnson&Johnson,0


#### Let's look at the vaccine progress in countries have most population at our data

In [3]:
df["date"]=pd.to_datetime(df["date"])
df1=df.groupby("vaccine")["total_vaccinations"].sum()
df_usa=df[df["location"]=="United States"]

In [4]:
fig = px.line(df_usa, x="date", y="total_vaccinations", color="vaccine",
              title="Vaccination Progress in USA",
              line_group="vaccine",width=1200, height=600)
fig.show()

In [5]:
df_germany=df[df["location"]=="Germany"]
fig = px.line(df_germany, x="date", y="total_vaccinations", color="vaccine",
              title="Vaccination Progress in Germany",
              line_group="vaccine",width=1200, height=600)
fig.show()

In [6]:
df_france=df[df["location"]=="France"]
fig = px.line(df_france, x="date", y="total_vaccinations", color="vaccine",
              title="Vaccination Progress in France",
              line_group="vaccine",width=1200, height=600)
fig.show()

In [7]:
df_italy=df[df["location"]=="Italy"]
fig = px.line(df_italy, x="date", y="total_vaccinations", color="vaccine",
              title="Vaccination Progress in Italy",
              line_group="vaccine",width=1200, height=600)
fig.show()

In [8]:
df_japan=df[df["location"]=="Japan"]
fig = px.line(df_japan, x="date", y="total_vaccinations", color="vaccine",
              title="Vaccination Progress in Japan",
              line_group="vaccine",width=1200, height=600)
fig.show()

In [9]:
df_spain=df[df["location"]=="Spain"]
fig = px.line(df_spain, x="date", y="total_vaccinations", color="vaccine",
              title="Vaccination Progress in Spain",
              line_group="vaccine",width=1200, height=600)
fig.show()

In [10]:
df=pd.read_csv("/kaggle/input/covid-world-vaccination-progress/country_vaccinations.csv")
df.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,0.0,0.0,,,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
1,Afghanistan,AFG,2021-02-23,,,,,1367.0,,,,35.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
2,Afghanistan,AFG,2021-02-24,,,,,1367.0,,,,35.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
3,Afghanistan,AFG,2021-02-25,,,,,1367.0,,,,35.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/
4,Afghanistan,AFG,2021-02-26,,,,,1367.0,,,,35.0,"Johnson&Johnson, Oxford/AstraZeneca, Pfizer/Bi...",World Health Organization,https://covid19.who.int/


In [11]:
df=df.fillna(0)

In [12]:
df1=df.groupby("country")["total_vaccinations"].max().reset_index().sort_values("total_vaccinations",ascending=False)[:20]
df1.head()

Unnamed: 0,country,total_vaccinations
40,China,1782525000.0
90,India,508664800.0
208,United States,351400900.0
27,Brazil,152989000.0
99,Japan,99651090.0


#### Top 20 countries with the most total vaccinations

In [13]:
fig=px.bar(df1, x="country", y="total_vaccinations", color="total_vaccinations")
fig.show()

#### Top 20 countries to have at least one dose of vaccine

In [14]:
df1=df.groupby("country")["people_vaccinated"].max().reset_index().sort_values("people_vaccinated", ascending=False)[:20]
fig=px.bar(df1, x="country", y="people_vaccinated", color="people_vaccinated")
fig.show()

#### Top 20 Countries with the most double dose vaccines

In [15]:
df1=df.groupby("country")["people_fully_vaccinated"].max().reset_index().sort_values("people_fully_vaccinated", ascending=False)[:20]
fig=px.bar(df1, x="country", y="people_fully_vaccinated", color="people_fully_vaccinated")
fig.show()

#### Most successful countries based on vaccination rates

In [16]:
df1=df.groupby("country")["total_vaccinations_per_hundred","people_vaccinated_per_hundred","people_fully_vaccinated_per_hundred"].max().reset_index()

fig=px.bar(df1.sort_values("people_vaccinated_per_hundred",ascending=False)[:40],x="country",y="people_vaccinated_per_hundred",color="people_vaccinated_per_hundred")
fig.show()
fig=px.bar(df1.sort_values("total_vaccinations_per_hundred",ascending=False)[:40],x="country",y="total_vaccinations_per_hundred",color="total_vaccinations_per_hundred")
fig.show()
fig=px.bar(df1.sort_values("people_fully_vaccinated_per_hundred",ascending=False)[:40],x="country",y="people_fully_vaccinated_per_hundred",color="people_fully_vaccinated_per_hundred")
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



#### Let's look at the same datas on the world map

In [17]:
df1=df.groupby("iso_code")["total_vaccinations","people_fully_vaccinated","people_vaccinated"].max().reset_index()
df1=df1.fillna(0)
fig = px.scatter_geo(df1, locations="iso_code", color="iso_code",
                     size="total_vaccinations",title="Total Vaccinations",
                     projection="mercator")
fig.show()

fig = px.scatter_geo(df1, locations="iso_code", color="iso_code",
                     size="people_fully_vaccinated",title="Fully Vaccinated People",
                     projection="mercator")
fig.show()

fig = px.scatter_geo(df1, locations="iso_code", color="iso_code",
                     size="people_vaccinated",title="People Vaccinated",
                     projection="mercator")
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



#### Now we will analyze european countries

In [18]:
europe=["AUT","BEL","BGR","HRV","CYP","CZE","DNK","EST","FIN",
       "FRA","DEU","GRC","TUR","HUN","IRL","ITA","LVA","LTU","LUX",
       "MLT","NLD","POL","PRT","ROU","SVK","SVN","ESP","SWE","GBR"]

df_europe=df.loc[df["iso_code"]=="BEL"]
for i in europe:
    df1=df.loc[df["iso_code"]==i]
    df_europe=pd.concat([df1,df_europe])
df_europe.drop_duplicates(keep=False, inplace=True)

df_europe.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
34210,United Kingdom,GBR,2020-12-13,86465.0,86465.0,0.0,0.0,0.0,0.13,0.13,0.0,0.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...
34211,United Kingdom,GBR,2020-12-14,0.0,0.0,0.0,0.0,84117.0,0.0,0.0,0.0,1239.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...
34212,United Kingdom,GBR,2020-12-15,0.0,0.0,0.0,0.0,84117.0,0.0,0.0,0.0,1239.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...
34213,United Kingdom,GBR,2020-12-16,0.0,0.0,0.0,0.0,84117.0,0.0,0.0,0.0,1239.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...
34214,United Kingdom,GBR,2020-12-17,0.0,0.0,0.0,0.0,84117.0,0.0,0.0,0.0,1239.0,"Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",Government of the United Kingdom,https://coronavirus.data.gov.uk/details/vaccin...


In [19]:
df1=df_europe.groupby("country")["total_vaccinations"].max().reset_index().sort_values("total_vaccinations",ascending=False)
fig=px.bar(df1, x="country", y="total_vaccinations", color="total_vaccinations")
fig.show()


df1=df_europe.groupby("country")["people_vaccinated"].max().reset_index().sort_values("people_vaccinated", ascending=False)
fig=px.bar(df1, x="country", y="people_vaccinated", color="people_vaccinated")
fig.show()

df1=df_europe.groupby("country")["people_fully_vaccinated"].max().reset_index().sort_values("people_fully_vaccinated", ascending=False)
fig=px.bar(df1, x="country", y="people_fully_vaccinated", color="people_fully_vaccinated")
fig.show()


#### Most successful countries based on vaccination rates in Europe

In [20]:
df1=df_europe.groupby("country")["total_vaccinations_per_hundred","people_vaccinated_per_hundred","people_fully_vaccinated_per_hundred"].max().reset_index()

fig=px.bar(df1.sort_values("people_vaccinated_per_hundred",ascending=False),x="country",y="people_vaccinated_per_hundred",color="people_vaccinated_per_hundred")
fig.show()
fig=px.bar(df1.sort_values("total_vaccinations_per_hundred",ascending=False),x="country",y="total_vaccinations_per_hundred",color="total_vaccinations_per_hundred")
fig.show()
fig=px.bar(df1.sort_values("people_fully_vaccinated_per_hundred",ascending=False),x="country",y="people_fully_vaccinated_per_hundred",color="people_fully_vaccinated_per_hundred")
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [21]:
df1=df_europe.groupby("iso_code")["total_vaccinations","people_fully_vaccinated","people_vaccinated"].max().reset_index()
df1=df1.fillna(0)
fig = px.scatter_geo(df1, locations="iso_code", color="iso_code",
                     size="total_vaccinations", title="Total Vaccinations",
                     projection="mercator")
fig.show()

fig = px.scatter_geo(df1, locations="iso_code", color="iso_code",
                     size="people_fully_vaccinated", title="People Fully Vaccinated",
                     projection="mercator")
fig.show()

fig = px.scatter_geo(df1, locations="iso_code", color="iso_code",
                     size="people_vaccinated", title="People Vaccinated",
                     projection="mercator")
fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



#### Let's look at the vaccination progress among the countries with the most vaccines

In [22]:
most_vacc_europe=["DEU","GBR","TUR","FRA","ITA"]
df2=df.loc[df["iso_code"]=="GBR"]
for i in most_vacc_europe:
    df1=df.loc[df["iso_code"]==i]
    df2=pd.concat([df1,df2])
df2.drop_duplicates(keep=False, inplace=True)

df2.head()

Unnamed: 0,country,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,daily_vaccinations_per_million,vaccines,source_name,source_website
16160,Italy,ITA,2020-12-27,7237.0,7237.0,0.0,0.0,0.0,0.01,0.01,0.0,0.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Extraordinary commissioner for the Covid-19 em...,https://raw.githubusercontent.com/italia/covid...
16161,Italy,ITA,2020-12-28,8679.0,8679.0,0.0,1442.0,1442.0,0.01,0.01,0.0,24.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Extraordinary commissioner for the Covid-19 em...,https://raw.githubusercontent.com/italia/covid...
16162,Italy,ITA,2020-12-29,9707.0,9707.0,0.0,1028.0,1235.0,0.02,0.02,0.0,20.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Extraordinary commissioner for the Covid-19 em...,https://raw.githubusercontent.com/italia/covid...
16163,Italy,ITA,2020-12-30,14472.0,14472.0,0.0,4765.0,2412.0,0.02,0.02,0.0,40.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Extraordinary commissioner for the Covid-19 em...,https://raw.githubusercontent.com/italia/covid...
16164,Italy,ITA,2020-12-31,40054.0,40054.0,0.0,25582.0,8204.0,0.07,0.07,0.0,136.0,"Johnson&Johnson, Moderna, Oxford/AstraZeneca, ...",Extraordinary commissioner for the Covid-19 em...,https://raw.githubusercontent.com/italia/covid...


In [23]:
fig = px.line(df2, x="date", y="total_vaccinations", color="country",
              line_group="country" ,width=1000, height=500)
fig.show()

###### Note: There is a problem on the Turkey's data. This problem occurred because we change the nan values to 0.

#### Lastly we will visualize manufacturers

In [24]:
manufacturer=pd.read_csv("/kaggle/input/covid-world-vaccination-progress/country_vaccinations_by_manufacturer.csv")
manufacturer.head()

Unnamed: 0,location,date,vaccine,total_vaccinations
0,Austria,2021-01-08,Johnson&Johnson,0
1,Austria,2021-01-08,Moderna,0
2,Austria,2021-01-08,Oxford/AstraZeneca,0
3,Austria,2021-01-08,Pfizer/BioNTech,31020
4,Austria,2021-01-15,Johnson&Johnson,0


#### Which vaccine was used the most in which country?

In [25]:
manufacturer1=manufacturer.groupby(["location","vaccine"])["total_vaccinations"].max().reset_index()

pfizer=manufacturer1[manufacturer1.vaccine=="Pfizer/BioNTech"].sort_values("total_vaccinations",ascending=False)[:10]
fig=px.bar(pfizer, x="total_vaccinations",y="location",color="total_vaccinations", title="Pfizer/BioNtech",)
fig.show()

moderna=manufacturer1[manufacturer1.vaccine=="Moderna"].sort_values("total_vaccinations",ascending=False)[:10]
fig=px.bar(moderna, x="total_vaccinations",y="location",color="total_vaccinations", title="Moderna",)
fig.show()

Oxford_AstraZeneca=manufacturer1[manufacturer1.vaccine=="Oxford/AstraZeneca"].sort_values("total_vaccinations",ascending=False)[:10]
fig=px.bar(Oxford_AstraZeneca, x="total_vaccinations",y="location",color="total_vaccinations", title="Oxford/AstraZeneca",)
fig.show()

Johnson=manufacturer1[manufacturer1.vaccine=="Johnson&Johnson"].sort_values("total_vaccinations",ascending=False)[:10]
fig=px.bar(Johnson, x="total_vaccinations",y="location",color="total_vaccinations", title="Johnson&Johnson",)
fig.show()

#### Vaccine Usage Rate

In [26]:
manufacturer2=manufacturer1.groupby("vaccine")["total_vaccinations"].sum().reset_index()

fig=px.pie(manufacturer2, values="total_vaccinations",names="vaccine")
fig.show()