## Import necessary dependencies and data

In [None]:
# Mount Google Drive at /content/drive so files stored there are reachable from the Colab VM
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Kaggle
# Import OS for navigation and environment set up
import os

# Show the current location ('/content' is the Colab virtual machine).
# It has to be printed explicitly: a notebook only echoes a cell's LAST
# expression, so a bare os.getcwd() in the middle of the cell displays nothing.
print(os.getcwd())

# Enable the Kaggle environment: point the Kaggle CLI at the Drive directory
# that stores the kaggle.json API token
os.environ['KAGGLE_CONFIG_DIR'] = '/content/drive/MyDrive/kaggle'

In [None]:
!pip install kaggle



In [None]:
# Importing Covid Dataset from kaggle
# Navigate into Drive where you want to store your Kaggle data
os.chdir('/content/drive/MyDrive/kaggle')
# Paste and run the copied API command, the data will download to the current directory
!kaggle datasets download -d yamqwe/omicron-covid19-variant-daily-cases
# Check contents of directory, you should see the .zip file for the competition in your Drive
os.listdir()

Downloading omicron-covid19-variant-daily-cases.zip to /content/drive/MyDrive/kaggle
  0% 0.00/432k [00:00<?, ?B/s]
100% 432k/432k [00:00<00:00, 28.3MB/s]


['globalterrorismdb_0718dist.csv',
 'TSLA.csv',
 'kaggle.json',
 'tesla-inc-tsla-dataset.zip',
 'gtd.zip',
 'manufacturingcombenergyenduse-gwh-2014-csv-2.csv',
 'logs.log',
 'omicron-covid19-variant-daily-cases.zip']

In [None]:
# Complete path to storage location of the .zip file of data
zip_path = '/content/drive/MyDrive/kaggle/covid-19.zip'
# Check current directory (be sure you're in the directory where Colab operates: '/content')
os.getcwd()
# Copy the .zip file into the present directory
!cp '{zip_path}' .
# Unzip quietly 
!unzip -q 'covid-19.zip'
# View the unzipped contents in the virtual machine
os.listdir()

cp: '/content/drive/MyDrive/kaggle/covid-19.zip' and './covid-19.zip' are the same file


['globalterrorismdb_0718dist.csv',
 'TSLA.csv',
 'kaggle.json',
 'tesla-inc-tsla-dataset.zip',
 'gtd.zip',
 'manufacturingcombenergyenduse-gwh-2014-csv-2.csv',
 'logs.log',
 'covid-19.zip',
 'covid-variants.csv']

## Exploratory Data Analysis (EDA)

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import  plotly.express as px

In [None]:
# Load the extracted variants table from Drive and preview its first rows
csv_path = '/content/drive/MyDrive/kaggle/covid-variants.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,location,date,variant,num_sequences,perc_sequences,num_sequences_total
0,Angola,2020-07-06,Alpha,0,0.0,3
1,Angola,2020-07-06,B.1.1.277,0,0.0,3
2,Angola,2020-07-06,B.1.1.302,0,0.0,3
3,Angola,2020-07-06,B.1.1.519,0,0.0,3
4,Angola,2020-07-06,B.1.160,0,0.0,3


In [None]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100416 entries, 0 to 100415
Data columns (total 6 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   location             100416 non-null  object 
 1   date                 100416 non-null  object 
 2   variant              100416 non-null  object 
 3   num_sequences        100416 non-null  int64  
 4   perc_sequences       100416 non-null  float64
 5   num_sequences_total  100416 non-null  int64  
dtypes: float64(1), int64(2), object(3)
memory usage: 4.6+ MB


In [None]:
# Rename the three columns used by the charts below in a single pass
df = df.rename(
    columns={
        "location": "Location",
        "date": "Date",
        "num_sequences_total": "Total",
    }
)

In [None]:
# The distinct values of the Location column, in order of first appearance
df['Location'].unique()

array(['Angola', 'Argentina', 'Aruba', 'Australia', 'Austria', 'Bahrain',
       'Bangladesh', 'Belgium', 'Belize', 'Benin',
       'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei',
       'Bulgaria', 'Cambodia', 'Cameroon', 'Canada', 'Chile', 'Colombia',
       'Costa Rica', 'Croatia', 'Curacao', 'Cyprus', 'Czechia', 'Denmark',
       'Djibouti', 'Dominican Republic', 'Ecuador', 'Egypt', 'Estonia',
       'Ethiopia', 'Fiji', 'Finland', 'France', 'Gambia', 'Georgia',
       'Germany', 'Ghana', 'Greece', 'Guatemala', 'Hong Kong', 'Hungary',
       'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland',
       'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan',
       'Kenya', 'Kosovo', 'Kuwait', 'Latvia', 'Lebanon', 'Liechtenstein',
       'Lithuania', 'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia',
       'Maldives', 'Malta', 'Mauritius', 'Mexico', 'Moldova', 'Monaco',
       'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Nepal',
       'Netherlands', 'New

In [None]:
# The table has one row per (Location, Date, variant), and every variant row of
# a given location/day repeats the same `Total`. Summing the raw column would
# therefore count each day once per variant, so first collapse to a single
# `Total` per (Location, Date) and only then add the locations up per date.
location_day_totals = df.groupby(['Location', 'Date'])['Total'].first().reset_index()
dfomidate = location_day_totals.groupby('Date')['Total'].sum().reset_index()

fig = px.bar(dfomidate,
             x='Date',
             y='Total',
             color='Total',
             color_continuous_scale = 'reds')

# NOTE(review): `Total` is the renamed num_sequences_total column, i.e. sequenced
# samples across all variants; the "confirmed cases(Omicron)" wording in the
# title and axis label should be confirmed by the author.
fig.update_layout(title = '<b>World-Total confirmed cases(Omicron)-From 2020 to 2022</b>',
                  title_x = 0.5,
                  title_font = dict(size= 18, color = 'DarkRed'),
                  yaxis = dict(title = 'Total Cases'))
fig.show()

In [None]:
# Every variant row of a location/day repeats the same `Total`, so keep a single
# value per (Location, Date) before summing; otherwise each day is counted once
# per variant and all totals are inflated.
location_day_totals = df.groupby(['Location', 'Date'])['Total'].first().reset_index()

# Twenty locations with the largest summed `Total`, largest first
dfomilo = (location_day_totals.groupby('Location')['Total']
           .sum()
           .sort_values(ascending = False)
           .reset_index()
           .head(20))

fig = px.bar(dfomilo,
             x='Location',
             y='Total',
             color='Total',
             color_continuous_scale = 'reds')

# NOTE(review): `Total` is the renamed num_sequences_total column (sequenced
# samples, all variants); confirm the "confirmed cases(Omicron)" wording.
fig.update_layout(title = '<b>Top 20 countries with the most confirmed cases(Omicron)</b>',
                  title_x = 0.5,
                  title_font = dict(size= 18, color = 'DarkRed'),
                  yaxis = dict(title = 'Total Cases'))
fig.show()

In [None]:
# Six countries - Top 3 (United States, United Kingdom, Germany), Asia (Japan, South Korea) and the Philippines
dfus = df[df['Location']=='United States']
dfuk = df[df['Location']=='United Kingdom']
dfger = df[df['Location']=='Germany']
dfja = df[df['Location']=='Japan']
dfsk = df[df['Location']=='South Korea']
dfph = df[df['Location']=='Philippines']


def _daily_total(country_df):
    """Return one `Total` per date for a single country, sorted by date.

    The table has one row per (Location, Date, variant) and every variant row
    of a day repeats the same `Total`. Summing the column would count each day
    once per variant, so the first value of each date is taken instead.
    """
    return country_df.groupby('Date')['Total'].first().reset_index()


dfuaomi = _daily_total(dfus)
dfukomi = _daily_total(dfuk)
dfgeromi = _daily_total(dfger)
dfjaomi = _daily_total(dfja)
dfskomi = _daily_total(dfsk)
dfphomi = _daily_total(dfph)

# (legend name, per-day frame, colour, dash style) for each line of the chart.
# The Philippines entry uses its own frame (dfphomi); it previously re-plotted
# the South Korea data under the Philippines label.
country_traces = [
    ('United States', dfuaomi, 'DarkBlue', 'solid'),
    ('United Kingdom', dfukomi, 'DarkOrchid', 'dot'),
    ('Germany', dfgeromi, 'RoyalBlue', 'dashdot'),
    ('Japan', dfjaomi, 'Crimson', 'dash'),
    ('South Korea', dfskomi, 'LightSeaGreen', 'solid'),
    ('Philippines', dfphomi, 'Black', 'solid'),
]

fig = go.Figure()

for trace_name, trace_df, trace_color, trace_dash in country_traces:
    fig.add_trace(go.Scatter(x = trace_df['Date'],
                             y = trace_df['Total'],
                             mode = 'lines',
                             name = trace_name,
                             marker_color = trace_color,
                             line = dict(dash = trace_dash)))

# NOTE(review): `Total` is the renamed num_sequences_total column (sequenced
# samples, all variants); confirm the "confirmed cases(Omicron)" wording.
fig.update_layout(title = '<b>Six Countries-Total confirmed cases(Omicron)-From 2020 to 2022</b>',
                  title_x = 0.5,
                  title_font= dict(size = 18, color = 'Darkred'),
                  template = 'plotly_white')

fig.show()

In [None]:
fig = go.Figure()

# One row per (Location, Date). The raw table repeats the same `Total` on every
# variant row of a day, which would plot each date many times over.
# sort=False keeps the rows in their original file order.
daily_totals = df.groupby(['Location', 'Date'], sort=False)['Total'].first().reset_index()

location_list = list(daily_totals['Location'].unique())

# One trace per location. All traces start visible; picking a dropdown entry
# narrows the view to that single location.
for location in location_list:
    location_rows = daily_totals[daily_totals['Location'] == location]
    fig.add_trace(
        go.Scatter(
            x = location_rows['Date'],
            y = location_rows['Total'],
            name = location,
            visible = True))

# One dropdown button per location: its visibility mask is True only at the
# position of that location's trace.
buttons = [
    dict(label = location_name,
         method = "update",
         args = [{"visible": [pos == idx for pos in range(len(location_list))]}])
    for idx, location_name in enumerate(location_list)
]

fig.update_layout(
    # Dropdown menu placed above the plot area
    updatemenus=[dict(direction="down",
                      pad={"r": 10, "t": 10},
                      showactive=True,
                      x=0.15,
                      xanchor="left",
                      y=1.12,
                      yanchor="top",
                      buttons=buttons)],
    # Label shown to the left of the dropdown
    annotations=[
        dict(text="<b>Choose:</b>",
             showarrow=False,
             x=0,
             y=1.08,
             xref= 'paper',
             yref="paper",
             font=dict(size=15,color = 'DarkSlateBlue'))],
    title ="<b>Choose Location which you want to understand</b>",
    title_x = 0.5,
    title_font = dict(size = 18, color = 'Darkred'),
    autosize=False,
    width=1200,
    height=600)
fig.show()

In [None]:
# Every variant row of a location/day repeats the same `Total`, so keep a single
# value per (Location, Date) before summing; otherwise each day is counted once
# per variant and the mapped totals are inflated.
location_day_totals = df.groupby(['Location', 'Date'])['Total'].first().reset_index()

# Twenty locations with the largest summed `Total`, largest first
top_20 = (location_day_totals.groupby(['Location'])['Total']
          .sum()
          .sort_values(ascending = False)
          .reset_index()
          .head(20))

# NOTE(review): range_color pins the colour scale to [1, 1000000]; confirm that
# this range still suits the totals being mapped.
figure = px.choropleth(top_20,locations='Location', locationmode= 'country names', color= 'Total',
                       hover_name='Location', color_continuous_scale='tealgrn',
                       range_color=[1,1000000],title='Countries with Highest Number of CovidCases')
figure.show()