# **Covid 19 Data Analysis**

Repository: https://github.com/owid/covid-19-data

Data Source: https://catalog.ourworldindata.org/garden/covid/latest/compact/compact.csv


# 1. Loading the libraries


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import seaborn as sns

# Lift pandas' display limits so DataFrames are shown without truncating
# columns or rows.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)


# 2. Loading and Understanding the Data


In [None]:
# URL of the compact COVID-19 CSV listed as the data source above
data = 'https://catalog.ourworldindata.org/garden/covid/latest/compact/compact.csv'

# Parse the 'date' column as datetimes while reading
df = pd.read_csv(filepath_or_buffer=data, parse_dates=['date'])
df.head()

In [None]:
# (number of rows, number of columns) of the loaded dataset
df.shape

In [None]:
# All rows whose country is India
df.loc[df['country'].eq('India')]

# 3. Exploratory Data Analysis

In [None]:
# Count of missing values in each column
df.isna().sum()

In [None]:
# Percentage of missing values per column, computed in one vectorised pass
missing_pct = df.isna().sum() / len(df) * 100

NaN = pd.DataFrame({
    'column_name': missing_pct.index,
    'percentage': missing_pct.values,
})
NaN

In [None]:
# Distinct values of the 'country' column, and how many there are
country_names = df['country'].unique()
print(country_names)
len(country_names)

### 3.1 Summary of the Most Recent Data

In [None]:
# Remove aggregate entities (world, continents, income groups, events) so
# that the per-date and per-country sums below only count individual
# countries. 'South America' is included so that every continent aggregate
# is removed; names that are absent from the data are simply ignored by isin().
to_drop = ['Africa',
    'Asia',
    'Europe',
    'Oceania',
    'North America',
    'South America',
    'World',
    'World excl. China',
    'World excl. China and South Korea',
    'World excl. China, South Korea, Japan and Singapore',
    'Summer Olympics 2020',
    'Asia excl. China',
    'High-income countries',
    'Low-income countries',
    'Lower-middle-income countries',
    'Upper-middle-income countries',
    'Winter Olympics 2022',
    'European Union (27)'
]

# Drop in place so that `df` itself no longer contains the aggregate rows
df.drop(df[df['country'].isin(to_drop)].index, inplace=True)



In [None]:
# Snapshot of the data for a requested date, falling back to the latest date
# that has a reported total_cases value when the requested date has none.
requested_date_str = '2024-08-19'
requested_date = pd.to_datetime(requested_date_str).normalize()

# Only rows with a reported cumulative case count are considered valid; the
# same filter is applied to both the requested date and the fallback date.
df_valid = df[df['total_cases'].notna()]
df_requested_date = df_valid[df_valid['date'] == requested_date]

if not df_requested_date.empty:
    df_recent = df_requested_date.reset_index(drop=True)
    print(f"Displaying data for {requested_date_str}.")
else:
    latest_valid_date = df_valid['date'].max()
    df_recent = df_valid[df_valid['date'] == latest_valid_date].reset_index(drop=True)
    print(f"No valid data found for {requested_date_str}. Displaying data for the latest available date: {latest_valid_date.strftime('%Y-%m-%d')}.")

df_recent.head()

In [None]:
# Rows for the earliest date in the dataset. drop=True discards the old row
# labels instead of keeping them as an extra 'index' column, matching how
# df_recent is built.
earliest_date = df['date'].min()
df_earliest = df[df['date'] == earliest_date].reset_index(drop=True)
df_earliest.head()

### 3.2 Summary Table

In [None]:
# Sum the headline metrics over all rows of the snapshot, one row per date
summary_metrics = ['total_cases', 'new_cases', 'new_deaths', 'total_deaths']
df_world = (
    df_recent
    .groupby(['date'])[summary_metrics]
    .sum()
    .reset_index()
)

In [None]:
# Column headers of the summary table, in display order
labels = [
    'Last Update',
    'Total Confirmed',
    'New Cases',
    'New Deaths',
    'Total Deaths'
]

# df_world columns backing each header, in the same order as `labels`
summary_fields = ['date', 'total_cases', 'new_cases', 'new_deaths', 'total_deaths']

# One single-element column per field, taken from the first row of df_world
table_cells = [[df_world.loc[0, field]] for field in summary_fields]

summary_table = go.Table(
    header=dict(values=labels),
    cells=dict(values=table_cells),
)
fig = go.Figure(data=[summary_table])

fig.update_layout(title='Covid-19 Summary Table')
fig.show()

In [None]:
# Per-date sums of the headline metrics, ordered by date with a fresh
# 0..n-1 index
daily_totals = df.groupby(['date'])[['total_cases', 'new_cases', 'new_deaths', 'total_deaths']].sum()
df_overtime = (
    daily_totals
    .reset_index()
    .sort_values('date', ascending=True)
    .reset_index(drop=True)
)
df_overtime.head()

In [None]:
# Line chart of summed total cases; x is the row position in df_overtime
# (one row per date)
total_cases_trace = go.Scatter(
    x=df_overtime.index,
    y=df_overtime['total_cases'],
    mode='lines',
    name='Total Cases',
)
fig = go.Figure(data=[total_cases_trace])
fig.update_layout(
    title='Total Cases of Covid-19 Over Time in the World',
    template='plotly_white',
    xaxis_title='Days',
    yaxis_title='Total Cases',
)
fig.show()

In [None]:
# Bar chart of summed total cases per date. Dates are on the x axis and case
# counts on the y axis, so the axis titles are labelled accordingly.
fig = go.Figure()
fig.add_trace(go.Bar(x=df_overtime['date'], y=df_overtime['total_cases']))
fig.update_layout(
    title='Total Cases of Covid-19 Over Time in the World',
    template='plotly_white',
    xaxis_title='Date',
    yaxis_title='Total Cases',
)
fig.show()

In [None]:
# Line chart of summed total deaths; x is the row position in df_overtime
fig = go.Figure()
fig.add_scatter(
    x=df_overtime.index,
    y=df_overtime['total_deaths'],
    mode='lines',
    name='Total Deaths',
)
fig.update_layout(
    title='Total Deaths due to Covid-19 Over Time in the World',
    template='plotly_dark',
    xaxis_title='Days',
    yaxis_title='Total Deaths',
)
fig.show()

In [None]:
# Line chart of summed new cases; x is the row position in df_overtime
new_cases_layout = dict(
    title='New Cases due to Covid-19 Over Time in the World',
    template='plotly_dark',
    xaxis_title='Days',
    yaxis_title='New Cases',
)
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_overtime.index,
            y=df_overtime['new_cases'],
            mode='lines',
            name='New Cases',
        )
    ]
)
fig.update_layout(**new_cases_layout)
fig.show()

In [None]:
# Line chart of summed new deaths; x is the row position in df_overtime
new_deaths_trace = go.Scatter(
    x=df_overtime.index,
    y=df_overtime['new_deaths'],
    mode='lines',
    name='New Deaths',
)
fig = go.Figure()
fig.add_trace(new_deaths_trace)
fig.update_layout(
    title='New Deaths due to Covid-19 Over Time in the World',
    template='plotly_dark',
    xaxis_title='Days',
    yaxis_title='New Deaths',
)
fig.show()

### 3.3 Covid 19 by Country

In [None]:
# Sum new cases and new deaths per country, largest case count first
country_sums = df.groupby(['country'])[['new_cases', 'new_deaths']].sum()
df_per_country = (
    country_sums
    .reset_index()
    .sort_values('new_cases', ascending=False)
    .reset_index(drop=True)
)
df_per_country.head()

In [None]:
# Give the three columns presentation names, then cast the sums to integers
presentation_names = ['Country', 'Total Cases', 'Total Deaths']
df_per_country.columns = presentation_names

column_types = {'Country': str, 'Total Cases': int, 'Total Deaths': int}
df_per_country = df_per_country.astype(column_types)
df_per_country

In [None]:
# Horizontal bar chart: with orientation='h' the case counts run along the
# x axis and the countries along the y axis, so the axis titles follow that.
fig = go.Figure(go.Bar(x=df_per_country['Total Cases'], y=df_per_country['Country'], orientation='h'))
fig.update_layout(
    title='Total Cases of Covid-19 by Country',
    template='plotly_white',
    xaxis_title='Total Cases',
    yaxis_title='Country',
)
fig.show()

In [None]:
# Horizontal bar chart: with orientation='h' the death counts run along the
# x axis and the countries along the y axis, so the axis titles follow that.
fig = go.Figure(go.Bar(x=df_per_country['Total Deaths'], y=df_per_country['Country'], orientation='h'))
fig.update_layout(
    title='Total deaths of Covid-19 by Country',
    template='plotly_white',
    xaxis_title='Total Deaths',
    yaxis_title='Country',
)
fig.show()

# 4. Mapping Covid 19 Across the World

In [None]:
# World map coloured by each country's summed case count; countries are
# matched to the map by name
fig = px.choropleth(
    data_frame=df_per_country,
    locations=df_per_country['Country'],
    locationmode='country names',
    color=df_per_country['Total Cases'],
    hover_name=df_per_country['Country'],
    color_continuous_scale='Viridis',
    template='plotly_white',
)
fig.update_layout(title='Total Cases of Covid-19 Across the World')
fig.show()

In [None]:
# World map coloured by each country's summed death count
deaths_map_options = dict(
    locations=df_per_country['Country'],
    color=df_per_country['Total Deaths'],
    locationmode='country names',
    hover_name=df_per_country['Country'],
    color_continuous_scale='Reds',
    template='plotly_white',
)
fig = px.choropleth(df_per_country, **deaths_map_options)
fig.update_layout(title='Total Deaths of Covid-19 Across the World')
fig.show()

In [None]:
# Sum the per-million daily figures per country, largest case rate first,
# then rename the columns and truncate the sums to integers
per_million_sums = df.groupby(['country'])[['new_cases_per_million', 'new_deaths_per_million']].sum()
df_per_country_pm = (
    per_million_sums
    .reset_index()
    .sort_values('new_cases_per_million', ascending=False)
    .reset_index(drop=True)
)
df_per_country_pm.columns = ['Country', 'Total Cases per Million', 'Total Deaths per Million']

per_million_types = {
    'Country': str,
    'Total Cases per Million': int,
    'Total Deaths per Million': int,
}
df_per_country_pm = df_per_country_pm.astype(per_million_types)
df_per_country_pm.head()

In [None]:
# World map coloured by each country's summed cases per million
pm_countries = df_per_country_pm['Country']
pm_cases = df_per_country_pm['Total Cases per Million']

fig = px.choropleth(
    df_per_country_pm,
    locations=pm_countries,
    color=pm_cases,
    locationmode='country names',
    hover_name=pm_countries,
    color_continuous_scale='Viridis',
    template='plotly_white',
)
fig.update_layout(title='Total Cases of Covid-19 per Million Across the World')
fig.show()

In [None]:
# Bubble chart of the ten countries with the highest cases per million
df_per_country_pm = df_per_country_pm.sort_values('Total Cases per Million', ascending=False)

# First ten rows after sorting
top_countries = df_per_country_pm['Country'][:10]
top_cases_pm = df_per_country_pm['Total Cases per Million'][:10]

# Bubble area and colour both encode the per-million value; sizeref scales
# the largest bubble relative to a 40-unit reference size
bubble_marker = dict(
    size=top_cases_pm,
    sizemode='area',
    sizeref=2. * max(top_cases_pm) / (40. ** 2),
    sizemin=6,
    color=top_cases_pm,
    colorscale='Reds',
    showscale=True,
)

bubble_trace = go.Scatter(
    x=top_countries,
    y=top_cases_pm,
    mode='markers',
    marker=bubble_marker,
)
fig = go.Figure(data=[bubble_trace])

fig.update_layout(
    title='Most Infected Countries per Million Across the World',
    xaxis_title='Countries',
    yaxis_title='Total Cases per Million',
    template='plotly_white'
)

fig.show()


In [None]:
# Bubble chart of the ten countries with the highest deaths per million
df_per_country_pm = df_per_country_pm.sort_values('Total Deaths per Million', ascending=False)

# First ten rows after sorting
top_countries = df_per_country_pm['Country'][:10]
top_deaths_pm = df_per_country_pm['Total Deaths per Million'][:10]

fig = go.Figure(
    data=[
        go.Scatter(
            x=top_countries,
            y=top_deaths_pm,
            mode='markers',
            marker=dict(
                # Bubble area and colour both encode the per-million value
                size=top_deaths_pm,
                sizemode='area',
                sizeref=2. * max(top_deaths_pm) / (40. ** 2),
                sizemin=6,
                color=top_deaths_pm,
                colorscale='Reds',
                showscale=True))])

fig.update_layout(
    title='Most Deaths Countries per Million Across the World',
    xaxis_title='Countries',
    # The y values are deaths per million, so the axis is labelled as such
    yaxis_title='Total Deaths per Million',
    template='plotly_white'
)

fig.show()

# 5. Evolution of Confirmed Cases

In [None]:
# Last five rows of the dataset
df.tail()

In [None]:
# Animated world map of total cases, with one animation frame per distinct
# value of the 'date' column
cases_animation = dict(
    locations=df['country'],
    color=df['total_cases'],
    locationmode='country names',
    hover_name=df['country'],
    color_continuous_scale='Viridis',
    template='plotly_white',
    animation_frame='date',
)
fig = px.choropleth(df, **cases_animation)
fig.update_layout(title='Evolution of Total Cases of Covid-19 Across the World')
fig.show()

In [None]:
# Animated world map of total cases per million, one frame per date value
country_column = df['country']

fig = px.choropleth(
    data_frame=df,
    locations=country_column,
    locationmode='country names',
    color=df['total_cases_per_million'],
    hover_name=country_column,
    animation_frame='date',
    color_continuous_scale='Inferno',
    template='plotly_white',
)
fig.update_layout(title='Evolution of Total Cases per Million of Covid-19 Across the World')
fig.show()

In [None]:
# Animated world map of total deaths per million, one frame per date value
deaths_pm_title = 'Evolution of Total Deaths per Million of Covid-19 Across the World'

fig = px.choropleth(
    df,
    animation_frame='date',
    locations=df['country'],
    hover_name=df['country'],
    locationmode='country names',
    color=df['total_deaths_per_million'],
    color_continuous_scale='Reds',
    template='plotly_dark',
)
fig.update_layout(title=deaths_pm_title)
fig.show()

# 6. Comparing Countries and Continents

In [None]:
# First five rows, to recall the available columns before grouping
df.head()

In [None]:
# Sum the per-million daily figures per continent, largest case rate first.
# The result is stored under the existing df_per_country_pm name.
continent_sums = df.groupby(['continent'])[['new_cases_per_million', 'new_deaths_per_million']].sum()
df_per_country_pm = (
    continent_sums
    .reset_index()
    .sort_values('new_cases_per_million', ascending=False)
    .reset_index(drop=True)
)
df_per_country_pm.columns = ['Continent', 'Total Cases per Million', 'Total Deaths per Million']

continent_types = {
    'Continent': str,
    'Total Cases per Million': int,
    'Total Deaths per Million': int,
}
df_per_country_pm = df_per_country_pm.astype(continent_types)
df_per_country_pm.head(6)

In [None]:
# India's rows, kept separately for the comparison against the world series
india_mask = df['country'] == 'India'
df_india = df.loc[india_mask]
df_india.head()

In [None]:
# Re-read the full dataset: the 'World' rows were dropped from `df` earlier,
# so they have to be loaded again before they can be selected here.
data = 'https://catalog.ourworldindata.org/garden/covid/latest/compact/compact.csv'
df = pd.read_csv(filepath_or_buffer=data, parse_dates=['date'])

world_mask = df['country'] == 'World'
df_world = df[world_mask]
df_world.head()

In [None]:
# Compare India's cumulative cases per million with the world aggregate
fig = go.Figure()

fig.add_trace(go.Scatter(x=df_india['date'], y=df_india['total_cases_per_million'],
                         mode='lines',
                         name='Total Cases in India'))

fig.add_trace(go.Scatter(x=df_world['date'], y=df_world['total_cases_per_million'],
                         mode='lines',
                         name='Total Cases in the World'))

fig.update_layout(
    title='Evolution of Total Cases per million of Covid-19 over time in India as compared to the World',
    template='plotly_white',
    xaxis_title='Date',
    # Both traces plot total_cases_per_million, so the axis states that unit
    yaxis_title='Total Cases per Million',
)
fig.show()

# 7. Make a Flourish Bar Chart Race



In [None]:
# Start this section from a freshly loaded copy of the dataset
data = 'https://catalog.ourworldindata.org/garden/covid/latest/compact/compact.csv'
df = pd.read_csv(filepath_or_buffer=data, parse_dates=['date'])
df.head()

In [None]:
# Replace the date column with mm/dd/yy strings
df['date'] = pd.to_datetime(df['date']).dt.strftime('%m/%d/%y')
df.head()

In [None]:
# Check the reformatted date column
df.head()

In [None]:
# Wide table for the bar chart race: one row per country, one column per date
cases_by_country = pd.pivot_table(
    df,
    values='total_cases',
    index=['country'],
    columns=['date']
)

# The date labels are mm/dd/yy strings at this point, so pivot_table sorts
# them lexicographically (month, then day, then year). Reorder the columns
# chronologically by parsing the labels back into dates.
chronological_order = pd.to_datetime(cases_by_country.columns, format='%m/%d/%y').argsort()
cases_by_country = cases_by_country.iloc[:, chronological_order]

df_florish = cases_by_country.reset_index()

# Country/date combinations without a value are shown as 0
df_florish = df_florish.fillna(0)

df_florish.head()


In [None]:
# CSV that is merged onto the pivot table by 'country' below
# (presumably maps each country to a flag image URL; verify against the file)
file_name = "https://raw.githubusercontent.com/rajeevratan84/covid/master/flags_world.csv"
flags = pd.read_csv(filepath_or_buffer=file_name)
flags.head()



In [None]:
# Join the flags table onto the pivoted data by country (default inner join:
# only countries present in both tables are kept)
df_florish = df_florish.merge(flags, on='country')
df_florish.head()


In [None]:
from datetime import datetime

# Current date, formatted as day_month-abbreviation_year, used to stamp the
# exported file name
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime("%d_%b_%Y")

# Move the 'Image URL' column to position 1 (the second column):
# pop() removes it from the frame and returns it, insert() puts it back.
mid = df_florish.pop('Image URL')
df_florish.insert(1, 'Image URL', mid)

# Give the CSV a name with the current timestamp
csv_name = 'world_covid_cases_' + timestampStr + '.csv'

# index=False: the row index carries no information and would otherwise be
# written as an unnamed first column, ahead of the columns ordered above.
df_florish.to_csv(csv_name, index=False)

df_florish.head()


In [None]:
from IPython.display import IFrame, display

# Public Flourish visualisation to embed in the notebook output
flourish_url = "https://public.flourish.studio/visualisation/27035997/"

flourish_frame = IFrame(src=flourish_url, width="100%", height=600)
display(flourish_frame)
