In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

import folium
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import math
import random
from datetime import timedelta

from plotly.subplots import make_subplots

import warnings 
warnings.filterwarnings('ignore')

# Color palette: hex RGB strings reused by the Plotly charts in this notebook
cnf = '#393e46'  # confirmed cases
dth = '#ff2e63'  # deaths
rec = '#21bf73'  # recovered cases
act = '#fe9801'  # active cases

In [None]:
# Directory holding the preprocessed CSV exports. Edit this single constant to
# point the notebook at another copy of the dataset.
DATA_DIR = '/Users/User/Jupyter Project/Data Analysis/Covid Visualization/Covid-19-Preprocessed-Dataset/preprocessed'

# The three frames with a 'Date' column get it parsed to datetime on load;
# countrywise is read without date parsing.
df = pd.read_csv(f'{DATA_DIR}/covid_19_data_cleaned.csv', parse_dates = ['Date'])
country_daywise = pd.read_csv(f'{DATA_DIR}/country_daywise.csv', parse_dates = ['Date'])
countrywise = pd.read_csv(f'{DATA_DIR}/countrywise.csv')
daywise = pd.read_csv(f'{DATA_DIR}/daywise.csv', parse_dates = ['Date'])

In [None]:
# Rows without a province get an empty string so later string operations
# on this column never see NaN.
df = df.fillna({'Province/State': ""})
df

In [None]:
daywise

In [None]:
country_daywise

In [None]:
# Daily totals across all rows. The column is selected BEFORE summing so
# pandas aggregates only the one numeric column instead of every column in
# the frame (text columns included).
confirmed = df.groupby('Date')['Confirmed'].sum().reset_index()
recovered = df.groupby('Date')['Recovered'].sum().reset_index()
deaths = df.groupby('Date')['Deaths'].sum().reset_index()
deaths.head()

In [None]:
df.isnull().sum()

In [None]:
df.info()

In [None]:
df.query('Country == "Malaysia"')

In [None]:
confirmed.tail()

In [None]:
deaths.tail()

In [None]:
recovered.tail()

In [None]:
# One line+marker trace per daily-total frame, all on a shared date axis.
fig = go.Figure()
for totals, column, colour in (
    (confirmed, 'Confirmed', "Orange"),
    (recovered, 'Recovered', "Green"),
    (deaths, 'Deaths', "Red"),
):
    fig.add_trace(
        go.Scatter(
            x = totals['Date'],
            y = totals[column],
            mode = 'lines+markers',
            name = column,
            line = dict(color = colour, width = 2),
        )
    )
fig.update_layout(title = 'Worldwide Covid-19 Cases', xaxis_tickfont_size = 14, yaxis = dict(title = 'Number of Cases'))

fig.show()

# Cases Density World Map

In [None]:
df.info()

In [None]:
# Cast the Date column to plain strings (overwrites the datetime column in df).
# NOTE(review): presumably done so the animated map below gets string frame
# labels — confirm. The column is converted back to datetime further down.
df['Date'] = df['Date'].astype(str)

In [None]:
df.info()

In [None]:
df.head()

In [None]:
# Animated density map, one frame per date.
# z='Confirmed' weights every point by its case count. Without a weight each
# row counts as 1, so the density would only reflect how many rows sit at a
# location rather than how many cases they report.
fig = px.density_mapbox(df, lat = 'Lat', lon = 'Long', z = 'Confirmed', hover_name = 'Country',
                        hover_data = ['Confirmed', 'Recovered', 'Deaths'], animation_frame = 'Date',
                        color_continuous_scale = 'Portland', radius = 7, zoom = 0, height = 700)
fig.update_layout(title = 'Worldwide Covid 19 Cases', mapbox_style = 'open-street-map', mapbox_center_lon = 0)

fig.show()

# Total Cases on Ships

In [None]:
# Convert Date back to datetime (it was cast to str earlier in the notebook)
# so the date comparisons and date arithmetic in later cells work on real dates.
df['Date'] = pd.to_datetime(df['Date'])
df.info()

In [None]:
# Boolean mask of rows that belong to cruise ships, matched by name in either
# the province or the country column.
ship_names = 'Grand Princess|Diamond Princess'
ship_rows = (
    df['Province/State'].str.contains(ship_names)
    | df['Country'].str.contains(ship_names + '|MS Zaandam')
)

# Ship rows go to their own frame; df keeps everything else.
ship = df[ship_rows]
df = df[~ship_rows]

In [None]:
# Keep only the rows from the most recent date present in the ship data.
latest_ship_day = max(ship['Date'])
ship_latest = ship.loc[ship['Date'] == latest_ship_day]
ship_latest

In [None]:
ship_latest.style.background_gradient(cmap = 'Pastel1_r')

# Cases Over Time with Area Plot

In [None]:
# Daily totals for the four case types. Several columns must be selected with
# a list (double brackets): the bare-tuple form is rejected by pandas 2.x.
temp = df.groupby('Date')[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum().reset_index()
# Keep only the row for the most recent date
temp = temp[temp['Date'] == temp['Date'].max()].reset_index(drop = True)
temp

In [None]:
# Problem with this method: tail(1) just returns the last row, so it only
# yields the latest date if the data is sorted by date in ascending order.
temp.tail(1)

In [None]:
# Long format: one row per case type with its latest total.
tm = temp.melt(id_vars = 'Date', value_vars = ['Active', 'Deaths', 'Recovered'])

# Colours are bound to categories by name. A positional colour list is not
# tied to any category, so which tile gets which colour is not guaranteed.
fig = px.treemap(tm, path = ['variable'], values = 'value', height = 250, width = 800,
                 color = 'variable',
                 color_discrete_map = {'Active': act, 'Deaths': dth, 'Recovered': rec})

fig.data[0].textinfo = 'label+text+value'
fig.show()

In [None]:
# Daily totals per case type. The columns are selected with a list because the
# bare-tuple form is rejected by pandas 2.x.
temp = df.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
# Reshape to long format (Date, Case, Count) for the area plot
temp = temp.melt(id_vars = 'Date', value_vars = ['Recovered', 'Deaths', 'Active'], var_name = 'Case', value_name = 'Count')

In [None]:
# Stacked area chart of the long-format totals, with a range slider under
# the date axis.
fig = px.area(
    temp,
    x = 'Date',
    y = 'Count',
    color = 'Case',
    height = 600,
    title = 'Cases over time',
    color_discrete_sequence = [rec, dth, act],
)
fig.update_layout(xaxis = dict(rangeslider = dict(visible = True)))
fig.show()

# Folium Maps

In [None]:
#Worldwide Cases on Folium Maps

In [None]:
# Take latest data of all country
# Rows from the most recent date in df
latest_day = max(df['Date'])
temp = df.loc[df['Date'] == latest_day]
temp

In [None]:
m = folium.Map(location = [0, 0], tiles = 'cartodbpositron', min_zoom = 1, max_zoom=4, zoom_start=1)

# One circle per row of the latest snapshot. iterrows() fetches each row once
# instead of repeating positional .iloc lookups for every field.
for _, row in temp.iterrows():
    # A marker needs both coordinates and a case count; rows missing any of
    # them are skipped instead of aborting the whole map.
    if pd.isna(row['Lat']) or pd.isna(row['Long']) or pd.isna(row['Confirmed']):
        continue

    tooltip = ('<li><bold> Country: ' + str(row['Country']) +
               '<li><bold> Province: ' + str(row['Province/State']) +
               '<li><bold> Confirmed: ' + str(row['Confirmed']) +
               '<li><bold> Deaths: ' + str(row['Deaths']))

    # Radius grows with the square root of the case count. Clamping at zero
    # keeps a negative count from producing a complex number.
    radius = max(int(row['Confirmed']), 0) ** 0.5

    # fill is a boolean switch; the fill colour is passed via fill_color.
    folium.Circle(location = [row['Lat'], row['Long']], color = 'crimson',
                  fill = True, fill_color = 'crimson',
                  tooltip = tooltip, radius = radius).add_to(m)

m


# Confirmed Cases with Choropleth Map

In [None]:
country_daywise.head()

In [None]:
# Colour by log(confirmed). Zero counts are masked to NaN first because
# log(0) is -inf, which is not a usable colour value; masked rows are simply
# left uncoloured.
confirmed_positive = country_daywise['Confirmed'].where(country_daywise['Confirmed'] > 0)

fig = px.choropleth(country_daywise, locations='Country', locationmode = 'country names',
                    color = np.log(confirmed_positive), hover_name = 'Country',
                    animation_frame = country_daywise['Date'].dt.strftime('%Y/%m/%d'),
                    title = 'Cases Over Time', color_continuous_scale=px.colors.sequential.Inferno)

fig.update(layout_coloraxis_showscale = True)
fig.show()


# Deaths and Recoveries per 100 Cases

In [None]:
daywise.head()

In [None]:
# Side-by-side daily bar charts: confirmed on the left, deaths on the right.
fig = make_subplots(
    rows = 1,
    cols = 2,
    shared_xaxes = False,
    horizontal_spacing = 0.1,
    subplot_titles = ('Confirmed Cases', 'Deaths Cases'),
)

fig_c = px.bar(daywise, x = 'Date', y = 'Confirmed', color_discrete_sequence = [act])
fig_d = px.bar(daywise, x = 'Date', y = 'Deaths', color_discrete_sequence = [dth])

# Only the first trace of each express figure is copied into the grid.
for column, panel in enumerate((fig_c, fig_d), start = 1):
    fig.add_trace(panel['data'][0], row = 1, col = column)

fig.update_layout(height = 400)

fig.show()

# Confirmed and Death Cases with Static Colormap

In [None]:
# Confirmed-cases map, coloured by the natural log of the country total.
fig_c = px.choropleth(
    countrywise,
    locations = 'Country',
    locationmode = 'country names',
    color = np.log(countrywise['Confirmed']),
    hover_data = ['Confirmed'],
)

# Deaths map: only countries with at least one death are kept, so the log
# below is never taken of zero.
temp = countrywise.loc[countrywise['Deaths'] > 0]

fig_d = px.choropleth(
    temp,
    locations = 'Country',
    locationmode = 'country names',
    color = np.log(temp['Deaths']),
    hover_data = ['Deaths'],
)


In [None]:
# Two geo subplots side by side; each receives the first trace of the
# corresponding express figure.
map_spec = {'type':'choropleth'}
fig = make_subplots(rows = 1, cols = 2, subplot_titles = ['Confirmed', 'Deaths'], specs = [[map_spec, dict(map_spec)]])

for column, panel in enumerate((fig_c, fig_d), start = 1):
    fig.add_trace(panel['data'][0], row = 1, col = column)

fig.update(layout_coloraxis_showscale = False)
fig.show()

In [None]:
daywise.columns

In [None]:
# The three ratio columns of daywise, each drawn as its own line chart and
# placed in a 1x3 grid titled with the column name.
ratio_columns = ('Deaths / 100 Cases', 'Recovered / 100 Cases', 'Deaths / 100 Recovered')
ratio_colours = (dth, rec, 'aqua')

fig1, fig2, fig3 = [
    px.line(daywise, x = 'Date', y = ratio, color_discrete_sequence = [colour])
    for ratio, colour in zip(ratio_columns, ratio_colours)
]

fig = make_subplots(rows = 1, cols = 3, shared_xaxes = False, subplot_titles = ratio_columns)

for column, panel in enumerate((fig1, fig2, fig3), start = 1):
    fig.add_trace(panel['data'][0], row = 1, col = column)

fig.update_layout(height = 400)
fig.show()




# New Cases and Number of Countries

In [None]:
# Left panel: new cases reported per day. This plots 'New Cases' rather than
# the cumulative 'Confirmed' column so the data matches the panel title.
# NOTE(review): assumes daywise has a 'New Cases' column, as country_daywise
# and countrywise do — confirm against daywise.columns.
fig_c = px.bar(daywise, x = 'Date', y = 'New Cases', color_discrete_sequence = [act])
# Right panel: value of the 'No. of Countries' column per day.
fig_d = px.bar(daywise, x = 'Date', y = 'No. of Countries', color_discrete_sequence = [dth])

fig = make_subplots(rows = 1, cols = 2, shared_xaxes=False, horizontal_spacing = 0.1,
                   subplot_titles=('No of New Cases Per Day', 'No of Countries'))

fig.add_trace(fig_c['data'][0], row = 1, col = 1)
fig.add_trace(fig_d['data'][0], row = 1, col = 2)

fig.show()

# Top 15 Countries Case Analysis

In [None]:
countrywise.columns

In [None]:
top = 15


def _ranked_bar(frame, column, colour):
    """Horizontal bar chart of the `top` rows of `frame` with the largest `column`."""
    return px.bar(frame.sort_values(column).tail(top), x = column, y = 'Country',
                  text = column, orientation = 'h', color_discrete_sequence = [colour])


# Rankings taken over every country
fig_c = _ranked_bar(countrywise, 'Confirmed', cnf)
fig_d = _ranked_bar(countrywise, 'Deaths', dth)
fig_a = _ranked_bar(countrywise, 'Active', act)
fig_r = _ranked_bar(countrywise, 'Recovered', rec)
fig_dc = _ranked_bar(countrywise, 'Deaths / 100 Cases', '#434343')
fig_rc = _ranked_bar(countrywise, 'Recovered / 100 Cases', '#f84351')
fig_nc = _ranked_bar(countrywise, 'New Cases', '#f84351')

# Per-capita ranking is restricted to countries above one million inhabitants
temp = countrywise[countrywise['Population']>1000000]
fig_p = _ranked_bar(temp, 'Cases / Million People', '#b40398')

# 1 Week Change
fig_wc = _ranked_bar(countrywise, '1 week change', '#c04041')

# Percentage increase is restricted to countries with more than 100 confirmed cases
temp1 = countrywise[countrywise['Confirmed']>100]
fig_in = _ranked_bar(temp1, '1 week % increase', '#b00398')

fig = make_subplots(
    rows = 5, cols = 2, shared_xaxes = False,
    horizontal_spacing = 0.2, vertical_spacing = 0.1,
    subplot_titles = (
        'Confirmed Cases', 'Death Reported',
        'Recovered Cases', 'Active Cases',
        'Death / 100 Cases', 'Recovered/ 100 Cases',
        'New Cases', 'Cases / Million People',
        '1 Week Change', '1 Week % increase',
    ),
)

# Panels listed in reading order (left to right, top to bottom) of the 5x2 grid
panels = [fig_c, fig_d, fig_r, fig_a, fig_dc, fig_rc, fig_nc, fig_p, fig_wc, fig_in]
for position, panel in enumerate(panels):
    fig.add_trace(panel['data'][0], row = position // 2 + 1, col = position % 2 + 1)

fig.update_layout(height = 3000)
fig.show()

# Save Static Plots

In [None]:
import os

# Create the output folder for static exports. exist_ok makes the call a no-op
# when the folder is already there, with no separate existence check to race against.
os.makedirs('images', exist_ok = True)

In [None]:
# Save whichever figure `fig` currently holds as a static PNG in images/
fig.write_image('images/fig.png')

In [None]:
# Save whichever figure `fig` currently holds as a PDF in images/
fig.write_image('images/fig.pdf')

# Scatter Plot for Deaths vs Confirmed Cases

In [None]:
countrywise.sort_values('Deaths', ascending = False).head(15)

In [None]:
top = 15

# The `top` countries with the most deaths, on log-log axes
worst_hit = countrywise.sort_values('Deaths', ascending = False).head(top)

fig = px.scatter(
    worst_hit,
    x = 'Confirmed',
    y = 'Deaths',
    color = 'Country',
    size = 'Confirmed',
    height = 800,
    width = 1100,
    text = 'Country',
    log_x = True,
    log_y = True,
    title = 'Deaths vs Confirmed Cases (log10 scale)',
)

fig.update_traces(textposition = 'top center')
fig.update_layout(showlegend = True, xaxis_rangeslider_visible = True)
fig.show()

# Confirmed, Deaths, New Cases vs Country and Date

<h1><li> Bar Plot </li></h1>

In [None]:
# Stacked daily bars of confirmed cases, one colour per country
fig = px.bar(
    country_daywise,
    x = 'Date',
    y = 'Confirmed',
    color = 'Country',
    height = 600,
    title = 'Confirmed',
    color_discrete_sequence = px.colors.cyclical.mygbm,
)

fig.show()

In [None]:
# Stacked daily bars of deaths, one colour per country. The axes follow the
# sibling bar charts (Date on x, the metric on y) and match the 'Deaths' title.
fig = px.bar(country_daywise, x = 'Date', y='Deaths', color='Country', height = 600,
            title = 'Deaths', color_discrete_sequence = px.colors.cyclical.mygbm)

fig.show()

In [None]:
# Stacked daily bars of recovered cases, one colour per country
fig = px.bar(
    country_daywise,
    x = 'Date',
    y = 'Recovered',
    color = 'Country',
    height = 600,
    title = 'Recovered',
    color_discrete_sequence = px.colors.cyclical.mygbm,
)

fig.show()

In [None]:
# Stacked daily bars of new cases, one colour per country
fig = px.bar(
    country_daywise,
    x = 'Date',
    y = 'New Cases',
    color = 'Country',
    height = 600,
    title = 'New Cases',
    color_discrete_sequence = px.colors.cyclical.mygbm,
)

fig.show()

# Line Plot

In [None]:
# One per-country line chart for each metric, shown in this order; the chart
# title is the metric name.
for metric in ('Confirmed', 'Deaths', 'Recovered'):
    fig = px.line(
        country_daywise,
        x = 'Date',
        y = metric,
        color = 'Country',
        height = 600,
        title = metric,
        color_discrete_sequence = px.colors.cyclical.mygbm,
    )
    fig.show()

# Growth Rate after 100 Cases

In [None]:
df.head()

In [None]:
# Countries that ever exceeded 100 confirmed cases
growth100 = country_daywise.loc[country_daywise['Confirmed'] > 100, 'Country'].unique()

# Country-level daily totals for those countries ...
temp = (
    df.loc[df['Country'].isin(growth100)]
    .groupby(['Country', 'Date'])['Confirmed']
    .sum()
    .reset_index()
)
# ... keeping only the days on which the total is above 100
temp = temp.loc[temp['Confirmed'] > 100]
temp

In [None]:
# The date when 100 cases are (start)reported in this country
min_date = temp.groupby('Country')['Date'].min().reset_index()
min_date.columns = ['Country', 'Min Date']
min_date

In [None]:
# Attach each country's start date, then count whole days elapsed since it
from_100_case = temp.merge(min_date, on = 'Country')
days_elapsed = from_100_case['Date'].sub(from_100_case['Min Date'])
from_100_case[' N days'] = days_elapsed.dt.days

from_100_case

In [None]:
# Confirmed cases against days elapsed since each country's first day above
# 100 cases, one line per country
fig = px.line(from_100_case, x = ' N days', y = 'Confirmed', color = 'Country', title = 'Nth Days from 100 cases', height = 600)

fig.show()

# Growth Rate after 1 Million Cases

In [None]:
# Countries that ever exceeded one million confirmed cases
growth1mil = country_daywise.loc[country_daywise['Confirmed'] > 1000000, 'Country'].unique()

# Country-level daily totals, restricted to days above one million
temp = (
    df.loc[df['Country'].isin(growth1mil)]
    .groupby(['Country', 'Date'])['Confirmed']
    .sum()
    .reset_index()
)
temp = temp.loc[temp['Confirmed'] > 1000000]

# First day above one million for each country
min_date = temp.groupby('Country')['Date'].min().rename('Min Date').reset_index()

# Days elapsed since that first day
from_1mil_case = temp.merge(min_date, on = 'Country')
from_1mil_case[' N days'] = from_1mil_case['Date'].sub(from_1mil_case['Min Date']).dt.days

fig = px.line(
    from_1mil_case,
    x = ' N days',
    y = 'Confirmed',
    color = 'Country',
    title = 'Nth Days from 1 Million cases',
    height = 600,
)

fig.show()

# Growth Rate after 5 Million Cases

In [None]:
# Countries that ever exceeded five million confirmed cases
growth5mil = country_daywise[country_daywise['Confirmed'] > 5000000]['Country'].unique()
temp = df[df['Country'].isin(growth5mil)]


# Country-level daily totals, restricted to days above five million
temp = temp.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
temp = temp[temp['Confirmed']>5000000]

# The first date on which each country's total was above 5 million cases
min_date = temp.groupby('Country')['Date'].min().reset_index()
min_date.columns = ['Country', 'Min Date']

# Days elapsed since that first date
from_5mil_case = pd.merge(temp, min_date, on = 'Country')
from_5mil_case[' N days'] = (from_5mil_case['Date'] - from_5mil_case['Min Date']).dt.days

fig = px.line(from_5mil_case, x = ' N days', y = 'Confirmed', color = 'Country', title = 'Nth Days from 5 Million cases', height = 600)

fig.show()

# Growth Rate after 10 Million Cases

In [None]:
# Countries that ever exceeded ten million confirmed cases
growthmillion = country_daywise[country_daywise['Confirmed'] > 10000000]['Country'].unique()
temp = df[df['Country'].isin(growthmillion)]


# Country-level daily totals, restricted to days above ten million
temp = temp.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
temp = temp[temp['Confirmed']>10000000]

# The first date on which each country's total was above 10 million cases
min_date = temp.groupby('Country')['Date'].min().reset_index()
min_date.columns = ['Country', 'Min Date']

# Days elapsed since that first date
from_million_case = pd.merge(temp, min_date, on = 'Country')
from_million_case[' N days'] = (from_million_case['Date'] - from_million_case['Min Date']).dt.days

fig = px.line(from_million_case, x = ' N days', y = 'Confirmed', color = 'Country', title = 'Nth Days from 10 Million cases', height = 600)

fig.show()

# Tree Map Analysis

<b> Confirmed Cases </b>

In [None]:
# Snapshot of the most recent date in df
newest_day = max(df['Date'])
full_latest = df.loc[df['Date'] == newest_day]

# Largest confirmed counts first, with a clean 0..n-1 index
by_confirmed = full_latest.sort_values(by = 'Confirmed', ascending = False).reset_index(drop = True)

# Country -> province hierarchy, tile area proportional to confirmed cases
fig = px.treemap(
    by_confirmed,
    path = ['Country', 'Province/State'],
    values = 'Confirmed',
    height = 700,
    title = 'Number of Confirmed Cases',
    color_discrete_sequence = px.colors.qualitative.Dark2,
)

fig.data[0].textinfo='label+text+value'
fig.show()

# Death Cases

In [None]:
# Snapshot of the most recent date in df
newest_day = max(df['Date'])
full_latest = df.loc[df['Date'] == newest_day]

# Largest death counts first, with a clean 0..n-1 index
by_deaths = full_latest.sort_values(by = 'Deaths', ascending = False).reset_index(drop = True)

# Country -> province hierarchy, tile area proportional to deaths
fig = px.treemap(
    by_deaths,
    path = ['Country', 'Province/State'],
    values = 'Deaths',
    height = 700,
    title = 'Number of Death Cases',
    color_discrete_sequence = px.colors.qualitative.Dark2,
)

fig.data[0].textinfo='label+text+value'
fig.show()

# First and Last Case Report Time

In [None]:
# First date with a non-zero confirmed count, per country
first_date = df[df['Confirmed']> 0]
first_date = first_date.groupby('Country')['Date'].agg(['min']).reset_index()

# Country-level daily totals. The columns are selected with a list because the
# bare-tuple form is rejected by pandas 2.x.
last_date = df.groupby(['Country', 'Date'])[['Confirmed', 'Deaths', 'Recovered']].sum()
# Day-over-day change within each country. A per-country diff leaves NaN on
# every country's first row, so no value leaks across country boundaries.
last_date = last_date.groupby(level = 'Country').diff().reset_index()

# Last date on which the confirmed total increased, per country
last_date = last_date[last_date['Confirmed'] > 0]
last_date = last_date.groupby('Country')['Date'].agg(['max']).reset_index()

# Join on the country name. A positional concat would pair the wrong rows as
# soon as one frame is missing a country that the other one has.
first_last = pd.merge(first_date, last_date, on = 'Country')
# The end of the bar is the day after the last increase
first_last['max'] = first_last['max'] + timedelta(days = 1)

first_last['Days'] = first_last['max'] - first_last['min']
first_last['Task'] = first_last['Country']

# Rename to the Task / Start / Finish layout used by the gantt chart cell
first_last.columns = ['Country', 'Start', 'Finish', 'Days', 'Task']

first_last = first_last.sort_values('Days')
first_last

In [None]:
# Create Random Colors
colors = ['#' + ''.join([random.choice('0123456789ABCDEF') for j in range(6)]) for i in range(len(first_last))]
colors

In [None]:
# Gantt chart built from first_last, coloured per country
gantt_options = dict(
    index_col = 'Country',
    colors = colors,
    show_colorbar = False,
    bar_width = 0.2,
    showgrid_x = True,
    showgrid_y = True,
    height = 3500,
)
fig = ff.create_gantt(first_last, **gantt_options)

fig.show()

# Confirmed Cases Country and Day Wise

In [None]:
country_daywise.head()

In [None]:
# Daily confirmed totals per country, limited to the countries in growth1mil
# (computed in the one-million growth cell)
daily_confirmed = country_daywise.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
temp = daily_confirmed.loc[daily_confirmed['Country'].isin(growth1mil)]

countries = temp['Country'].unique()
countries

In [None]:
ncols = 3
# At least one row, so an empty country list still yields a valid grid
nrows = max(1, math.ceil(len(countries)/ncols))

# Titles are passed as a plain list, one per subplot in reading order
fig = make_subplots(rows = nrows, cols = ncols, shared_xaxes = False,
                   subplot_titles = list(countries))

for ind, country in enumerate(countries):
    # Integer grid position (0-based), filled left to right, top to bottom
    grid_row, grid_col = divmod(ind, ncols)
    # x and y both come from the same country's rows so every bar is paired
    # with its own date; using all dates for x only lined up by accident.
    country_rows = temp[temp['Country'] == country]
    fig.add_trace(go.Bar(x = country_rows['Date'], y = country_rows['Confirmed'],
                         name = country), row = grid_row + 1, col = grid_col + 1)

fig.update_layout(height = 4000, title_text = 'Confirmed Cases in Each Country')
fig.update_layout(showlegend = False)
fig.show()

# Covid-19 vs Other Pandemics

In [None]:
full_latest

In [None]:
# Wikipedia Source

# COVID-19 figures are the totals of the latest snapshot; the other rows are
# fixed values entered by hand.
covid_confirmed = full_latest['Confirmed'].sum()
covid_deaths = full_latest['Deaths'].sum()

pandemic = pd.DataFrame({
    'epidemic': ['COVID-19', 'SARS', 'EBOLA', 'MERS', 'H1N1'],
    'start_year': [2019, 2002, 2013, 2012, 2009],
    'end_year': [2020, 2004, 2016, 2020, 2010],
    'confirmed': [covid_confirmed, 8422, 28646, 2519, 6724149],
    'deaths': [covid_deaths, 813, 11323, 866, 19654],
})

In [None]:
# Deaths as a percentage of confirmed cases, rounded to two decimals
death_share = pandemic['deaths'] / pandemic['confirmed']
pandemic['mortality'] = (death_share * 100).round(2)

pandemic

In [None]:
# Long format: one row per (epidemic, measure) pair
temp = pd.melt(
    pandemic,
    id_vars = 'epidemic',
    value_vars = ['confirmed', 'deaths', 'mortality'],
    var_name = 'Case',
    value_name = 'Value',
)
temp

In [None]:
# One facet per measure, bars coloured by epidemic
fig = px.bar(
    temp,
    x = 'epidemic',
    y = 'Value',
    color = 'epidemic',
    text = 'Value',
    facet_col = 'Case',
    color_discrete_sequence = px.colors.qualitative.Bold,
)

fig.update_traces(textposition = 'outside')
fig.update_layout(uniformtext_minsize = 8, uniformtext_mode = 'hide')
fig.update_yaxes(showticklabels = False)
# Unlink the second and third y-axes so each of those facets scales on its own
for facet_axis in (fig.layout.yaxis2, fig.layout.yaxis3):
    facet_axis.update(matches = None)
fig.show()