# Get Ready

## Grant permission for accessing data on Google Drive

In [1]:
# from pydrive.auth import GoogleAuth
# from pydrive.drive import GoogleDrive
# from google.colab import auth
# from oauth2client.client import GoogleCredentials

# auth.authenticate_user()
# gauth = GoogleAuth()
# gauth.credentials = GoogleCredentials.get_application_default()
# drive = GoogleDrive(gauth)

## Install required libraries

In [2]:
# !pip install bokeh
# !pip install selenium
# !pip install phantomjs
# !pip install geopandas
# !pip install pyecharts

## Import library

In [3]:
import pandas as pd
import warnings
import json
import geopandas as gpd

from pyecharts import *
from pyecharts import options as opts
from pyecharts.globals import ThemeType
from pyecharts.charts import Map
from pyecharts.globals import CurrentConfig, NotebookType
from pyecharts.render import make_snapshot

from IPython.core.interactiveshell import InteractiveShell
import nest_asyncio
from snapshot_pyppeteer import snapshot

## Some settings for this code file

In [4]:
!jupyter trust Data-Analysis.ipynb
InteractiveShell.ast_node_interactivity = "all"
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB
nest_asyncio.apply()
theme=ThemeType.LIGHT

pd.set_option('display.max_columns', None)
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 50)

warnings.simplefilter(action='ignore', category=FutureWarning)

data_file_root_path = "Data"

data_source = "https://www.kaggle.com/sudalairajkumar/novel-corona-virus-2019-dataset/data"

Notebook already signed: Data-Analysis.ipynb


# Data Analysis

In [5]:
def read_file(file_name):
    file_data = pd.read_csv(f"{data_file_root_path}/novel-corona-virus-2019-dataset/{file_name}").fillna(0)
    file_data.rename(
        columns={'Country/Region' : 'CountryRegion', 
                 'Province/State' : "ProvinceState"}, 
                 inplace=True)
    return file_data

In [6]:
def cal_new_confirmed(a_df):
    confirmed_col = a_df['Confirmed']
    new_confirmed = []
    new_confirmed.append(confirmed_col[0])

    for i in range(len(confirmed_col)):
        try:
            new_confirmed.append(int(confirmed_col[i + 1] - confirmed_col[i]))
        except KeyError:
            break

    a_df["NewConfirmed"] = new_confirmed
    return a_df


## Basic Summary

In [7]:
covid_19_data = read_file("covid_19_data.csv")
covid_19_data["Active"] = covid_19_data['Confirmed'] - covid_19_data['Deaths'] - covid_19_data['Recovered']
start_obser_date = covid_19_data["ObservationDate"].iloc[0]
latest_obser_date = covid_19_data["ObservationDate"].iloc[-1]
covid_19_data

Unnamed: 0,SNo,ObservationDate,ProvinceState,CountryRegion,Last Update,Confirmed,Deaths,Recovered,Active
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0,1.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0,14.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0,6.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0,1.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...
17044,17045,04/20/2020,Wyoming,US,2020-04-20 23:44:50,317.0,2.0,0.0,315.0
17045,17046,04/20/2020,Xinjiang,Mainland China,2020-04-20 23:44:50,76.0,3.0,73.0,0.0
17046,17047,04/20/2020,Yukon,Canada,2020-04-20 23:44:50,11.0,0.0,0.0,11.0
17047,17048,04/20/2020,Yunnan,Mainland China,2020-04-20 23:44:50,184.0,2.0,178.0,4.0


### Overall cases count

In [8]:
grouped_static = covid_19_data.groupby('ObservationDate')['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()
grouped_static = cal_new_confirmed(grouped_static)
total_static = grouped_static[grouped_static["ObservationDate"] == max(grouped_static["ObservationDate"])].reset_index(drop=True)
total_static.style.background_gradient(cmap='Pastel1').format({
    "Confirmed": "{:,.0f}", 
    "Deaths": "{:,.0f}", 
    "Recovered": "{:,.0f}",
    "Active": "{:,.0f}",
    "NewConfirmed": "{:,.0f}",
    })


Unnamed: 0,ObservationDate,Confirmed,Deaths,Recovered,Active,NewConfirmed
0,04/20/2020,2472259,169986,645738,1656535,70880


### Cases per country/Region

In [9]:
print(f"Latest Record of Data: {latest_obser_date} \n")

latest_covid_19_data = covid_19_data[covid_19_data["ObservationDate"] == max(covid_19_data["ObservationDate"])].reset_index()

basic_static = latest_covid_19_data.groupby(["CountryRegion"])['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()
basic_static = basic_static.sort_values(by='Confirmed', ascending=False).reset_index(drop=True)
basic_static.index += 1 
basic_static.style.background_gradient(cmap='Reds').format(
    {"Confirmed": "{:,.0f}", 
     "Deaths": "{:,.0f}", 
     "Recovered": "{:,.0f}",
     "Active": "{:,.0f}",}
    )


Latest Record of Data: 04/20/2020 



Unnamed: 0,CountryRegion,Confirmed,Deaths,Recovered,Active
1,US,784326,42094,72329,669903
2,Spain,200210,20852,80587,98771
3,Italy,181228,24114,48877,108237
4,France,156480,20292,38036,98152
5,Germany,147065,4862,91500,50703
6,UK,125856,16550,446,108860
7,Turkey,90980,2140,13430,75410
8,Iran,83505,5209,59273,19023
9,Mainland China,82747,4632,77093,1022
10,Russia,47121,405,3446,43270


### Plot top 15 countries data

In [88]:
def plot_ebar(a_df, type_str: str) -> charts.Bar:
    country = a_df["CountryRegion"].to_list()
    y1 = a_df[type_str].to_list()
    
    color_dict = {
        "Confirmed": "#FF5252",
        "Recovered": "#00BFA5",
        "Deaths": "#FF6D00"
    }
    
    bar = (
        charts.Bar(init_opts=opts.InitOpts(
            theme=ThemeType.LIGHT,
            width="1350px",
            height="800px"
        ))
        .add_xaxis(
            country,
        )
        .add_yaxis(
            type_str, y1,
            itemstyle_opts=opts.ItemStyleOpts(
                color = color_dict[type_str]
            ),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title=f'{type_str} Number of Top 15 Confirmed Count Countries',
                subtitle=f"from {start_obser_date} to {latest_obser_date}",
                pos_top=0
            ),
            yaxis_opts=opts.AxisOpts(
                # name=f'Number of cases',
                name_location="center",
                
            ),
            xaxis_opts=opts.AxisOpts(
                name="Country/\nRegion",
                axislabel_opts = opts.LabelOpts(
                    interval=0,
                    rotate=25,
                    margin=10
                )
            ),
            legend_opts=opts.LegendOpts(
                is_show=True,
                pos_right=100,
            ),
        )
    )
    
    file_name = f"{type_str}-bar"
    
    make_snapshot(
        snapshot, 
        bar.render(f"Images/{file_name}.html"), 
        f"Images/{file_name}.png"
    )
    return bar

In [68]:
top_15_countries = basic_static.head(15)

In [89]:
top_15_confirmed_bar = plot_ebar(top_15_countries, "Confirmed")
top_15_confirmed_bar.load_javascript()
top_15_confirmed_bar.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60bd16d48>

In [90]:
top_15_deaths_bar = plot_ebar(top_15_countries, "Deaths")
top_15_deaths_bar.load_javascript()
top_15_deaths_bar.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60bd53c48>

In [91]:
top_15_recovered_bar = plot_ebar(top_15_countries, "Recovered")
top_15_recovered_bar.load_javascript()
top_15_recovered_bar.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60bd54188>

## Tendency

### Functions for trend analysis

#### Make trend table

In [15]:
def make_trend_table(country: str):
    line_data = covid_19_data[covid_19_data['CountryRegion']==country]
    line_data = line_data.groupby(["ObservationDate"])['Confirmed', 'Deaths', 'Recovered', 'Active'].sum().reset_index()
    line_data["ObservationDate"] = pd.to_datetime(line_data["ObservationDate"], format='%m/%d/%Y')
    
    return line_data

#### Plot trend data

In [16]:
def plot_line_trend(trend_df, country: str) -> charts.Line:
    date = trend_df["ObservationDate"].astype(str).to_list()
    y1 = trend_df["Confirmed"].to_list()
    y2 = trend_df["Deaths"].to_list()
    y3 = trend_df["Recovered"].to_list()
    y4 = trend_df["Active"].to_list()
    y5 = trend_df["NewConfirmed"].to_list()
    
    line = (
    charts.Line(init_opts=opts.InitOpts(
            theme=ThemeType.LIGHT,
            width="1350px",
            height="800px"
    ))
    .add_xaxis(xaxis_data=date)
    .add_yaxis(
        series_name="Confirmed",
        y_axis=y1,
        label_opts=opts.LabelOpts(is_show=False),
        # is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(width=3),
    )
        .add_yaxis(
        series_name="Recovered",
        y_axis=y3,
        label_opts=opts.LabelOpts(is_show=False),
        # is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(width=3),
    )
    .add_yaxis(
        series_name="Deaths",
        y_axis=y2,
        label_opts=opts.LabelOpts(is_show=False),
        # is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(width=3),
    )
    .add_yaxis(
        series_name="Active",
        y_axis=y4,
        label_opts=opts.LabelOpts(is_show=False),
        # is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(width=3),
    )
    .add_yaxis(
        series_name="New Confirmed",
        y_axis=y5,
        label_opts=opts.LabelOpts(is_show=False),
        # is_smooth=True,
        linestyle_opts=opts.LineStyleOpts(width=3),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title=f"COVID-19 Trend of {country}",
            subtitle=f"from {start_obser_date} to {latest_obser_date}"
        ),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
        yaxis_opts=opts.AxisOpts(
            type_="value",
            axistick_opts=opts.AxisTickOpts(is_show=True),
            splitline_opts=opts.SplitLineOpts(is_show=True),
        ),
        xaxis_opts=opts.AxisOpts(type_="category", boundary_gap=False),
    ))
    
    file_name = f"{country}-trend"
    
    make_snapshot(
        snapshot, 
        line.render(f"Images/{file_name}.html"), 
        f"Images/{file_name}.png"
    )
    
    return line
    

### China Trend

In [17]:
china_line_data = make_trend_table('Mainland China')
china_line_data = cal_new_confirmed(china_line_data)
china_line_data

Unnamed: 0,ObservationDate,Confirmed,Deaths,Recovered,Active,NewConfirmed
0,2020-01-22,547.0,17.0,28.0,502.0,547.0
1,2020-01-23,639.0,18.0,30.0,591.0,92.0
2,2020-01-24,916.0,26.0,36.0,854.0,277.0
3,2020-01-25,1399.0,42.0,39.0,1318.0,483.0
4,2020-01-26,2062.0,56.0,49.0,1957.0,663.0
...,...,...,...,...,...,...
85,2020-04-16,82341.0,3342.0,77900.0,1099.0,47.0
86,2020-04-17,82694.0,4632.0,77003.0,1059.0,353.0
87,2020-04-18,82718.0,4632.0,77029.0,1057.0,24.0
88,2020-04-19,82735.0,4632.0,77068.0,1035.0,17.0


In [18]:
china_line = plot_line_trend(china_line_data, 'Mainland China')
china_line.load_javascript()
china_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b608a15688>

### Singapore Trend

In [19]:
singapore_line_data = make_trend_table('Singapore')
singapore_line_data = cal_new_confirmed(singapore_line_data)
singapore_line = plot_line_trend(singapore_line_data, 'Singapore')
singapore_line.load_javascript()
singapore_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b608a32a88>

### Japan Trend

In [92]:
japan_line_data = make_trend_table('Japan')
japan_line_data = cal_new_confirmed(japan_line_data)
japan_line = plot_line_trend(japan_line_data, 'Japan')
japan_line.load_javascript()
japan_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60bd6b088>

### South Korea Trend

In [21]:
south_korea_line_data = make_trend_table('South Korea')
south_korea_line_data = cal_new_confirmed(south_korea_line_data)
south_korea_line = plot_line_trend(south_korea_line_data, 'South Korea')
south_korea_line.load_javascript()
south_korea_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b6089cee88>

### India Trend

In [22]:
india_line_data = make_trend_table('India')
india_line_data = cal_new_confirmed(india_line_data)
india_line = plot_line_trend(india_line_data, 'India')
india_line.load_javascript()
india_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b608a1ce48>

### United Kingdom Trend

In [23]:
uk_line_data = make_trend_table('UK')
uk_line_data = cal_new_confirmed(uk_line_data)
uk_line = plot_line_trend(uk_line_data, 'UK')
uk_line.load_javascript()
uk_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b608adcb48>

### Italy Trend

In [24]:
italy_line_data = make_trend_table('Italy')
italy_line_data = cal_new_confirmed(italy_line_data)
italy_line = plot_line_trend(italy_line_data, 'Italy')
italy_line.load_javascript()
italy_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b608ae53c8>

### Spain Trend

In [25]:
spain_line_data = make_trend_table('Spain')
spain_line_data = cal_new_confirmed(spain_line_data)
spain_line = plot_line_trend(spain_line_data, 'Spain')
spain_line.load_javascript()
spain_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b608b0ad88>

### Australia Trend

In [26]:
australia_line_data = make_trend_table('Australia')
australia_line_data = cal_new_confirmed(australia_line_data)
australia_line = plot_line_trend(australia_line_data, 'Australia')
australia_line.load_javascript()
australia_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60891ff88>

### France Trend

In [27]:
france_line_data = make_trend_table('France')
france_line_data = cal_new_confirmed(france_line_data)
france_line = plot_line_trend(france_line_data, 'France')
france_line.load_javascript()
france_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b609be3588>

### America Trend

In [28]:
us_line_data = make_trend_table('US')
us_line_data = cal_new_confirmed(us_line_data)
us_line = plot_line_trend(us_line_data, 'US')
us_line.load_javascript()
us_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b609c072c8>

### Hong Kong Trend

In [29]:
hk_line_data = make_trend_table('Hong Kong')
hk_line_data = cal_new_confirmed(hk_line_data)
hk_line = plot_line_trend(hk_line_data, 'Hong Kong')
hk_line.load_javascript()
hk_line.render_notebook()

<pyecharts.render.display.Javascript at 0x1b609c28dc8>

## Make Pie Chart function

In [55]:
def plot_grouping_pie_chart(grouped_df, group_name: str) -> charts.Pie:
   
    labels = grouped_df[group_name]
    percentages = grouped_df["Percent"]       
    
    pie = (
        charts.Pie(init_opts=opts.InitOpts(
            theme=ThemeType.LIGHT
        ))
        .add(
            "", [list(z) for z in zip(labels, percentages)],
            radius=["40%", "75%"],
            rosetype="percentages"
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title=f"COVID-19 Confirmed {group_name} Group",
                subtitle= f"from {start_obser_date} to {latest_obser_date}"
            ),
            legend_opts=opts.LegendOpts(
                orient='vertical',
                is_show=True,
                pos_right=10,
                pos_top=50
            ),

        )
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )
    
    make_snapshot(
        snapshot, 
        pie.render(f"Images/{group_name}-grouping-percentage.html"), 
        f"Images/{group_name}-grouping-percentage.png"
    )
    return pie

In [31]:
def make_pie_df(a_df):
    new_grouped = a_df.reset_index(name="Count")
    percent = []
    sum = 0
    for c in new_grouped["Count"]:
        sum += c

    for c in new_grouped["Count"]:
        percent.append(round(c / sum, 3))

    new_grouped["Percent"] = percent
    return new_grouped

## Age Group

### Process Data

In [32]:
COVID19_open_line_list = read_file("COVID19_open_line_list.csv")
null_index = COVID19_open_line_list[COVID19_open_line_list["ID"] == 0.0].index
COVID19_open_line_list.drop(null_index, inplace=True)
COVID19_open_line_list = COVID19_open_line_list.loc[:, ~COVID19_open_line_list.columns.str.contains('^Unnamed')]
COVID19_open_line_list = COVID19_open_line_list.loc[:, ~COVID19_open_line_list.columns.str.contains('^admin')]
COVID19_open_line_list = COVID19_open_line_list.replace({
    "male" : "Male",
    "female" : "Female",
})

In [33]:
age_series = COVID19_open_line_list["age"].astype(str)

for age in age_series:
    if '-' in age:
        age_range = age.split('-')
        a1 = int(age_range[0])
        a2 = int(age_range[1])
        a = int((a1 + a2) / 2)
        age_series = age_series.replace(age, a)

COVID19_open_line_list["age"] = age_series.astype(float)
COVID19_open_line_list

Unnamed: 0,ID,age,sex,city,province,country,wuhan(0)_not_wuhan(1),latitude,longitude,geo_resolution,date_onset_symptoms,date_admission_hospital,date_confirmation,symptoms,lives_in_Wuhan,travel_history_dates,travel_history_location,reported_market_exposure,additional_information,chronic_disease_binary,chronic_disease,source,sequence_available,outcome,date_death_or_discharge,notes_for_discussion,location,country_new,data_moderator_initials
0,1.0,30.0,Male,"Chaohu City, Hefei City",Anhui,China,1.0,31.646960,117.716600,admin3,18.01.2020,20.01.2020,22.01.2020,0,yes,17.01.2020,Wuhan,0,0,0.0,0,http://ah.people.com.cn/GB/n2/2020/0127/c35826...,0,0,0,0,0,China,0
1,2.0,47.0,Male,"Baohe District, Hefei City",Anhui,China,1.0,31.778630,117.331900,admin3,10.01.2020,21.01.2020,23.01.2020,0,no,10.01.2020,"Luzhou Hunan, via Wuhan",0,0,0.0,0,http://ah.people.com.cn/GB/n2/2020/0127/c35826...,0,0,0,0,0,China,0
2,3.0,49.0,Male,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,15.01.2020,20.01.2020,23.01.2020,0,no,10.01.2020,"Yinzhou Hunan, via Wuhan",0,0,0.0,0,http://ah.people.com.cn/GB/n2/2020/0127/c35826...,0,0,0,0,High-Tech Zone,China,0
3,4.0,47.0,Female,"High-Tech Zone, Hefei City",Anhui,China,1.0,31.828313,117.224844,point,17.01.2020,20.01.2020,23.01.2020,0,no,0,0,0,contacted with confirmed case,0.0,0,http://ah.people.com.cn/GB/n2/2020/0127/c35826...,0,0,0,0,High-Tech Zone,China,0
4,5.0,50.0,Female,"Feidong County, Hefei City",Anhui,China,1.0,32.001230,117.568100,admin3,10.01.2020,21.01.2020,23.01.2020,0,no,07.01.2020,Wuhan,0,"06.01.2020 went to Wuhan, 07.01.2020 returned ...",0.0,0,http://ah.people.com.cn/GB/n2/2020/0127/c35826...,0,0,0,0,0,China,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13169,13475.0,64.0,Male,Nagoya City,Aichi Prefecture,Japan,1.0,0.000000,0.000000,0,27.02.2020,0,28.02.2020,fever (38 ° C),0,0,#207,0,0,0.0,0,https://www.mhlw.go.jp/content/10906000/000601...,0,0,0,0,0,0,0
13170,13476.0,44.0,Female,0,Osaka,Japan,1.0,0.000000,0.000000,0,19.02.2020,0,28.02.2020,fever (38 ° C),0,0,#208,0,0,0.0,0,https://www.mhlw.go.jp/content/10906000/000601...,0,0,0,0,0,0,0
13171,13477.0,5.0,Female,0,Osaka,Japan,1.0,0.000000,0.000000,0,0,0,28.02.2020,0,0,0,#209,0,0,0.0,0,https://www.mhlw.go.jp/content/10906000/000601...,0,0,0,0,0,0,0
13172,13478.0,0.0,Female,Amsterdam,0,Netherlands,1.0,0.000000,0.000000,0,0,0,28.02.2020,mild,0,0,returned from Italy this week,0,works at Amsterdam UMC Hospital,0.0,0,https://nos.nl/artikel/2324942-amsterdamse-cor...,0,0,0,0,0,0,0


In [34]:
list_bins = [1, 10, 20, 30, 40, 50, 60, 70, 80, 100]
list_label = ['0-10', '11-20', '21-30', '31-40', '41-50', '51-60', '61-70', '71-80', '81-100']

age_grouped = pd.cut(COVID19_open_line_list["age"], bins=list_bins, labels=list_label, include_lowest=True)

age_group_df = pd.Series(age_grouped, name=("Age")).to_frame()
age_group_df = age_group_df.groupby(["Age"]).size()

new_age_grouped = make_pie_df(age_group_df)
new_age_grouped

Unnamed: 0,Age,Count,Percent
0,0-10,40,0.03
1,11-20,34,0.025
2,21-30,183,0.136
3,31-40,256,0.19
4,41-50,335,0.249
5,51-60,236,0.176
6,61-70,166,0.124
7,71-80,69,0.051
8,81-100,25,0.019


### Plot Pie Chart

In [56]:
age_pie = plot_grouping_pie_chart(new_age_grouped, "Age")
age_pie.load_javascript()
age_pie.render_notebook()

<pyecharts.render.display.Javascript at 0x1b609fee948>

## Gender Group

In [36]:
gender_group = COVID19_open_line_list.groupby("sex")
gender_group_count = gender_group.size()
gender_group_count.index.name = "Gender"
gender_group_count = make_pie_df(gender_group_count).replace(0, "Not Reported")
gender_group_count

Unnamed: 0,Gender,Count,Percent
0,Not Reported,11910,0.904
1,Female,556,0.042
2,Male,707,0.054


In [57]:
gender_pie = plot_grouping_pie_chart(gender_group_count, "Gender")
gender_pie.load_javascript()
gender_pie.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60a031c08>

## Map
Reference: [A Complete Guide to an Interactive Geographical Map using Python](https://towardsdatascience.com/a-complete-guide-to-an-interactive-geographical-map-using-python-f4c5197e23e0)

In [38]:
plot_geo_df = basic_static
plot_geo_df = plot_geo_df.replace({
    "US" : "United States",
    "Mainland China" : "China",
    "UK" : "United Kingdom",
    "Congo (Brazzaville)" : "Dem. Rep. Congo",
    "Congo (Kinshasa)" : "Congo",
    "Burma" : "Myanmar",
    "South Sudan" : "S. Sudan",
    "Central African Republic" : "Central African Rep.",
    "Western Sahara" : "W. Sahara",
    "South Korea" : "Korea",
    "Czech Republic" : "Czech Rep.",
    "Dominican Republic" : "Dominican Rep.",
    "Ivory Coast" : "Côte d'Ivoire",
    "Laos" : "Lao PDR",
    "North Macedonia" : "Macedonia",
    "Bosnia and Herzegovina" : "Bosnia and Herz.",
    "Equatorial Guinea" : "Eq. Guinea"
})
    
    
locate = plot_geo_df["CountryRegion"]
confirmed_cases = plot_geo_df["Confirmed"].astype(int)
recovered_cases = plot_geo_df["Recovered"].astype(int)
deaths_cases = plot_geo_df["Deaths"].astype(int)



In [95]:
def plot_map(cases, type_str):
    file_name = f"COVID-19-Global-{type_str}-Cases"
    
    map = (
        charts.Map(init_opts=opts.InitOpts(
                theme=ThemeType.LIGHT,
                width="1350px",
                height="800px"
        ))
        .add(
            f"World {type_str} Cases", 
            [list(z) for z in zip(locate.to_list(), cases.to_list())], 
            "world",
            is_map_symbol_show=False
        )      
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title=file_name.replace('-', " "),
                subtitle=f"from {start_obser_date} to {latest_obser_date}"
            ),
            visualmap_opts=opts.VisualMapOpts(max_=300000),
        )
    )
        
    make_snapshot(
            snapshot, 
            map.render(f"Images/{file_name}.html"), 
            f"Images/{file_name}.png"
        )
    return map

In [96]:
world_confirmed_map = plot_map(confirmed_cases, "Comfirmed")
world_confirmed_map.load_javascript()
world_confirmed_map.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60c055f88>

In [97]:
world_recovered_map = plot_map(recovered_cases, "Recovered")
world_recovered_map.load_javascript()
world_recovered_map.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60c089548>

In [98]:
world_deaths_map = plot_map(deaths_cases, "Deaths")
world_recovered_map.load_javascript()
world_recovered_map.render_notebook()

<pyecharts.render.display.Javascript at 0x1b60c0966c8>

#### Calculate US time series condirmed cases

In [43]:
def cal_us_series_sum(a_df):
    
    all_number_us_df = a_df.drop(
        columns=[
                 "UID", "iso2", "iso3", "code3", "FIPS", "Admin2", "Province_State", "Country_Region", "Lat", "Long_", "Combined_Key"
                ]
    )
    a_df["Sum"] = all_number_us_df.sum(axis=1)
    return a_df

In [44]:
time_series_covid_19_confirmed_US = cal_us_series_sum(read_file("time_series_covid_19_confirmed_US.csv"))
time_series_covid_19_deaths_US = cal_us_series_sum(read_file("time_series_covid_19_deaths_US.csv"))

#### Combine whole time series data

In [45]:
time_series_covid_19_confirmed = read_file("time_series_covid_19_confirmed.csv")
time_series_covid_19_confirmed_US = read_file("time_series_covid_19_confirmed_US.csv")

lat = time_series_covid_19_confirmed["Lat"].append(time_series_covid_19_confirmed_US["Lat"])
lon = time_series_covid_19_confirmed["Long"].append(time_series_covid_19_confirmed_US["Long_"])
ProvinceState = time_series_covid_19_confirmed["ProvinceState"].astype(str).append(time_series_covid_19_confirmed_US["Province_State"].astype(str))
CountryRegion = time_series_covid_19_confirmed["CountryRegion"].astype(str).append(time_series_covid_19_confirmed_US["Country_Region"].astype(str))
time_series_o = time_series_covid_19_confirmed.loc[:, time_series_covid_19_confirmed.columns.str.contains('20')]
time_series_US = time_series_covid_19_confirmed_US.loc[:, time_series_covid_19_confirmed_US.columns.str.contains('20')]
time_series = time_series_o + time_series_US
time_series.fillna(0, inplace=True)

all_time_series = {
    "CountryRegion": CountryRegion, 
    "ProvinceState": ProvinceState,
    "Latitude": lat,
    "Longitude": lon,
    }
all_time_series_df = pd.DataFrame(all_time_series)
all_time_series_df = all_time_series_df.join(time_series)

all_number_time_series_df = all_time_series_df.drop(
    columns=[
             "ProvinceState", "CountryRegion", "Latitude", "Longitude"
             ]
)
all_time_series_df["Sum"] = all_number_time_series_df.sum(axis=1).astype(int)



In [46]:
all_time_series_df

Unnamed: 0,CountryRegion,ProvinceState,Latitude,Longitude,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,2/1/20,2/2/20,2/3/20,2/4/20,2/5/20,2/6/20,2/7/20,2/8/20,2/9/20,2/10/20,2/11/20,2/12/20,2/13/20,2/14/20,2/15/20,2/16/20,2/17/20,2/18/20,2/19/20,2/20/20,2/21/20,2/22/20,2/23/20,2/24/20,2/25/20,2/26/20,2/27/20,2/28/20,2/29/20,3/1/20,3/2/20,3/3/20,3/4/20,3/5/20,3/6/20,3/7/20,3/8/20,3/9/20,3/10/20,3/11/20,3/12/20,3/13/20,3/14/20,3/15/20,3/16/20,3/17/20,3/18/20,3/19/20,3/20/20,3/21/20,3/22/20,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20,4/3/20,4/4/20,4/5/20,4/6/20,4/7/20,4/8/20,4/9/20,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20,4/20/20,Sum
0,Afghanistan,0,33.000000,65.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,5.0,7.0,7.0,7.0,11.0,16.0,21.0,22.0,22.0,22.0,24.0,24.0,40.0,40.0,74.0,84.0,94.0,110.0,110.0,120.0,170.0,174.0,237.0,273.0,281.0,299.0,349.0,367.0,423.0,444.0,484.0,521.0,555.0,607.0,665.0,714.0,784.0,840.0,906.0,933.0,996.0,1026.0,12929
0,US,American Samoa,-14.271000,-170.132000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,4.0,4.0,5.0,7.0,7.0,7.0,11.0,16.0,21.0,22.0,22.0,22.0,24.0,24.0,40.0,40.0,74.0,84.0,94.0,110.0,110.0,120.0,170.0,174.0,237.0,273.0,281.0,299.0,349.0,367.0,423.0,444.0,484.0,521.0,555.0,607.0,665.0,714.0,784.0,840.0,906.0,933.0,996.0,1026.0,12929
1,Albania,0,41.153300,20.168300,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,10.0,12.0,23.0,33.0,38.0,42.0,54.0,58.0,64.0,76.0,84.0,91.0,116.0,133.0,155.0,183.0,219.0,237.0,252.0,268.0,281.0,312.0,336.0,359.0,388.0,426.0,473.0,490.0,504.0,521.0,537.0,546.0,566.0,579.0,600.0,608.0,629.0,653.0,675.0,684.0,698.0,720.0,13735
1,US,Guam,13.444300,144.793700,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,10.0,12.0,23.0,33.0,38.0,42.0,54.0,58.0,64.0,76.0,84.0,91.0,116.0,133.0,155.0,183.0,219.0,237.0,252.0,268.0,281.0,312.0,336.0,359.0,388.0,426.0,473.0,490.0,504.0,521.0,537.0,546.0,566.0,579.0,600.0,608.0,629.0,653.0,675.0,684.0,698.0,720.0,13735
2,Algeria,0,28.033900,1.659600,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0,5.0,12.0,12.0,17.0,17.0,19.0,20.0,20.0,20.0,24.0,26.0,37.0,48.0,54.0,60.0,74.0,87.0,90.0,139.0,201.0,230.0,264.0,302.0,367.0,409.0,454.0,511.0,584.0,718.0,853.0,992.0,1179.0,1259.0,1328.0,1431.0,1476.0,1583.0,1677.0,1772.0,1836.0,1925.0,1994.0,2081.0,2173.0,2281.0,2431.0,2548.0,2643.0,2732.0,41024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3256,US,Utah,39.372319,-111.575868,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3257,US,Utah,38.996171,-110.701396,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3258,US,Utah,37.854472,-111.441876,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3259,US,Utah,40.124915,-109.517442,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
