In [2]:
import os
import pandas as pd
pd.__version__

'1.1.4'

In [3]:
# Set up path
DATA_REPO = os.getcwd()+"/data/COVID-19/csse_covid_19_data/"
DAILY_REPO = DATA_REPO + "csse_covid_19_daily_reports/"
TIME_REPO = DATA_REPO + "csse_covid_19_time_series/"

In [4]:
def daily_prep(dataframe_dir, date):
    # Load data
    df = pd.read_csv(dataframe_dir+date)
    # Trasform daily dataframes
    df_idx = ['Confirmed', 'Deaths', 'Recovered', 'Country_Region']
    df = df[df_idx].rename(columns={'Confirmed': '확진자 수', 'Deaths':'사망자 수','Recovered':'회복자 수','Country_Region': '국가 구분'})
    # Accumulate daily data
    df = df.groupby('국가 구분').sum().reset_index()
    return df

# Daily patients by countries
daily_df= daily_prep(DAILY_REPO, "11-09-2020.csv")
daily_df

Unnamed: 0,국가 구분,확진자 수,사망자 수,회복자 수
0,Afghanistan,42297,1574,34721
1,Albania,24731,571,12203
2,Algeria,62693,2062,42325
3,Andorra,5437,75,4332
4,Angola,12680,308,5927
...,...,...,...,...
185,West Bank and Gaza,58838,521,50877
186,Western Sahara,10,1,8
187,Yemen,2071,605,1394
188,Zambia,16971,349,16011


In [5]:
def time_series_prep(dataframe_dir, condition):
    # Load data
    drop_idx = ['Province/State', 'Country/Region', 'Lat', 'Long']
    dataframe_path = dataframe_dir + f"time_series_covid19_{condition}_global.csv"
    df = pd.read_csv(dataframe_path)
    # Data transform
    df = df.drop(columns=drop_idx)
    # Accumulate data
    df = df.sum().reset_index().rename(columns={'index':'date', 0:condition})
    return df

conditions = ['confirmed', 'deaths', 'recovered']
final_df = None
for condition in conditions:
    if final_df is None:
        final_df = time_series_prep(TIME_REPO, condition)
    else:
        final_df = final_df.merge(time_series_prep(TIME_REPO, condition))

# dat = time_series_prep(TIME_REPO, "confirmed")
# dat2 = time_series_prep(TIME_REPO, "deaths")
# dat3 = time_series_prep(TIME_REPO, "recovered")

# Global total confirmed patients
final_df

Unnamed: 0,date,confirmed,deaths,recovered
0,1/22/20,555,17,28
1,1/23/20,654,18,30
2,1/24/20,941,26,36
3,1/25/20,1434,42,39
4,1/26/20,2118,56,52
...,...,...,...,...
289,11/6/20,49346623,1243076,32480626
290,11/7/20,49858197,1250650,32779541
291,11/8/20,50423019,1256388,33032762
292,11/9/20,50918703,1263094,33289404


In [6]:
# Confirmed patients by country
def time_series_prep_by_country(dataframe_dir, condition, country):
    # Define path & will remove target idx
    dataframe_path = dataframe_dir + f"time_series_covid19_{condition}_global.csv"
    expected_idx = ['Province/State', 'Country/Region', 'Lat', 'Long']
    # Data load
    df = pd.read_csv(dataframe_path)
    # Data transform
    df = df.loc[df['Country/Region'] == country]
    df = df.drop(columns=expected_idx)
    df = df.sum()
    df = df.reset_index().rename(columns={'index':'date', 0:condition})
    return df

final_df = None
conditions = ['confirmed', 'deaths', 'recovered']
country = "Korea, South"
for cond in conditions:
    if final_df is None:
        final_df = time_series_prep_by_country(TIME_REPO, cond, country)
    else:
        final_df = final_df.merge(time_series_prep_by_country(TIME_REPO, cond, country))
final_df

Unnamed: 0,date,confirmed,deaths,recovered
0,1/22/20,1,0,0
1,1/23/20,1,0,0
2,1/24/20,2,0,0
3,1/25/20,2,0,0
4,1/26/20,3,0,0
...,...,...,...,...
289,11/6/20,27284,477,24910
290,11/7/20,27427,478,24968
291,11/8/20,27553,480,25029
292,11/9/20,27653,485,25160


In [7]:
import plotly.express as px
print(daily_df.head)

<bound method NDFrame.head of                   국가 구분  확진자 수  사망자 수  회복자 수
0           Afghanistan  42297   1574  34721
1               Albania  24731    571  12203
2               Algeria  62693   2062  42325
3               Andorra   5437     75   4332
4                Angola  12680    308   5927
..                  ...    ...    ...    ...
185  West Bank and Gaza  58838    521  50877
186      Western Sahara     10      1      8
187               Yemen   2071    605   1394
188              Zambia  16971    349  16011
189            Zimbabwe   8561    254   8023

[190 rows x 4 columns]>


In [18]:
projections = ["equirectangular", "mercator" , "orthographic" , "natural earth" , "kavrayskiy7" , "miller" , "robinson" , "eckert4" , "azimuthal equal area" , "azimuthal equidistant" , "conic equal area" , "conic conformal" , "conic equidistant" , "gnomonic" , "stereographic" , "mollweide" , "hammer" , "transverse mercator" , "albers usa" , "winkel tripel" , "aitoff" , "sinusoidal" ]

# for projection in projections:
fig = px.scatter_geo(
    daily_df,text='사망자 수', 
    color="확진자 수", 
    locations='국가 구분', 
    locationmode='country names', 
    hover_name="국가 구분",
    size="확진자 수",
    size_max=50,
    hover_data={
        "확진자 수":':,2f', 
        "회복자 수":':,2f', 
        "사망자 수":':,2f', 
        "국가 구분":False
    },
    template="plotly_dark",
    projection=projection,
    title=projection
)
fig.show()