In [1]:
import pandas as pd
import json

PATH = "COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/"

with open("COVID-19-master/csse_covid_19_data/country_convert.json", "r", encoding="utf-8-sig") as json_file :
    json_data = json.load(json_file)

def country_name_convert(row) :
    if row["Country_Region"] in json_data :
        return json_data[row["Country_Region"]]
    return row["Country_Region"]

def create_dateframe(filename) :
    doc = pd.read_csv(f"{PATH}{filename}", encoding="utf-8-sig")
    try:
        doc = doc[["Country_Region", "Confirmed"]]
    except:
        doc = doc[["Country/Region", "Confirmed"]]
        doc.columns = ["Country_Region", "Confirmed"]
    doc=doc.dropna(subset=["Confirmed"])
    doc["Country_Region"] = doc.apply(country_name_convert, axis=1)
    # doc = doc[doc["Confirmed"] != 0]
    doc = doc.astype({"Confirmed": "int64"})
    doc = doc.groupby("Country_Region").sum()

    date_column = filename.split(".")[0].lstrip("0").replace("-","/")
    doc.columns = [date_column]
    return doc

In [2]:
doc1 = create_dateframe("01-22-2020.csv")
doc2 = create_dateframe("04-01-2020.csv")

In [3]:
doc2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 190 entries, Afghanistan to Zimbabwe
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   4/01/2020  190 non-null    int64
dtypes: int64(1)
memory usage: 3.0+ KB


In [4]:
doc1.info()

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, Antarctica to Winter Olympics 2022
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   1/22/2020  19 non-null     int64
dtypes: int64(1)
memory usage: 304.0+ bytes


In [5]:
doc2.head()

Unnamed: 0_level_0,4/01/2020
Country_Region,Unnamed: 1_level_1
Afghanistan,192
Albania,259
Algeria,847
Andorra,390
Angola,8


In [6]:
doc = pd.merge(doc1, doc2, how="outer", left_index=True, right_index=True)
doc.head()

Unnamed: 0_level_0,1/22/2020,4/01/2020
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,,192
Albania,,259
Algeria,,847
Andorra,,390
Angola,,8


In [7]:
doc = doc.fillna(0)
doc

Unnamed: 0_level_0,1/22/2020,4/01/2020
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,0.0,192
Albania,0.0,259
Algeria,0.0,847
Andorra,0.0,390
Angola,0.0,8
...,...,...
Vietnam,0.0,218
West Bank and Gaza,0.0,134
Winter Olympics 2022,0.0,0
Zambia,0.0,36


In [8]:
import os

PATH = "COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/"
file_list, csv_list = os.listdir(PATH), list() # 구조분해할당

for file in file_list :
    if file.strip(".")[-1] == "csv" :
        csv_list.append(file)

print(csv_list)

['.gitignore',
 '01-01-2021.csv',
 '01-01-2022.csv',
 '01-01-2023.csv',
 '01-02-2021.csv',
 '01-02-2022.csv',
 '01-02-2023.csv',
 '01-03-2021.csv',
 '01-03-2022.csv',
 '01-03-2023.csv',
 '01-04-2021.csv',
 '01-04-2022.csv',
 '01-04-2023.csv',
 '01-05-2021.csv',
 '01-05-2022.csv',
 '01-05-2023.csv',
 '01-06-2021.csv',
 '01-06-2022.csv',
 '01-06-2023.csv',
 '01-07-2021.csv',
 '01-07-2022.csv',
 '01-07-2023.csv',
 '01-08-2021.csv',
 '01-08-2022.csv',
 '01-08-2023.csv',
 '01-09-2021.csv',
 '01-09-2022.csv',
 '01-09-2023.csv',
 '01-10-2021.csv',
 '01-10-2022.csv',
 '01-10-2023.csv',
 '01-11-2021.csv',
 '01-11-2022.csv',
 '01-11-2023.csv',
 '01-12-2021.csv',
 '01-12-2022.csv',
 '01-12-2023.csv',
 '01-13-2021.csv',
 '01-13-2022.csv',
 '01-13-2023.csv',
 '01-14-2021.csv',
 '01-14-2022.csv',
 '01-14-2023.csv',
 '01-15-2021.csv',
 '01-15-2022.csv',
 '01-15-2023.csv',
 '01-16-2021.csv',
 '01-16-2022.csv',
 '01-16-2023.csv',
 '01-17-2021.csv',
 '01-17-2022.csv',
 '01-17-2023.csv',
 '01-18-2021.csv