## 코로나 국가별 확진자 수 추이 분석/시각화
- https://public.flourish.studio/visualisation/17998107/

In [16]:
import pandas as pd
import json, os 

# Load the country-name conversion table once at import time; it is looked up
# by raw 'Country_Region' values to obtain the name used across all files.
with open('COVID-19-master/csse_covid_19_data/country_convert.json', 'r', encoding='utf-8-sig') as json_file:
    json_data = json.loads(json_file.read())

def country_name_convert(row):
    """Return the unified country name for one row.

    The row's 'Country_Region' value is looked up in the module-level
    ``json_data`` table; names that have no entry are returned unchanged.
    """
    raw_name = row['Country_Region']
    return json_data[raw_name] if raw_name in json_data else raw_name
    
def create_dateframe(filename, path=None):
    """Read one daily-report CSV and return confirmed counts per country.

    Args:
        filename: File name of the report, e.g. ``'01-22-2020.csv'``. The part
            before the first dot is used to build the result's column label.
        path: Directory containing the file. Defaults to the module-level
            ``PATH`` so existing single-argument calls keep working.

    Returns:
        A DataFrame indexed by 'Country_Region' with a single int64 column
        named after the file's date (``'01-22-2020.csv'`` -> ``'1/22/2020'``).

    Raises:
        KeyError: If the file has no country column ('Country_Region' or
            'Country/Region') or no 'Confirmed' column.
    """
    if path is None:
        path = PATH  # fall back to the notebook-wide reports directory

    # 1. Read the CSV file.
    doc = pd.read_csv(os.path.join(path, filename), encoding='utf-8-sig')

    # 2. Keep only the country and confirmed-count columns. Some files name
    #    the country column 'Country/Region'; normalise it explicitly instead
    #    of relying on a bare ``except`` that would hide unrelated errors.
    if 'Country_Region' not in doc.columns:
        doc = doc.rename(columns={'Country/Region': 'Country_Region'})
    doc = doc[['Country_Region', 'Confirmed']]

    # 3. Drop rows without a confirmed count; copy so the column assignment
    #    below works on an independent frame.
    doc = doc.dropna(subset=['Confirmed']).copy()

    # 4. Make country names consistent across files. Mapping the single column
    #    avoids building a Series per row and also works on an empty frame.
    doc['Country_Region'] = doc['Country_Region'].map(
        lambda name: country_name_convert({'Country_Region': name})
    )

    # 5. Store the counts as integers.
    doc = doc.astype({'Confirmed': 'int64'})

    # 6. Sum rows that share a country into one row per country.
    doc = doc.groupby('Country_Region').sum()

    # 7. Derive the column label from the file name: drop the extension,
    #    strip the month's leading zero and use '/' as the separator.
    date_column = filename.split(".")[0].lstrip('0').replace('-', '/')
    doc.columns = [date_column]
    return doc

In [17]:
import os
from datetime import datetime

def generate_dateframe_by_path(PATH):
    """Combine every daily-report CSV in a directory into one wide table.

    Args:
        PATH: Directory to scan for ``MM-DD-YYYY.csv`` files.

    Returns:
        A DataFrame indexed by country with one column per report date, in
        chronological order. Countries missing from a given report get 0.

    Raises:
        FileNotFoundError: If the directory contains no CSV files.
        ValueError: If a CSV file name does not match ``MM-DD-YYYY.csv``.

    Note:
        ``create_dateframe`` is called with the file name only and resolves
        the directory on its own (from the module-level ``PATH``), so the
        argument passed here must refer to that same directory.
    """
    # Other files may live next to the reports; keep only the CSV files.
    csv_list = [name for name in os.listdir(PATH) if name.endswith('.csv')]
    if not csv_list:
        # Previously this case surfaced as an UnboundLocalError further down.
        raise FileNotFoundError(f"no CSV files found in {PATH!r}")

    # Sort by the actual date: a plain string sort of MM-DD-YYYY names would
    # group by month before year.
    csv_list.sort(key=lambda name: datetime.strptime(name, '%m-%d-%Y.csv'))

    # Align all per-date frames on the country index in one pass instead of
    # one outer merge per file; sort_index keeps the lexicographic country
    # order that the outer merges produced.
    frames = [create_dateframe(name) for name in csv_list]
    final_doc = pd.concat(frames, axis=1).sort_index()

    # A country absent from a report has no confirmed cases recorded that day.
    return final_doc.fillna(0)

In [18]:
def create_flag_link(column):
    """Return the Flourish flag-image URL for a country code.

    The code is lower-cased and used as the SVG file name under the
    public Flourish country-flags directory.
    """
    svg_name = column.lower() + '.svg'
    return 'https://public.flourish.studio/country-flags/svg/' + svg_name


# Directory that holds the daily report CSV files.
PATH = 'COVID-19-master/csse_covid_19_data/csse_covid_19_daily_reports/'

# Country-by-date table of confirmed counts, stored as integers.
df_confirmed = generate_dateframe_by_path(PATH).astype('int64')

# Country look-up table. Only empty cells are treated as missing
# (presumably so that a literal code such as 'NA' stays a string -- confirm).
country_info = pd.read_csv(
    "COVID-19-master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv",
    encoding='utf-8-sig',
    keep_default_na=False,
    na_values='',
)
# One iso2 code per country: keep the last row listed for each country.
country_info = country_info[['iso2', 'Country_Region']].drop_duplicates(
    subset='Country_Region', keep='last'
)

# Attach the iso2 code to every country and drop countries without one,
# then turn the code into the URL of the flag image.
doc_final_country = pd.merge(
    df_confirmed, country_info, how='left', on='Country_Region'
).dropna(subset=['iso2'])
doc_final_country['iso2'] = doc_final_country['iso2'].map(create_flag_link)

# Move the flag column to the second position and rename it 'Country_Flag'.
cols = [name for name in doc_final_country.columns if name != 'iso2']
cols.insert(1, 'iso2')
doc_final_country = doc_final_country[cols]
cols[1] = 'Country_Flag'
doc_final_country.columns = cols

# Save the table used as the data source for the graph.
doc_final_country.to_csv("COVID-19-master/final_covid_data_for_graph.csv")

In [21]:
# Preview the first rows of the final table to check the column layout.
doc_final_country.head()

Unnamed: 0,Country_Region,Country_Flag,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,...,2/28/2023,3/01/2023,3/02/2023,3/03/2023,3/04/2023,3/05/2023,3/06/2023,3/07/2023,3/08/2023,3/09/2023
0,Afghanistan,https://public.flourish.studio/country-flags/s...,0,0,0,0,0,0,0,0,...,209322,209340,209358,209362,209369,209390,209406,209436,209451,209451
1,Albania,https://public.flourish.studio/country-flags/s...,0,0,0,0,0,0,0,0,...,334391,334408,334408,334427,334427,334427,334427,334427,334443,334457
2,Algeria,https://public.flourish.studio/country-flags/s...,0,0,0,0,0,0,0,0,...,271441,271448,271463,271469,271469,271477,271477,271490,271494,271496
3,Andorra,https://public.flourish.studio/country-flags/s...,0,0,0,0,0,0,0,0,...,47866,47875,47875,47875,47875,47875,47875,47875,47890,47890
4,Angola,https://public.flourish.studio/country-flags/s...,0,0,0,0,0,0,0,0,...,105255,105277,105277,105277,105277,105277,105277,105277,105288,105288
