In [1]:
# Execute the companion notebook first; the cells below presumably rely on
# names it defines (its contents are not visible here — TODO confirm which).
%run covid19_world_function.ipynb

In [2]:
import matplotlib.pyplot as plt  
import csv 
import operator 
import datetime as dt  
import pandas as pd
import numpy as np
from pandas import DataFrame as df
%matplotlib inline

import math
import os
import json

# Use the NanumGothic font family for all matplotlib text — presumably so that
# Korean labels render correctly; the font must be installed locally (TODO confirm).
plt.rcParams['font.family'] = 'NanumGothic'

In [3]:
# https://geopandas.org/
# GDAL-3.0.4-cp37-cp37m-win_amd64.whl
# Fiona-1.8.13-cp37-cp37m-win_amd64.whl
# Shapely-1.7.0-cp37-cp37m-win_amd64.whl
# The three wheel files above are what is needed to install the geopandas module.
# Install each of them with: python -m pip install '<whl file name>'

import geopandas as gpd
from geopandas.tools import geocode

In [4]:
# folium: another module, used here to build the visualizations
# pip install folium
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster, TimestampedGeoJson

In [5]:
# Load the CSV of newly confirmed cases per country (optionally per province/city), one column per date.
# NOTE(review): the counts look cumulative rather than daily-new (later cells use the
# last date column as each row's total) — confirm against the data source.
# Source: https://github.com/CSSEGISandData/COVID-19
# References
# https://www.arcgis.com/apps/opsdashboard/index.html#/bda7594740fd40299423467b48e9ecf6
# WHO (https://www.who.int/)
# China CDC (http://weekly.chinacdc.cn/news/TrackingtheEpidemic.htm)
# Taiwan CDC (https://sites.google.com/cdc.gov.tw/2019ncov/taiwan?authuser=0)
# US CDC (https://www.cdc.gov/coronavirus/2019-ncov/index.html)
# Covid Live(Australia) ( https://www.covidlive.com.au/)
# Covid Tracking Project (https://covidtracking.com/data.)

covid = pd.read_csv('time_series_covid19_confirmed_global.csv', engine='python')
covid.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20,4/28/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,996,1026,1092,1176,1279,1351,1463,1531,1703,1828
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,562,584,609,634,663,678,712,726,736,750
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,2629,2718,2811,2910,3007,3127,3256,3382,3517,3649
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,713,717,717,723,723,731,738,738,743,743
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,24,24,24,25,25,25,25,26,27,27


# 각각의 데이터프레임 생성

## 1. 나라별 전체 신규확진자 dataframe

In [6]:
# Preview the date columns: everything from position 4 onward except the last
# (most recent) column, which is kept separately as the per-row total below.
covid.iloc[:, 4:-1]

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,4/18/20,4/19/20,4/20/20,4/21/20,4/22/20,4/23/20,4/24/20,4/25/20,4/26/20,4/27/20
0,0,0,0,0,0,0,0,0,0,0,...,933,996,1026,1092,1176,1279,1351,1463,1531,1703
1,0,0,0,0,0,0,0,0,0,0,...,548,562,584,609,634,663,678,712,726,736
2,0,0,0,0,0,0,0,0,0,0,...,2534,2629,2718,2811,2910,3007,3127,3256,3382,3517
3,0,0,0,0,0,0,0,0,0,0,...,704,713,717,717,723,723,731,738,738,743
4,0,0,0,0,0,0,0,0,0,0,...,24,24,24,24,25,25,25,25,26,27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,0,0,0,0,0,0,0,0,0,0,...,1,1,1,1,1,1,1,1,1,1
260,0,0,0,0,0,0,0,0,0,0,...,4,4,4,4,4,5,5,5,6,6
261,0,0,0,0,0,0,0,0,0,0,...,6,6,6,6,6,6,6,6,6,6
262,0,0,0,0,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,4,4


In [7]:
# Keep only the identifying columns (positions 0-3) and the most recent date column.
# The labels to remove are passed directly as an Index via `columns=`; the original
# passed a whole DataFrame slice, which only worked because iterating a DataFrame
# yields its column labels and needlessly built a copy of the data first.
tot_covid = covid.drop(columns=covid.columns[4:-1])
tot_covid

Unnamed: 0,Province/State,Country/Region,Lat,Long,4/28/20
0,,Afghanistan,33.000000,65.000000,1828
1,,Albania,41.153300,20.168300,750
2,,Algeria,28.033900,1.659600,3649
3,,Andorra,42.506300,1.521800,743
4,,Angola,-11.202700,17.873900,27
...,...,...,...,...,...
259,Saint Pierre and Miquelon,France,46.885200,-56.315900,1
260,,South Sudan,6.877000,31.307000,34
261,,Western Sahara,24.215500,-12.885800,6
262,,Sao Tome and Principe,0.186360,6.613081,8


In [8]:
# Rename the most recent date column (the last column of tot_covid) to 'total'.
# The label is looked up by position instead of hard-coding '4/28/20', so the cell
# keeps working when the CSV is refreshed with newer dates; a hard-coded label that
# no longer exists would be silently ignored by rename() and break every later
# tot_covid['total'] lookup. Re-running the cell is a no-op ('total' -> 'total').
tot_covid = tot_covid.rename(columns={tot_covid.columns[-1]: 'total'})
tot_covid

Unnamed: 0,Province/State,Country/Region,Lat,Long,total
0,,Afghanistan,33.000000,65.000000,1828
1,,Albania,41.153300,20.168300,750
2,,Algeria,28.033900,1.659600,3649
3,,Andorra,42.506300,1.521800,743
4,,Angola,-11.202700,17.873900,27
...,...,...,...,...,...
259,Saint Pierre and Miquelon,France,46.885200,-56.315900,1
260,,South Sudan,6.877000,31.307000,34
261,,Western Sahara,24.215500,-12.885800,6
262,,Sao Tome and Principe,0.186360,6.613081,8


In [9]:
# World map centred on (0, 0), fully zoomed out, using the 'cartodbpositron' tiles.
m = folium.Map(
    location=[0, 0],
    tiles='cartodbpositron',
    zoom_start=1,
    min_zoom=1,
    max_zoom=10,
)

In [12]:
# Add one crimson circle per row of tot_covid to the map `m`.
# The radius is the row's share of the overall total, scaled by 200; the popup
# shows "<country> <count>".
# The overall total does not change inside the loop, so it is computed once here
# instead of once per row.
world_total = tot_covid['total'].sum()

# Iterate the four columns in lockstep rather than indexing tot_covid[col][i],
# which is chained label-based indexing and only works with a default 0..n-1 index.
marker_rows = zip(tot_covid['Lat'], tot_covid['Long'],
                  tot_covid['Country/Region'], tot_covid['total'])
for row_lat, row_long, region, cases in marker_rows:
    folium.CircleMarker([row_lat, row_long],
                        radius=(cases / world_total) * 200,
                        color='crimson',
                        fill_color='crimson',
                        popup=region + ' ' + str(cases)).add_to(m)

In [13]:
# Display the map (with the per-row markers added above) as this cell's output.
m

In [97]:
# Some countries are split over several rows (same country, different
# province/city), and each of those rows is drawn as its own marker,
# so certain countries look as if their total were very small.
# Rows that belong to the same country therefore need to be summed
# and given a single coordinate.

tot_covid

Unnamed: 0,Province/State,Country/Region,Lat,Long,total
0,,Afghanistan,33.000000,65.000000,1828
1,,Albania,41.153300,20.168300,750
2,,Algeria,28.033900,1.659600,3649
3,,Andorra,42.506300,1.521800,743
4,,Angola,-11.202700,17.873900,27
...,...,...,...,...,...
259,Saint Pierre and Miquelon,France,46.885200,-56.315900,1
260,,South Sudan,6.877000,31.307000,34
261,,Western Sahara,24.215500,-12.885800,6
262,,Sao Tome and Principe,0.186360,6.613081,8


In [98]:
# Group the 'total' column by country so that multi-row countries can be summed.
grouped = tot_covid.groupby('Country/Region')['total']
grouped.sum()

Country/Region
Afghanistan           1828
Albania                750
Algeria               3649
Andorra                743
Angola                  27
                      ... 
West Bank and Gaza     343
Western Sahara           6
Yemen                    1
Zambia                  95
Zimbabwe                32
Name: total, Length: 185, dtype: int64

In [99]:
# One row per country: the summed total, with the country moved from the index
# back into a regular 'Country/Region' column.
countries = grouped.sum().reset_index()
countries

Unnamed: 0,Country/Region,total
0,Afghanistan,1828
1,Albania,750
2,Algeria,3649
3,Andorra,743
4,Angola,27
...,...,...
180,West Bank and Gaza,343
181,Western Sahara,6
182,Yemen,1
183,Zambia,95


In [100]:
# Each country's coordinate is taken as the mean of its provinces'/cities'
# coordinates, to place the marker near the core of the country where possible.

lat = tot_covid.groupby('Country/Region')['Lat'].mean()
long = tot_covid.groupby('Country/Region')['Long'].mean()

# Combine the two per-country means into one Lat/Long table keyed by country.
latlong_countries = pd.merge(left=lat, right=long, on='Country/Region', how='outer')

In [101]:
# Attach the per-country mean coordinates to the per-country totals.
# Only the 'Country/Region' and 'total' columns of `countries` are fed into the
# merge, so re-running this cell does not pile up suffixed duplicates
# (Lat_x/Lat_y, Long_x/Long_y) from a previous run's Lat/Long columns.
countries = pd.merge(countries[['Country/Region', 'total']], latlong_countries,
                     how='outer', on='Country/Region')
countries

Unnamed: 0,Country/Region,total,Lat,Long
0,Afghanistan,1828,33.000000,65.000000
1,Albania,750,41.153300,20.168300
2,Algeria,3649,28.033900,1.659600
3,Andorra,743,42.506300,1.521800
4,Angola,27,-11.202700,17.873900
...,...,...,...,...
180,West Bank and Gaza,343,31.952200,35.233200
181,Western Sahara,6,24.215500,-12.885800
182,Yemen,1,15.552727,48.516388
183,Zambia,95,-15.416700,28.283300


In [102]:
# Start again from an empty world map (same settings as the first map) so the
# per-country markers are not drawn on top of the per-row ones.
m = folium.Map(
    location=[0, 0],
    tiles='cartodbpositron',
    zoom_start=1,
    min_zoom=1,
    max_zoom=10,
)

In [103]:
# Add one crimson circle per country to the map `m`.
# The radius is the country's total multiplied by 0.0001 (a fixed scale factor);
# the popup shows "<country> <count>".
# Iterate the four columns in lockstep rather than indexing countries[col][i],
# which is chained label-based indexing and only works with a default 0..n-1 index.
country_rows = zip(countries['Lat'], countries['Long'],
                   countries['Country/Region'], countries['total'])
for country_lat, country_long, country_name, country_total in country_rows:
    folium.CircleMarker([country_lat, country_long],
                        radius=country_total * 0.0001,
                        color='crimson',
                        fill_color='crimson',
                        popup=country_name + ' ' + str(country_total)).add_to(m)

In [104]:
# Display the map (with the per-country markers added above) as this cell's output.
m

In [105]:
# With the coordinate set to the mean, some countries (the United Kingdom being
# the typical example) turn out to be drawn at a wrong location.
# The coordinate should therefore be taken from one of the country's cities,
# letting that city represent the whole country.

# Rebuild the per-country totals (country + summed total, no coordinates yet).
grouped = tot_covid.groupby('Country/Region')['total']
countries = grouped.sum().reset_index()
countries

Unnamed: 0,Country/Region,total
0,Afghanistan,1828
1,Albania,750
2,Algeria,3649
3,Andorra,743
4,Angola,27
...,...,...
180,West Bank and Gaza,343
181,Western Sahara,6
182,Yemen,1
183,Zambia,95


In [106]:
# Print every country name, one per line.
for country_name in countries['Country/Region']:
    print(country_name)

Afghanistan
Albania
Algeria
Andorra
Angola
Antigua and Barbuda
Argentina
Armenia
Australia
Austria
Azerbaijan
Bahamas
Bahrain
Bangladesh
Barbados
Belarus
Belgium
Belize
Benin
Bhutan
Bolivia
Bosnia and Herzegovina
Botswana
Brazil
Brunei
Bulgaria
Burkina Faso
Burma
Burundi
Cabo Verde
Cambodia
Cameroon
Canada
Central African Republic
Chad
Chile
China
Colombia
Congo (Brazzaville)
Congo (Kinshasa)
Costa Rica
Cote d'Ivoire
Croatia
Cuba
Cyprus
Czechia
Denmark
Diamond Princess
Djibouti
Dominica
Dominican Republic
Ecuador
Egypt
El Salvador
Equatorial Guinea
Eritrea
Estonia
Eswatini
Ethiopia
Fiji
Finland
France
Gabon
Gambia
Georgia
Germany
Ghana
Greece
Grenada
Guatemala
Guinea
Guinea-Bissau
Guyana
Haiti
Holy See
Honduras
Hungary
Iceland
India
Indonesia
Iran
Iraq
Ireland
Israel
Italy
Jamaica
Japan
Jordan
Kazakhstan
Kenya
Korea, South
Kosovo
Kuwait
Kyrgyzstan
Laos
Latvia
Lebanon
Liberia
Libya
Liechtenstein
Lithuania
Luxembourg
MS Zaandam
Madagascar
Malawi
Malaysia
Maldives
Mali
Malta
Mauritania
Ma

In [107]:
# Group the Lat/Long columns by country. No aggregation is applied here, so `lat`
# is a groupby object, not a Series of latitudes.
# NOTE(review): this rebinds `lat`, which an earlier cell used for the per-country
# mean latitude, and the name no longer matches the contents (Lat and Long).
lat = tot_covid[['Lat', 'Long']].groupby(tot_covid['Country/Region'])

In [108]:
# Inspect the groupby object by wrapping it in a DataFrame: per the displayed
# output, column 0 holds the country and column 1 that country's Lat/Long rows.
pd.DataFrame(lat)

Unnamed: 0,0,1
0,Afghanistan,Lat Long 0 33.0 65.0
1,Albania,Lat Long 1 41.1533 20.1683
2,Algeria,Lat Long 2 28.0339 1.6596
3,Andorra,Lat Long 3 42.5063 1.5218
4,Angola,Lat Long 4 -11.2027 17.8739
...,...,...
180,West Bank and Gaza,Lat Long 241 31.9522 35.2332
181,Western Sahara,Lat Long 261 24.2155 -12.8858
182,Yemen,Lat Long 263 15.552727 48.5...
183,Zambia,Lat Long 229 -15.4167 28.2833


In [83]:
# Attach the per-country mean coordinates to the per-country totals.
# Only the 'Country/Region' and 'total' columns of `countries` are fed into the
# merge, so re-running this cell does not pile up suffixed duplicates
# (Lat_x/Lat_y, Long_x/Long_y) from a previous run's Lat/Long columns.
countries = pd.merge(countries[['Country/Region', 'total']], latlong_countries,
                     how='outer', on='Country/Region')
countries

Unnamed: 0,Country/Region,total,Lat,Long
0,Afghanistan,1828,33.000000,65.000000
1,Albania,750,41.153300,20.168300
2,Algeria,3649,28.033900,1.659600
3,Andorra,743,42.506300,1.521800
4,Angola,27,-11.202700,17.873900
...,...,...,...,...
180,West Bank and Gaza,343,31.952200,35.233200
181,Western Sahara,6,24.215500,-12.885800
182,Yemen,1,15.552727,48.516388
183,Zambia,95,-15.416700,28.283300


AttributeError: Cannot access callable attribute 'reset_index' of 'SeriesGroupBy' objects, try using the 'apply' method

In [19]:
# Country name stored in the row labelled 1.
tot_covid.loc[1, 'Country/Region']

'Albania'

In [138]:
# Work on an independent copy so that later changes to tot_countries do not
# affect `countries`.
tot_countries = countries.copy()

In [189]:
# Country name in the row labelled 1 of tot_countries.
# tot_countries is a DataFrame copy of `countries`, whose column labels are strings,
# so the original `tot_countries[1]` is a column lookup that raises KeyError on a
# fresh run; the row/column pair is selected explicitly instead, which matches the
# recorded output ('Albania').
tot_countries.loc[1, 'Country/Region']

'Albania'

In [221]:
# Sum of 'total' over every row whose country is China.
tot_covid.loc[tot_covid['Country/Region'] == 'China', 'total'].sum()

83940

In [220]:
# Sum of the 'total' column of `new`.
# NOTE(review): `new` is not defined in any cell visible in this notebook —
# presumably it comes from the %run notebook or a since-deleted cell; confirm,
# otherwise this cell fails with NameError on a fresh kernel.
new['total'].sum()

6519038

In [192]:
# Sum of 'total' over every row whose country is China.
# NOTE(review): the recorded output of this cell differs from that of the identical
# expression a few cells above, so it was presumably executed against a different
# state of tot_covid — re-run after a kernel restart to confirm the value.
tot_covid.loc[tot_covid['Country/Region'] == 'China', 'total'].sum()

6519038