In [2]:
import pandas as pd

# Load the per-country table: a 'Country_Region' column followed by one column per date.
df = pd.read_csv('Examples/COVID-19-master/final_df.csv')
df.head()

Unnamed: 0,Country_Region,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,1/30/2020,...,6/08/2020,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020
0,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,20917.0,21459.0,22142.0,22890.0,23546.0,24102.0,24766.0,25527.0,26310.0,26874.0
1,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1263.0,1299.0,1341.0,1385.0,1416.0,1464.0,1521.0,1590.0,1672.0,1722.0
2,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10265.0,10382.0,10484.0,10589.0,10698.0,10810.0,10919.0,11031.0,11147.0,11268.0
3,Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,852.0,852.0,852.0,852.0,853.0,853.0,853.0,853.0,854.0,854.0
4,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,92.0,96.0,113.0,118.0,130.0,138.0,140.0,142.0,148.0,155.0


In [3]:
# (rows, columns) of the loaded frame: a baseline to compare against after merging.
df.shape

(187, 149)

## 1. Get country flags
* [https://www.countryflags.io](https://www.countryflags.io)
* iso2: the two-letter code that identifies each country.
    * _ex._ 'AF' for Afghanistan, 'AL' for Albania, etc.
* With the iso2 code, we can build the URL of each country's flag image on the site linked above.
* So we need to add an 'iso2' column to the data before visualizing the graph.

In [4]:
# Read the CSSE UID/ISO/FIPS lookup table (utf-8-sig also strips a leading BOM).
# keep_default_na=False switches off pandas' built-in missing-value markers
# ('', '#N/A', 'N/A', '#NA', '-1.#IND', '-1.#QNAN', '-nan', '1.#IND', '1.#QNAN',
#  'NA', 'NULL', 'NaN', 'n/a', 'nan', 'null'), so a literal 'NA' is kept as text
# instead of being turned into NaN. With na_values=' ' only a single-space field
# is parsed as missing; note that empty fields therefore stay as '' (not NaN).
country_info = pd.read_csv(
    'Examples/COVID-19-master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv',
    encoding='utf-8-sig',
    keep_default_na=False,
    na_values=' ',
)
country_info.head()

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key
0,0,0,,BW,,,,,,Botswana,,,Botswana
1,1,1,,BI,,,,,,Burundi,,,Burundi
2,2,2,,SL,,,,,,Sierra Leone,,,Sierra Leone
3,3,3,4.0,AF,AFG,4.0,,,,Afghanistan,33.93911,67.709953,Afghanistan
4,4,4,8.0,AL,ALB,8.0,,,,Albania,41.1533,20.1683,Albania


In [5]:
# Namibia's iso2 code is the literal string 'NA'. Because the default NA markers
# were disabled when reading the file, it is kept as text rather than read as missing.
country_info.loc[country_info['Country_Region'].eq('Namibia')]

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,UID,iso2,iso3,code3,FIPS,Admin2,Province_State,Country_Region,Lat,Long_,Combined_Key
115,115,115,516.0,,NAM,516.0,,,,Namibia,-22.9576,18.4904,Namibia


In [6]:
# Only the ISO2 code and the country name are needed from the lookup table.
country_info = country_info.loc[:, ['iso2', 'Country_Region']]
country_info.head()

Unnamed: 0,iso2,Country_Region
0,BW,Botswana
1,BI,Burundi
2,SL,Sierra Leone
3,AF,Afghanistan
4,AL,Albania


In [7]:
country_info.shape  # far more rows than countries: the same Country_Region appears on many rows

(3560, 2)

In [8]:
# Reduce the lookup table to one row per country.
# keep='first' (not 'last'): several rows can share a Country_Region while carrying
# different iso2 codes, e.g. overseas territories listed under their parent country
# with their own code. Keeping the last such row would give the parent country a
# territory's code, and later a territory's flag.
# NOTE(review): this assumes the country-level row precedes its province/territory
# rows in the lookup table (as the first rows shown above suggest) - confirm.
country_info = country_info.drop_duplicates(subset='Country_Region', keep='first')
country_info.shape

(180, 2)

### Merge iso2 data with original data

In [9]:
# Left join on 'Country_Region': every row of df is kept and gains the matching iso2 code
# (NaN when the country name has no counterpart in country_info).
doc_final_country = df.merge(country_info, on='Country_Region', how='left')
doc_final_country.head()  # iso2 is appended as the last column

Unnamed: 0,Country_Region,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,1/30/2020,...,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020,iso2
0,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,21459.0,22142.0,22890.0,23546.0,24102.0,24766.0,25527.0,26310.0,26874.0,AF
1,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1299.0,1341.0,1385.0,1416.0,1464.0,1521.0,1590.0,1672.0,1722.0,AL
2,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10382.0,10484.0,10589.0,10698.0,10810.0,10919.0,11031.0,11147.0,11268.0,DZ
3,Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,852.0,852.0,852.0,853.0,853.0,853.0,853.0,854.0,854.0,AD
4,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,96.0,113.0,118.0,130.0,138.0,140.0,142.0,148.0,155.0,AO


In [10]:
doc_final_country.shape  # same row count as df, plus one extra column (iso2)

(187, 150)

In [11]:
# Number of missing values per column; the iso2 column shows the unmatched countries.
doc_final_country.isna().sum()

Country_Region    0
1/22/2020         0
1/23/2020         0
1/24/2020         0
1/25/2020         0
                 ..
6/14/2020         0
6/15/2020         0
6/16/2020         0
6/17/2020         0
iso2              7
Length: 150, dtype: int64

In [12]:
# Rows that received no iso2 code from the lookup table.
# TODO: besides non-country entries ('Cruise Ship', 'Others'), some real countries show up here.
# Presumably their names have no matching Country_Region row in the lookup table - confirm.
doc_final_country.loc[doc_final_country['iso2'].isna()]

Unnamed: 0,Country_Region,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,1/30/2020,...,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020,iso2
38,Comoros,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,141.0,162.0,162.0,163.0,176.0,176.0,176.0,197.0,197.0,
44,Cruise Ship,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
99,Lesotho,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,
130,Others,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
147,Sao Tome and Principe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,514.0,611.0,632.0,639.0,659.0,661.0,662.0,671.0,683.0,
166,Tajikistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4690.0,4763.0,4834.0,4902.0,4971.0,5035.0,5097.0,5160.0,5221.0,
184,Yemen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,524.0,560.0,591.0,632.0,705.0,728.0,844.0,885.0,902.0,


In [13]:
# Drop the rows (not columns) that have no usable ISO2 code, since no flag URL can
# be built for them. Two cases count as "missing":
#   * NaN          - the country name had no match in the lookup table (left merge);
#   * blank string - the lookup table is read with keep_default_na=False, so an empty
#                    iso2 field arrives as '' rather than NaN and dropna() alone
#                    would keep it, yielding a URL with an empty country code.
has_iso2 = (
    doc_final_country['iso2'].notna()
    & doc_final_country['iso2'].astype(str).str.strip().ne('')
)
# .copy() makes the result an independent frame, so assigning to its columns later
# does not trigger SettingWithCopyWarning.
doc_final_country = doc_final_country[has_iso2].copy()
doc_final_country[doc_final_country['iso2'].isnull()]  # sanity check: should display no rows

Unnamed: 0,Country_Region,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,1/30/2020,...,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020,iso2


### Get country flag links
* need:
    * 'iso2' value
    * flag link

In [14]:
def create_flag_link(iso2):
    """Build the countryflags.io URL of the 64px "shiny" flag image for a country code.

    Parameters
    ----------
    iso2 : str
        Two-letter country code (e.g. 'AF'); inserted into the URL unchanged.

    Returns
    -------
    str
        'https://www.countryflags.io/<iso2>/shiny/64.png'

    Raises
    ------
    TypeError
        If ``iso2`` is not a string (for example a NaN float).
    """
    url_parts = ('https://www.countryflags.io/', iso2, '/shiny/64.png')
    return ''.join(url_parts)

# Replace each two-letter code in 'iso2' with its flag image URL.
# Note: the codes are overwritten in place, so running this cell a second time
# would wrap the already-built URLs in another URL.
doc_final_country['iso2'] = doc_final_country['iso2'].map(create_flag_link)

In [15]:
doc_final_country.head()  # the iso2 column now holds the flag image URL instead of the two-letter code

Unnamed: 0,Country_Region,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,1/30/2020,...,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020,iso2
0,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,21459.0,22142.0,22890.0,23546.0,24102.0,24766.0,25527.0,26310.0,26874.0,https://www.countryflags.io/AF/shiny/64.png
1,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1299.0,1341.0,1385.0,1416.0,1464.0,1521.0,1590.0,1672.0,1722.0,https://www.countryflags.io/AL/shiny/64.png
2,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10382.0,10484.0,10589.0,10698.0,10810.0,10919.0,11031.0,11147.0,11268.0,https://www.countryflags.io/DZ/shiny/64.png
3,Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,852.0,852.0,852.0,853.0,853.0,853.0,853.0,854.0,854.0,https://www.countryflags.io/AD/shiny/64.png
4,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,96.0,113.0,118.0,130.0,138.0,140.0,142.0,148.0,155.0,https://www.countryflags.io/AO/shiny/64.png


In [16]:
# Column names as a plain Python list, so the order can be edited below.
total_columns = list(doc_final_country.columns)
total_columns

['Country_Region',
 '1/22/2020',
 '1/23/2020',
 '1/24/2020',
 '1/25/2020',
 '1/26/2020',
 '1/27/2020',
 '1/28/2020',
 '1/29/2020',
 '1/30/2020',
 '1/31/2020',
 '2/01/2020',
 '2/02/2020',
 '2/03/2020',
 '2/04/2020',
 '2/05/2020',
 '2/06/2020',
 '2/07/2020',
 '2/08/2020',
 '2/09/2020',
 '2/10/2020',
 '2/11/2020',
 '2/12/2020',
 '2/13/2020',
 '2/14/2020',
 '2/15/2020',
 '2/16/2020',
 '2/17/2020',
 '2/18/2020',
 '2/19/2020',
 '2/20/2020',
 '2/21/2020',
 '2/22/2020',
 '2/23/2020',
 '2/24/2020',
 '2/25/2020',
 '2/26/2020',
 '2/27/2020',
 '2/28/2020',
 '2/29/2020',
 '3/01/2020',
 '3/02/2020',
 '3/03/2020',
 '3/04/2020',
 '3/05/2020',
 '3/06/2020',
 '3/07/2020',
 '3/08/2020',
 '3/09/2020',
 '3/10/2020',
 '3/11/2020',
 '3/12/2020',
 '3/13/2020',
 '3/14/2020',
 '3/15/2020',
 '3/16/2020',
 '3/17/2020',
 '3/18/2020',
 '3/19/2020',
 '3/20/2020',
 '3/21/2020',
 '3/22/2020',
 '3/23/2020',
 '3/24/2020',
 '3/25/2020',
 '3/26/2020',
 '3/27/2020',
 '3/28/2020',
 '3/29/2020',
 '3/30/2020',
 '3/31/2020',
 

In [17]:
# Move 'iso2' from its current position to index 1, right after 'Country_Region',
# then reorder the frame's columns to match the edited list.
total_columns.insert(1, total_columns.pop(total_columns.index('iso2')))
doc_final_country = doc_final_country.loc[:, total_columns]
doc_final_country.head()

Unnamed: 0,Country_Region,iso2,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,...,6/08/2020,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020
0,Afghanistan,https://www.countryflags.io/AF/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,20917.0,21459.0,22142.0,22890.0,23546.0,24102.0,24766.0,25527.0,26310.0,26874.0
1,Albania,https://www.countryflags.io/AL/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1263.0,1299.0,1341.0,1385.0,1416.0,1464.0,1521.0,1590.0,1672.0,1722.0
2,Algeria,https://www.countryflags.io/DZ/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10265.0,10382.0,10484.0,10589.0,10698.0,10810.0,10919.0,11031.0,11147.0,11268.0
3,Andorra,https://www.countryflags.io/AD/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,852.0,852.0,852.0,852.0,853.0,853.0,853.0,853.0,854.0,854.0
4,Angola,https://www.countryflags.io/AO/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,92.0,96.0,113.0,118.0,130.0,138.0,140.0,142.0,148.0,155.0


In [18]:
# The second column holds flag URLs by now, so give it a name that says so.
# total_columns is updated as well so the list keeps matching the frame's columns.
total_columns[1] = 'Country_Flag'
doc_final_country = doc_final_country.rename(columns={'iso2': 'Country_Flag'})
doc_final_country.head()

Unnamed: 0,Country_Region,Country_Flag,1/22/2020,1/23/2020,1/24/2020,1/25/2020,1/26/2020,1/27/2020,1/28/2020,1/29/2020,...,6/08/2020,6/09/2020,6/10/2020,6/11/2020,6/12/2020,6/13/2020,6/14/2020,6/15/2020,6/16/2020,6/17/2020
0,Afghanistan,https://www.countryflags.io/AF/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,20917.0,21459.0,22142.0,22890.0,23546.0,24102.0,24766.0,25527.0,26310.0,26874.0
1,Albania,https://www.countryflags.io/AL/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1263.0,1299.0,1341.0,1385.0,1416.0,1464.0,1521.0,1590.0,1672.0,1722.0
2,Algeria,https://www.countryflags.io/DZ/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10265.0,10382.0,10484.0,10589.0,10698.0,10810.0,10919.0,11031.0,11147.0,11268.0
3,Andorra,https://www.countryflags.io/AD/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,852.0,852.0,852.0,852.0,853.0,853.0,853.0,853.0,854.0,854.0
4,Angola,https://www.countryflags.io/AO/shiny/64.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,92.0,96.0,113.0,118.0,130.0,138.0,140.0,142.0,148.0,155.0


In [19]:
# Save the flag-annotated table (the file name indicates it feeds the graph).
# index=False: the row index is only a leftover positional label with gaps from the
# dropped rows. Writing it would add an unnamed first column that comes back as
# 'Unnamed: 0' whenever the file is read again.
doc_final_country.to_csv('Examples/COVID-19-master/final_covid_data_for_graph.csv', index=False)