The World Bank > [Life expectancy at birth, total](https://data.worldbank.org/indicator/SP.DYN.LE00.IN)<br />
The World Bank > [Life expectancy at birth, male](https://data.worldbank.org/indicator/SP.DYN.LE00.MA.IN)<br />
The World Bank > [Life expectancy at birth, female](https://data.worldbank.org/indicator/SP.DYN.LE00.FE.IN)<br />
[World Development Indicators > Life expectancy at birth](https://databank.worldbank.org/reports.aspx?source=2&series=SP.DYN.LE00.IN&country=) *(databank.worldbank.org)*<br />
[OECD stat](https://stats.oecd.org/index.aspx?queryid=30114)

"[Список стран по ожидаемой продолжительности жизни](https://ru.wikipedia.org/wiki/Список_стран_по_ожидаемой_продолжительности_жизни)"<br />
"[Продолжительность жизни в субъектах Российской Федерации](https://ru.wikipedia.org/wiki/Продолжительность_жизни_в_субъектах_Российской_Федерации)"<br />
[Таблица подбора цветов](http://mal-bioit.ru/survey-web-colors)<br />
[Список государств и зависимых территорий по населению](https://ru.wikipedia.org/wiki/Список_государств_и_зависимых_территорий_по_населению)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
import re

In [2]:
# Let pandas display up to 300 rows of a DataFrame before truncating the output.
pd.set_option('display.max_rows', 300)

# Default destination of generated table code:
# 'file' writes it to a file, 'here' prints it, anything else only prints 'done'.
DESTINATION_OUTPUT = ''

In [3]:
def output_table_code(st, file_name='', destination=''):
    """Deliver generated table code to the requested destination.

    Parameters
    ----------
    st : str
        The table code to deliver.
    file_name : str
        Name of the file inside the ``output/`` directory; required when the
        destination is ``'file'``.
    destination : str
        ``'file'`` writes ``st`` to ``output/<file_name>`` as UTF-8,
        ``'here'`` prints it, any other value only prints ``'done'``.
        An empty value falls back to the module-level ``DESTINATION_OUTPUT``.

    Raises
    ------
    ValueError
        If the destination is ``'file'`` but ``file_name`` is empty.
    """
    import os  # local import: `os` is not among the notebook's top-level imports

    if not destination:
        destination = DESTINATION_OUTPUT

    if destination == 'file':
        if not file_name:
            # Without a name the path would be the directory itself.
            raise ValueError("file_name is required when destination is 'file'")
        path = 'output/' + file_name
        # Create the target directory on first use instead of failing
        # with FileNotFoundError on a fresh checkout.
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'w', encoding="utf-8") as fh:
            fh.write(st)
        print('Data has written to file')
    elif destination == 'here':
        print(st)
    else:
        print('done')

In [4]:
# create code for placing info in Wikipedia
def create_table(df, lang='en'):
    """Build the wikitext of a life-expectancy table from ``df``.

    The table header is read from ``design/wbg_stats_header_2020_<lang>.txt``;
    one table row is appended per row of ``df`` and the table is closed
    with ``|}``.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by country/region name, with the numeric columns ``total``,
        ``male``, ``female``, ``gender_gap``, ``Δ_2019_total``,
        ``Δ_2019_male``, ``Δ_2019_female`` and ``Δ_2019_gender_gap``.
    lang : str
        ``'ru'`` selects the Russian header, flag template, region names and
        a decimal comma; any other value produces the English variant.

    Returns
    -------
    str
        The complete table code.
    """

    def if_value(x, prec=2):
        # NaN is shown as an em dash; negative numbers get a typographic minus.
        if math.isnan(x):
            return '—'
        magnitude = f"{abs(x):0.{prec}f}"
        # Values that round to zero (e.g. -1.4e-14 left over from float
        # subtraction) must not be rendered as "−0.00".
        if x < 0 and float(magnitude) != 0:
            return '−' + magnitude
        return magnitude

    if lang == 'ru':
        file_header = 'wbg_stats_header_2020_ru.txt'
        ptn_1 = 'флагификация'
        prettify_name = {
            'European Union': '[[Европейский союз]]',
            'Central Europe and the Baltics': '[[Центральная Европа]] и [[Прибалтика]]',
            'Europe & Central Asia': '[[Европа]] и [[Центральная Азия]]',
            'South Asia': '[[Южная Азия]]',
            'East Asia & Pacific': '[[Восточная Азия]] и [[Австралия и Океания|Океания]]',
            'Middle East & North Africa': '[[Страны MENA|Ближний Восток и Северная Африка]]',
            'Africa Eastern and Southern': '[[Восточная Африка|Восточная]] и [[Южная Африка|Южная]] Африка',
            'Sub-Saharan Africa': '[[Чёрная Африка|Африка к югу от Сахары]]',
            'Africa Western and Central': '[[Западная Африка|Западная]] и [[Центральная Африка|Центральная]] Африка',
            'North America': '[[Северная Америка]]',
            'Latin America & Caribbean': '[[Латинская Америка]] и [[Антильские острова|Карибы]]',
            'Arab World': 'Арабский мир',
            'World': 'Мир',
            'Caribbean small states': '[[Список стран Карибского бассейна|Малые страны Карибского бассейна]]',
            'Pacific island small states': 'Малые страны Тихого океана',
            'Small states': 'Малые страны',
            'Other small states': 'Другие малые страны'
        }
    else:
        file_header = 'wbg_stats_header_2020_en.txt'
        ptn_1 = 'flaglist'
        prettify_name = {
            'European Union': '[[European Union]]',
            'Central Europe and the Baltics': '[[Central Europe]] & the [[Baltic states|Baltics]]',
            'Europe & Central Asia': '[[Europe]] & [[Central Asia]]',
            'South Asia': '[[South Asia]]',
            'East Asia & Pacific': '[[Asia-Pacific|East Asia & Pacific]]',
            'Middle East & North Africa': '[[MENA|Middle East & North Africa]]',
            'Africa Eastern and Southern': 'Africa [[East Africa|Eastern]] & [[Southern Africa|Southern]]',
            'Sub-Saharan Africa': '[[Sub-Saharan Africa]]',
            'Africa Western and Central': 'Africa [[West Africa|Western]] & [[Central Africa|Central]]',
            'North America': '[[North America]]',
            'Latin America & Caribbean': '[[Latin America and the Caribbean|Latin America & Caribbean]]'
        }

    # Aggregates rendered as bold, centred "static" rows.
    highlighted_rows = {
        'World',
        'European Union', 'Central Europe and the Baltics',
        'Europe & Central Asia',
        'South Asia',
        'East Asia & Pacific',
        'Middle East & North Africa',
        'Africa Eastern and Southern', 'Sub-Saharan Africa', 'Africa Western and Central',
        'North America', 'Latin America & Caribbean',
    }
    # Aggregates rendered as plain indented rows without a flag template.
    small_state_rows = {
        'Caribbean small states',
        'Pacific island small states',
        'Small states',
        'Other small states',
    }
    # (column, cell style) in output order; the first Δ column carries the
    # thicker left border that separates the two halves of the table.
    value_columns = (
        ('total', 'text-align:center; background:#e0ffd8;'),
        ('male', 'text-align:center; background:#eaf3ff;'),
        ('female', 'text-align:center; background:#fee7f6;'),
        ('gender_gap', 'text-align:center;'),
        ('Δ_2019_total', 'text-align:center; background:#e0ffd8; border-left-width:2px;'),
        ('Δ_2019_male', 'text-align:center; background:#eaf3ff;'),
        ('Δ_2019_female', 'text-align:center; background:#fee7f6;'),
        ('Δ_2019_gender_gap', 'text-align:center;'),
    )

    with open('design/' + file_header, mode='r', encoding="utf-8") as fh:
        header = fh.read()

    pieces = [header.strip()]

    for name, row in df.iterrows():
        label = prettify_name.get(name, name)
        if name in highlighted_rows:
            row_start = '|-class=static-row-header'
            first_cell = f"|align=center| '''{label}'''"
            bold = "'''"
        elif name in small_state_rows:
            row_start = '|-'
            first_cell = f'|style="text-align:left; padding-left: 35px;"|{label}'
            bold = ''
        else:
            # Ordinary countries go through the wiki flag template.
            row_start = '|-'
            first_cell = f'|align=left|{{{{{ptn_1}|{name}}}}}'
            bold = ''

        cells = [first_cell]
        cells.extend(
            f'||style="{style}"| {bold}{if_value(row[column], 2)}{bold}'
            for column, style in value_columns
        )
        pieces.append(row_start + '\n' + ' '.join(cells))

    pieces.append('|}')
    st = '\n'.join(pieces)

    if lang == 'ru':
        # replace . with a comma when the . stands between two digits
        st = re.sub(r'(?<=\d)\.(?=\d)', ',', st)

    return st

In [5]:
def extract_data_for_year(df, year='2019'):
    """Return one year's column of ``df`` as a Series with cleaned-up row labels.

    Rows for a fixed set of territories and aggregate groups are dropped,
    the column ``str(year)`` is selected, missing values are removed and a
    number of long names in the index are replaced by short ones.

    ``df`` must contain every label of the drop list in its index;
    ``DataFrame.drop`` raises ``KeyError`` otherwise.
    """
    # Territories left out of the tables.
    excluded_territories = [
        'Hong Kong SAR, China',     # administrative region of China, population 7,291,000
        'Macao SAR, China',         # administrative region of China, population 682,000
        'Kosovo',                   # partially recognised state, population 1,806,000
    ]
    # Territories that are currently NOT excluded (kept here with their populations):
    #   'West Bank and Gaza'         Palestine, partially recognised state, 5,227,000
    #   'New Caledonia'              French Overseas Territory in Oceania, 271,000
    #   'Channel Islands'            archipelago in Europe, two Crown Dependencies, 172,000
    #   'Curacao'                    constituent country of the Netherlands in S.America, 149,000, little info about LE
    #   'Virgin Islands (U.S.)'      unincorporated and organized territory of USA, 87,000
    #   'Isle of Man'                self-governing Crown Dependency, Europe, 84,000
    #   'Dominica'                   micro-state in N.America, 72,400
    #   'Marshall Islands'           micro-state in Oceania, 62,000
    #   'Faroe Islands'              autonomous territory of Denmark in Europe, 54,000
    #   'St. Kitts and Nevis'        micro-state in N.America, 47,600
    #   'Turks and Caicos Islands'   British Overseas Territory in N.America, 44,000
    #   'Sint Maarten (Dutch part)'  constituent country of the Netherlands in N.America, 41,500
    #   'Gibraltar'                  British Overseas Territory in Europe, 34,000
    #   'St. Martin (French part)'   French Overseas Territory in N.America, 32,500
    #   'British Virgin Islands'     British Overseas Territory in N.America, 30,000
    #   'Palau'                      mini-state in Oceania, 22,000, little info about LE
    #   'Tuvalu'                     island micro-state in Oceania, 12,000
    #   'Nauru'                      micro-state in Oceania, 11,000

    membership_groups = [
        'Euro area', 'IBRD only', 'IDA & IBRD total', 'IDA total', 'IDA blend', 'IDA only',
        'OECD members', 'Small states', 'Other small states',
    ]
    regional_subsets = [
        'Europe & Central Asia (excluding high income)',
        'East Asia & Pacific (excluding high income)',
        'Latin America & Caribbean (excluding high income)',
        'Middle East & North Africa (excluding high income)',
        'Sub-Saharan Africa (excluding high income)',
        'East Asia & Pacific (IDA & IBRD countries)',
        'Europe & Central Asia (IDA & IBRD countries)',
        'Middle East & North Africa (IDA & IBRD countries)',
        'Sub-Saharan Africa (IDA & IBRD countries)',
        'Latin America & the Caribbean (IDA & IBRD countries)',
        'South Asia (IDA & IBRD)',
    ]
    development_groups = [
        'Heavily indebted poor countries (HIPC)',
        'Fragile and conflict affected situations',
        'Least developed countries: UN classification',
    ]
    demographic_groups = [
        'Early-demographic dividend', 'Late-demographic dividend',
        'Pre-demographic dividend', 'Post-demographic dividend',
    ]
    income_groups = [
        'Low income', 'Lower middle income', 'Low & middle income',
        'Middle income', 'Upper middle income', 'High income',
    ]
    rows_to_drop = (excluded_territories + membership_groups + regional_subsets
                    + development_groups + demographic_groups + income_groups)

    # Long source names -> short names used in the tables.
    short_names = {
        'United States': 'USA',
        'Russian Federation': 'Russia',
        'Korea, Rep.': 'South Korea',
        'Egypt, Arab Rep.': 'Egypt',
        'Congo, Dem. Rep.': 'DR Congo',   # alternative: 'Congo, Democratic Republic of'
        # mapped to itself; alternatives: 'Republic of the Congo', 'Congo Republic', 'Congo, Republic of'
        'Congo, Rep.': 'Congo, Rep.',
        'Yemen, Rep.': 'Yemen',
        'Micronesia, Fed. Sts.': 'Micronesia',
        'Gambia, The': 'Gambia',
        'Iran, Islamic Rep.': 'Iran',
        "Korea, Dem. People's Rep.": 'North Korea',
        'Syrian Arab Republic': 'Syria',
        'Venezuela, RB': 'Venezuela',
        'Kyrgyz Republic': 'Kyrgyzstan',
        'Lao PDR': 'Laos',
        'Czech Republic': 'Czechia',
        'Slovak Republic': 'Slovakia',
        'Bahamas, The': 'Bahamas',
        'Brunei Darussalam': 'Brunei',
        'Turkiye': 'Turkey',
        'West Bank and Gaza': 'Palestine',
        'Sint Maarten (Dutch part)': 'Sint Maarten',
        'St. Martin (French part)': 'Collectivity of Saint Martin',
    }

    remaining_rows = df.drop(rows_to_drop)
    year_values = remaining_rows[str(year)]
    return year_values.dropna().rename(index=short_names)

In [6]:
# Life expectancy, total: skip the first 4 lines of the CSV and use its first column as the index.
df = pd.read_csv('data/World_Bank_Group-total.csv', skiprows=4, index_col=0)

# One named series per year; the names become column names when the series are combined.
ser_2020_total = extract_data_for_year(df, 2020).rename('2020_total')
ser_2019_total = extract_data_for_year(df, 2019).rename('2019_total')

# The raw table is no longer needed.
del df

print("2020-total:", len(ser_2020_total), "records")
print("2019-total:", len(ser_2019_total), "records")

2020-total: 221 records
2019-total: 221 records


In [7]:
# Life expectancy, male: skip the first 4 lines of the CSV and use its first column as the index.
df = pd.read_csv('data/World_Bank_Group-male.csv', skiprows=4, index_col=0)

# One named series per year; the names become column names when the series are combined.
ser_2020_male = extract_data_for_year(df, 2020).rename('2020_male')
ser_2019_male = extract_data_for_year(df, 2019).rename('2019_male')

# The raw table is no longer needed.
del df

print("2020-male:", len(ser_2020_male), "records")
print("2019-male:", len(ser_2019_male), "records")

2020-male: 221 records
2019-male: 221 records


In [8]:
# Life expectancy, female: skip the first 4 lines of the CSV and use its first column as the index.
df = pd.read_csv('data/World_Bank_Group-female.csv', skiprows=4, index_col=0)

# One named series per year; the names become column names when the series are combined.
ser_2020_female = extract_data_for_year(df, 2020).rename('2020_female')
ser_2019_female = extract_data_for_year(df, 2019).rename('2019_female')

# The raw table is no longer needed.
del df

print("2020-female:", len(ser_2020_female), "records")
print("2019-female:", len(ser_2019_female), "records")

2020-female: 221 records
2019-female: 221 records


In [9]:
# Combine the six series into one DataFrame (outer join on the index).
# The 2020 columns get short names; the 2019 columns keep their year prefix.
df = pd.concat(
    [ser_2020_total, ser_2020_male, ser_2020_female,
     ser_2019_total, ser_2019_male, ser_2019_female],
    axis=1,
    join='outer',
).rename(columns={'2020_total': 'total', '2020_male': 'male', '2020_female': 'female'})

df.index.name = ''

print(df.shape)
df.loc[['World', 'Russia']]

(221, 6)


Unnamed: 0,total,male,female,2019_total,2019_male,2019_female
,,,,,,
World,72.24385,69.799734,74.85733,72.979716,70.623612,75.470008
Russia,71.33878,66.49,76.43,73.083902,68.24,78.17


In [10]:
# # calculate difference between some columns and insert result as new columns in dataframe
# df.insert(loc=3, column='gender_gap', value=df['female']-df['male'])


# df.insert(loc=7, column='2000→2014', value=df['2014_t']-df['2000_t'])
# df.insert(loc=8, column='2014→2019', value=df['2019_t']-df['2014_t'])
# df.insert(loc=9, column='2019→2020', value=df['2020_t']-df['2019_t'])
# df.insert(loc=11, column='2020→2021', value=df['total']-df['2020_t'])

# # sort DataFrame by total life expectancy
# df.sort_values(by=['total', 'male', 'female'], ascending=False, inplace=True)

In [11]:
# Gender gap (female minus male) goes right after the three main columns.
df.insert(loc=3, column='gender_gap', value=df['female'] - df['male'])

# Change of every indicator relative to 2019, appended as new columns.
df = df.assign(**{
    f'Δ_2019_{kind}': df[kind] - df[f'2019_{kind}']
    for kind in ('total', 'male', 'female')
})
df['Δ_2019_gender_gap'] = df['gender_gap'] - (df['2019_female'] - df['2019_male'])

# The 2019 columns are redundant once the deltas exist;
# afterwards sort by total life expectancy, highest first.
df = df.drop(columns=['2019_total', '2019_male', '2019_female']) \
       .sort_values(by=['total', 'male', 'female'], ascending=False)

print(df.shape)
df.loc[['World', 'Russia']]

(221, 8)


Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
World,72.24385,69.799734,74.85733,5.057596,-0.735866,-0.823878,-0.612678,0.2112
Russia,71.33878,66.49,76.43,9.94,-1.745122,-1.75,-1.74,0.01


<br />
<br />
<br />

In [12]:
# ls_new = df.index.to_list()
# print(len(ls_new))

# ls_old = ['Japan', 'Singapore', 'South Korea', ...]

# print(len(ls_old))

In [13]:
# [country for country in ls_new if country not in ls_old]

In [14]:
# [country for country in ls_old if country not in ls_new]

<br />
<br />
<br />

In [15]:
# Rows of the North America table: sovereign states, the world aggregate, then territories.
dd_N_America = [
    'Antigua and Barbuda', 'Bahamas', 'Barbados', 'Belize',
    'Canada', 'Costa Rica', 'Cuba', 'Dominican Republic',
    'El Salvador', 'Grenada', 'Guatemala', 'Haiti',
    'Honduras', 'Jamaica', 'Mexico', 'Nicaragua',
    'Panama', 'St. Lucia', 'St. Vincent and the Grenadines', 'Trinidad and Tobago',
    'USA',
    'World',
    'Bermuda', 'Puerto Rico', 'Virgin Islands (U.S.)',
    'British Virgin Islands', 'Turks and Caicos Islands',
    'Caribbean small states', 'Sint Maarten', 'Collectivity of Saint Martin',
    'Dominica', 'St. Kitts and Nevis',
    'Aruba', 'Greenland',
]
# author's note: no values in the source for 'Dominica' and 'St. Kitts and Nevis'

# Select the rows and order them by total life expectancy, highest first.
df_N_America = (
    df.loc[dd_N_America]
      .sort_values(by='total', ascending=False)
)
df_N_America

Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
Canada,81.670488,79.49,83.96,4.47,-0.558293,-0.69,-0.42,0.27
Bermuda,81.136,77.885,84.272,6.387,0.103,0.094,0.094,0.0
Collectivity of Saint Martin,80.149,77.021,83.423,6.402,0.166,0.146,0.163,0.017
Virgin Islands (U.S.),79.819512,76.6,83.2,6.6,0.15122,0.2,0.1,-0.1
Costa Rica,79.277,76.751,81.912,5.161,-0.15,-0.251,-0.041,0.21
Antigua and Barbuda,78.841,76.061,81.316,5.255,0.15,0.152,0.145,-0.007
Puerto Rico,78.041,73.644,82.588,8.944,-1.022,-1.188,-0.868,0.32
Cuba,77.567,75.044,80.225,5.181,-0.044,-0.2,0.15,0.35
Barbados,77.393,75.418,79.21,3.792,0.136,0.188,0.087,-0.101


In [16]:
# English table code for North America, written to a file.
table_code_N_America = create_table(df=df_N_America, lang='en')
output_table_code(
    st=table_code_N_America,
    file_name='Table code WBG -2020 -N_America -en.txt',
    destination='file',
)

Data has written to file


In [17]:
# Russian table code for North America, written to a file.
table_code_N_America = create_table(df=df_N_America, lang='ru')
output_table_code(
    st=table_code_N_America,
    file_name='Table code WBG -2020 -N_America -ru.txt',
    destination='file',
)

Data has written to file


<br />
<br />
<br />

In [18]:
# Rows of the South America table, plus the world aggregate for comparison.
dd_S_America = [
    'Argentina', 'Bolivia', 'Brazil', 'Chile',
    'Colombia', 'Ecuador', 'Guyana', 'Paraguay',
    'Peru', 'Suriname', 'Uruguay', 'Venezuela',
    'World',
]

# Select the rows and order them by total life expectancy, highest first.
df_S_America = (
    df.loc[dd_S_America]
      .sort_values(by='total', ascending=False)
)
df_S_America

Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
Chile,79.377,76.745,82.041,5.296,-0.949,-1.149,-0.67,0.479
Uruguay,78.43,74.688,81.911,7.223,0.922,1.109,0.615,-0.494
Argentina,75.892,72.546,79.285,6.739,-1.392,-1.311,-1.396,-0.085
Colombia,74.769,71.537,78.136,6.599,-1.983,-2.263,-1.585,0.678
Brazil,74.009,70.7,77.375,6.675,-1.329,-1.503,-1.094,0.409
Peru,73.665,70.808,76.816,6.008,-2.491,-3.096,-1.681,1.415
Paraguay,73.182,70.398,76.156,5.758,-0.439,-0.563,-0.285,0.278
Suriname,72.561,69.264,75.981,6.717,0.319,0.32,0.301,-0.019
World,72.24385,69.799734,74.85733,5.057596,-0.735866,-0.823878,-0.612678,0.2112


In [19]:
# English table code for South America, written to a file.
table_code_S_America = create_table(df=df_S_America, lang='en')
output_table_code(
    st=table_code_S_America,
    file_name='Table code WBG -2020 -S_America -en.txt',
    destination='file',
)

Data has written to file


In [20]:
# Russian table code for South America, written to a file.
table_code_S_America = create_table(df=df_S_America, lang='ru')
output_table_code(
    st=table_code_S_America,
    file_name='Table code WBG -2020 -S_America -ru.txt',
    destination='file',
)

Data has written to file


<br />
<br />
<br />

In [21]:
# Rows of the Asia table, plus the world aggregate for comparison.
dd_Asia = [
    'Afghanistan', 'Armenia', 'Azerbaijan', 'Bahrain', 'Bangladesh', 'Bhutan',
    'Brunei', 'Cambodia', 'China', 'Cyprus', 'Egypt', 'Georgia',
    'India', 'Indonesia', 'Iran', 'Iraq', 'Israel', 'Japan',
    'Jordan', 'Kazakhstan', 'North Korea', 'South Korea', 'Kuwait', 'Kyrgyzstan',
    'Laos', 'Lebanon', 'Malaysia', 'Maldives', 'Mongolia', 'Myanmar',
    'Nepal', 'Oman', 'Pakistan', 'Philippines', 'Qatar', 'Russia',
    'Saudi Arabia', 'Singapore', 'Sri Lanka', 'Syria', 'Tajikistan', 'Thailand',
    'Timor-Leste', 'Turkey', 'Turkmenistan', 'United Arab Emirates', 'Uzbekistan', 'Vietnam',
    'Yemen', 'Palestine',
    'World',
]
# regional aggregates left out of the list:
# 'Europe & Central Asia', 'South Asia', 'East Asia & Pacific', 'Middle East & North Africa'

# Select the rows and order them by total life expectancy, highest first.
df_Asia = (
    df.loc[dd_Asia]
      .sort_values(by='total', ascending=False)
)
df_Asia

Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
Japan,84.56,81.56,87.71,6.15,0.203659,0.15,0.26,0.11
Singapore,84.465854,83.1,85.9,2.8,0.870732,1.7,0.0,-1.7
South Korea,83.426829,80.5,86.5,6.0,0.2,0.2,0.2,0.0
Israel,82.64878,80.6,84.8,4.2,-0.156098,-0.4,0.1,0.5
Cyprus,81.391,79.345,83.448,4.103,-0.006,-0.163,0.178,0.341
Maldives,79.875,78.85,81.273,2.423,-0.241,-0.473,0.111,0.584
Thailand,79.274,74.954,83.697,8.743,0.299,0.316,0.264,-0.052
Bahrain,79.174,78.165,80.433,2.268,-0.845,-0.815,-0.886,-0.071
Qatar,79.099,78.225,80.679,2.454,-1.891,-1.839,-1.967,-0.128


In [22]:
# English table code for Asia, written to a file.
table_code_Asia = create_table(df=df_Asia, lang='en')
output_table_code(
    st=table_code_Asia,
    file_name='Table code WBG -2020 -Asia -en.txt',
    destination='file',
)

Data has written to file


In [23]:
# Russian table code for Asia, written to a file.
table_code_Asia = create_table(df=df_Asia, lang='ru')
output_table_code(
    st=table_code_Asia,
    file_name='Table code WBG -2020 -Asia -ru.txt',
    destination='file',
)

Data has written to file


<br />
<br />
<br />

In [24]:
# Rows of the Europe table: countries, two aggregates, then dependent territories.
dd_Europe = [
    'Albania', 'Armenia', 'Austria', 'Azerbaijan', 'Belarus', 'Belgium',
    'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus', 'Czechia', 'Denmark',
    'Estonia', 'Finland', 'France', 'Georgia', 'Germany', 'Greece',
    'Hungary', 'Iceland', 'Ireland', 'Italy', 'Kazakhstan', 'Latvia',
    'Lithuania', 'Luxembourg', 'Malta', 'Moldova', 'Montenegro', 'Netherlands',
    'North Macedonia', 'Norway', 'Poland', 'Portugal', 'Romania', 'Russia',
    'Serbia', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland',
    'Turkey', 'Ukraine', 'United Kingdom', 'Liechtenstein',
    'World', 'European Union',
    'Channel Islands', 'Faroe Islands', 'Gibraltar', 'Isle of Man',
]
# regional aggregates left out of the list: 'Central Europe and the Baltics', 'Europe & Central Asia'
# small countries: Andorra, Liechtenstein, Monaco, San Marino, Vatican City

# Select the rows and order them by total life expectancy, highest first.
df_Europe = (
    df.loc[dd_Europe]
      .sort_values(by='total', ascending=False)
)
df_Europe

Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
Norway,83.209756,81.6,84.9,3.3,0.25122,0.3,0.2,-0.1
Faroe Islands,83.197561,81.1,85.4,4.3,0.25122,0.3,0.2,-0.1
Iceland,83.063415,81.6,84.6,3.0,-0.1,-0.1,-0.1,0.0
Switzerland,83.0,81.0,85.1,4.1,-0.904878,-1.1,-0.7,0.4
Ireland,82.556098,80.8,84.4,3.6,-0.146341,0.0,-0.3,-0.3
Sweden,82.356098,80.6,84.2,3.6,-0.753659,-0.9,-0.6,0.3
Malta,82.34878,80.3,84.5,4.2,-0.509756,-0.9,-0.1,0.8
Spain,82.331707,79.6,85.2,5.6,-1.5,-1.5,-1.5,0.0
Gibraltar,82.198,80.249,84.112,3.863,-0.165,-0.125,-0.176,-0.051


In [25]:
# English table code for Europe, written to a file.
table_code_Europe = create_table(df=df_Europe, lang='en')
output_table_code(
    st=table_code_Europe,
    file_name='Table code WBG -2020 -Europe -en.txt',
    destination='file',
)

Data has written to file


In [26]:
# Russian table code for Europe, written to a file.
table_code_Europe = create_table(df=df_Europe, lang='ru')
output_table_code(
    st=table_code_Europe,
    file_name='Table code WBG -2020 -Europe -ru.txt',
    destination='file',
)

Data has written to file


<br />
<br />
<br />

In [27]:
# Rows of the Africa table, plus the world aggregate for comparison.
dd_Africa = [
    'Algeria', 'Angola', 'Benin', 'Botswana', 'Burkina Faso', 'Burundi',
    'Cameroon', 'Cabo Verde', "Cote d'Ivoire", 'Central African Republic', 'Chad', 'Comoros',
    'DR Congo', 'Congo, Rep.', 'Djibouti', 'Egypt', 'Equatorial Guinea', 'Eritrea',
    'Eswatini', 'Ethiopia', 'Gabon', 'Gambia', 'Ghana', 'Guinea',
    'Guinea-Bissau', 'Kenya', 'Lesotho', 'Liberia', 'Libya', 'Madagascar',
    'Malawi', 'Mali', 'Mauritania', 'Mauritius', 'Morocco', 'Mozambique',
    'Namibia', 'Niger', 'Nigeria', 'Rwanda', 'Sao Tome and Principe', 'Senegal',
    'Seychelles', 'Sierra Leone', 'Somalia', 'South Africa', 'South Sudan', 'Sudan',
    'Tanzania', 'Togo', 'Tunisia', 'Uganda', 'Zambia', 'Zimbabwe',
    'World',
]
# regional aggregates left out of the list: 'Middle East & North Africa',
# 'Africa Eastern and Southern', 'Sub-Saharan Africa', 'Africa Western and Central'

# Select the rows and order them by total life expectancy, highest first.
df_Africa = (
    df.loc[dd_Africa]
      .sort_values(by='total', ascending=False)
)
df_Africa

Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
Seychelles,77.236585,72.7,82.0,9.3,3.190244,2.8,3.6,0.8
Tunisia,75.292,71.837,79.279,7.442,-0.701,-1.058,-0.1,0.958
Cabo Verde,74.808,70.333,79.16,8.827,-1.196,-1.237,-1.023,0.214
Algeria,74.453,73.082,75.912,2.83,-2.021,-2.156,-1.848,0.308
Mauritius,74.177073,70.86,77.66,6.8,-0.05878,-0.21,0.1,0.31
Morocco,73.92,71.757,76.308,4.551,-0.35,-0.488,-0.14,0.348
Libya,72.472,69.867,75.348,5.481,0.009,0.371,-0.418,-0.789
World,72.24385,69.799734,74.85733,5.057596,-0.735866,-0.823878,-0.612678,0.2112
Egypt,70.99,68.667,73.39,4.723,-0.368,-0.382,-0.321,0.061


In [28]:
# English table code for Africa, written to a file.
table_code_Africa = create_table(df=df_Africa, lang='en')
output_table_code(
    st=table_code_Africa,
    file_name='Table code WBG -2020 -Africa -en.txt',
    destination='file',
)

Data has written to file


In [29]:
# Russian table code for Africa, written to a file.
table_code_Africa = create_table(df=df_Africa, lang='ru')
output_table_code(
    st=table_code_Africa,
    file_name='Table code WBG -2020 -Africa -ru.txt',
    destination='file',
)

Data has written to file


<br />
<br />
<br />

In [30]:
# Rows of the Oceania table: states, territories, one aggregate and the world.
dd_Oceania = [
    'Australia', 'Papua New Guinea', 'New Zealand', 'Fiji',
    'Solomon Islands', 'Vanuatu', 'Samoa', 'Kiribati',
    'Micronesia', 'Tonga', 'Marshall Islands', 'Nauru',
    'Tuvalu', 'French Polynesia', 'New Caledonia', 'Guam',
    'Pacific island small states',
    'World',
]
# The database contains no data for 'Palau', 'American Samoa', 'Northern Mariana Islands'

# Select the rows and order them by total life expectancy, highest first.
df_Oceania = (
    df.loc[dd_Oceania]
      .sort_values(by='total', ascending=False)
)
df_Oceania

Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
Australia,83.2,81.2,85.3,4.1,0.3,0.3,0.3,0.0
New Zealand,82.256098,80.5,84.1,3.6,0.2,0.2,0.2,-1.421085e-14
French Polynesia,82.217,79.321,85.834,6.513,-0.779,-1.383,0.226,1.609
New Caledonia,80.834951,77.253,84.596,7.343,0.415244,0.445,0.384,-0.061
Guam,76.612,72.197,81.868,9.671,-1.105,-1.967,0.292,2.259
Samoa,72.768,70.341,75.459,5.118,0.611,0.574,0.654,0.08
World,72.24385,69.799734,74.85733,5.057596,-0.735866,-0.823878,-0.612678,0.2111999
Tonga,70.928,68.337,73.698,5.361,0.057,0.072,0.029,-0.043
Micronesia,70.674,67.106,74.54,7.434,-0.403,-0.475,-0.302,0.173


In [31]:
# English table code for Oceania, written to a file.
table_code_Oceania = create_table(df=df_Oceania, lang='en')
output_table_code(
    st=table_code_Oceania,
    file_name='Table code WBG -2020 -Oceania -en.txt',
    destination='file',
)

Data has written to file


In [32]:
# Russian table code for Oceania, written to a file.
table_code_Oceania = create_table(df=df_Oceania, lang='ru')
output_table_code(
    st=table_code_Oceania,
    file_name='Table code WBG -2020 -Oceania -ru.txt',
    destination='file',
)

Data has written to file


<br />
<br />
<br />

In [33]:
# Rows of the CIS table ('Georgia' and 'Ukraine' are left out).
dd_CIS = [
    'Armenia',
    'Azerbaijan',
    'Belarus',
    'Kazakhstan',
    'Kyrgyzstan',
    'Moldova',
    'Russia',
    'Tajikistan',
    'Uzbekistan',
    'Turkmenistan',
]

# Select the rows and order them by total life expectancy, highest first.
df_CIS = (
    df.loc[dd_CIS]
      .sort_values(by='total', ascending=False)
)
df_CIS

Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
Belarus,72.45722,67.306,77.866,10.56,-1.76961,-1.994,-1.534,0.46
Armenia,72.173,67.051,76.996,9.945,-3.266,-3.352,-2.86,0.492
Kyrgyzstan,71.8,67.8,76.0,8.2,0.2,0.2,0.2,0.0
Kazakhstan,71.37,67.09,75.53,8.44,-1.81,-1.73,-1.77,-0.04
Russia,71.33878,66.49,76.43,9.94,-1.745122,-1.75,-1.74,0.01
Uzbekistan,70.331,67.876,72.789,4.913,-1.013,-0.906,-1.094,-0.188
Moldova,70.166,65.692,74.852,9.16,-0.769,-0.686,-0.765,-0.079
Turkmenistan,68.687,65.392,71.984,6.592,-0.315,-0.16,-0.48,-0.32
Tajikistan,67.994,65.999,70.17,4.171,-2.873,-2.863,-2.84,0.023


In [34]:
# English table code for the CIS countries, written to a file.
table_code_CIS = create_table(df=df_CIS, lang='en')
output_table_code(
    st=table_code_CIS,
    file_name='Table code WBG -2020 -CIS -en.txt',
    destination='file',
)

Data has written to file


In [35]:
# Russian table code for the CIS countries, written to a file.
table_code_CIS = create_table(df=df_CIS, lang='ru')
output_table_code(
    st=table_code_CIS,
    file_name='Table code WBG -2020 -CIS -ru.txt',
    destination='file',
)

Data has written to file


<br />
<br />
<br />

In [36]:
# Regional aggregates that are removed from the list of all countries.
ls_regions = [
    'European Union',
    'Central Europe and the Baltics',
    'Europe & Central Asia',
    'South Asia',
    'East Asia & Pacific',
    'Middle East & North Africa',
    'Africa Eastern and Southern',
    'Sub-Saharan Africa',
    'Africa Western and Central',
    'North America',
    'Latin America & Caribbean',
    'Arab World',
]

# Countries and territories with a population below 50,000 are excluded as well
# ('Palau' is not part of the list).
ls_dropped_territories = [
    'St. Kitts and Nevis',
    'Turks and Caicos Islands',
    'Sint Maarten',
    'Gibraltar',
    'Collectivity of Saint Martin',
    'British Virgin Islands',
    'Tuvalu',
    'Nauru',
]

In [37]:
# All rows except the regional aggregates and the smallest territories.
df_all_countries = df.drop(index=ls_regions + ls_dropped_territories)
print(df_all_countries.shape[0])

201


In [38]:
# English table code for all countries, written to a file.
table_code_all_countries = create_table(df=df_all_countries, lang='en')
output_table_code(
    st=table_code_all_countries,
    file_name='Table code WBG -2020 -all_countries -en.txt',
    destination='file',
)

Data has written to file


In [39]:
# Russian table code for all countries, written to a file.
table_code_all_countries = create_table(df=df_all_countries, lang='ru')
output_table_code(
    st=table_code_all_countries,
    file_name='Table code WBG -2020 -all_countries -ru.txt',
    destination='file',
)

Data has written to file


<br />
<br />
<br />

In [40]:
# create code for placing info in Wikipedia

# (column key, inline CSS of the cell) for the eight numeric columns, in the
# order they appear in a table row.
_REGION_COLUMNS = (
    ('total', 'text-align:center; background:#e0ffd8;'),
    ('male', 'text-align:center; background:#eaf3ff;'),
    ('female', 'text-align:center; background:#fee7f6;'),
    ('gender_gap', 'text-align:center;'),
    ('Δ_2019_total', 'text-align:center; background:#e0ffd8; border-left-width:2px;'),
    ('Δ_2019_male', 'text-align:center; background:#eaf3ff;'),
    ('Δ_2019_female', 'text-align:center; background:#fee7f6;'),
    ('Δ_2019_gender_gap', 'text-align:center;'),
)


def _format_region_value(x, prec=2):
    """Format one number the same way `if_value` in `create_table` does.

    NaN becomes an em dash, negative values get a typographic minus sign
    (U+2212) instead of an ASCII hyphen, everything else is fixed-point with
    `prec` decimals.
    """
    if math.isnan(x):
        return '—'
    if x >= 0:
        return f"{x:0.{prec}f}"
    return f"−{-x:0.{prec}f}"


def create_table_for_regions(df, lang='en'):
    """Build wikitable markup for the aggregate-region rows of `df`.

    The header is read from ``design/wbg_stats_header_2020_<lang>.txt`` and
    stripped of surrounding whitespace; one table row is appended per row of
    `df` (in `df` order) and the table is closed with ``|}``.

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by region name, with the numeric columns listed in
        `_REGION_COLUMNS`.
    lang : str
        'ru' selects the Russian header and region labels and converts
        decimal points to commas; any other value selects English.

    Returns
    -------
    str
        The complete table markup.

    Notes
    -----
    The row labelled 'World' is emitted with the ``static-row-header`` class,
    centered and in bold. Region names without an entry in the label mapping
    are written as-is.
    """
    if lang == 'ru':
        file_header = 'wbg_stats_header_2020_ru.txt'
        prettify_name = {
            'European Union': '[[Европейский союз]]',
            'Central Europe and the Baltics': '[[Центральная Европа]] и [[Прибалтика]]',
            'Europe & Central Asia': '[[Европа]] и [[Центральная Азия]]',
            'South Asia': '[[Южная Азия]]',
            'East Asia & Pacific': '[[Восточная Азия]] и [[Австралия и Океания|Океания]]',
            'Middle East & North Africa': '[[Страны MENA|Ближний Восток и Северная Африка]]',
            'Africa Eastern and Southern': '[[Восточная Африка|Восточная]] и [[Южная Африка|Южная]] Африка',
            'Sub-Saharan Africa': '[[Чёрная Африка|Африка к югу от Сахары]]',
            'Africa Western and Central': '[[Западная Африка|Западная]] и [[Центральная Африка|Центральная]] Африка',
            'North America': '[[Северная Америка]]',
            'Latin America & Caribbean': '[[Латинская Америка]] и [[Антильские острова|Карибы]]',
            'Arab World': 'Арабский мир',
            'World': 'Мир',
        }
    else:
        file_header = 'wbg_stats_header_2020_en.txt'
        prettify_name = {
            'European Union': '[[European Union]]',
            'Central Europe and the Baltics': '[[Central Europe]] & the [[Baltic states|Baltics]]',
            'Europe & Central Asia': '[[Europe]] & [[Central Asia]]',
            'South Asia': '[[South Asia]]',
            'East Asia & Pacific': '[[Asia-Pacific|East Asia & Pacific]]',
            'Middle East & North Africa': '[[MENA|Middle East & North Africa]]',
            'Africa Eastern and Southern': 'Africa [[East Africa|Eastern]] & [[Southern Africa|Southern]]',
            'Sub-Saharan Africa': '[[Sub-Saharan Africa]]',
            'Africa Western and Central': 'Africa [[West Africa|Western]] & [[Central Africa|Central]]',
            'North America': '[[North America]]',
            'Latin America & Caribbean': '[[Latin America and the Caribbean|Latin America & Caribbean]]',
        }

    with open('design/' + file_header, mode='r', encoding="utf-8") as fh:
        header = fh.read().strip()

    # Collect the pieces and join once instead of growing a string with +=.
    lines = [header]
    for label, ser in df.iterrows():
        shown = prettify_name.get(label, label)
        values = [_format_region_value(ser[col]) for col, _ in _REGION_COLUMNS]

        if label == 'World':
            # The world total is a highlighted, bold, centered summary row.
            lines.append('|-class=static-row-header')
            first_cell = f"|align=center| '''{shown}'''"
            values = [f"'''{v}'''" for v in values]
        else:
            lines.append('|-')
            first_cell = f'|align=left|{shown}'

        cells = [
            f'||style="{css}"| {value}'
            for (_, css), value in zip(_REGION_COLUMNS, values)
        ]
        lines.append(' '.join([first_cell, *cells]))
    lines.append('|}')

    st = '\n'.join(lines)

    if lang == 'ru':
        # replace . to comma, if this . is between two digits
        # (applies to the whole text, header included)
        st = re.sub(r'(?<=\d)\.(?=\d)', ',', st)

    return st

In [41]:
# Pick the aggregate rows plus the world total, highest life expectancy first.
df_regions = (
    df.loc[[*ls_regions, 'World']]
      .sort_values('total', ascending=False)
)
df_regions

Unnamed: 0,total,male,female,gender_gap,Δ_2019_total,Δ_2019_male,Δ_2019_female,Δ_2019_gender_gap
,,,,,,,,
European Union,80.452093,77.735582,83.317874,5.582291,-0.862765,-0.930323,-0.791189,0.139134
North America,77.463872,74.746275,80.317669,5.571394,-1.67784,-1.953443,-1.388346,0.565097
Europe & Central Asia,76.95919,73.841057,80.275346,6.434289,-1.227058,-1.269519,-1.158177,0.111342
East Asia & Pacific,76.76194,73.993839,79.700041,5.706202,-0.035835,-0.092787,0.040677,0.133464
Central Europe and the Baltics,76.087717,72.395227,79.974126,7.578899,-1.179128,-1.309201,-1.041954,0.267247
Latin America & Caribbean,73.038474,69.670039,76.596316,6.926278,-1.999875,-2.284248,-1.578995,0.705253
Middle East & North Africa,72.949768,70.812576,75.321682,4.509106,-1.009347,-1.049693,-0.92472,0.124973
World,72.24385,69.799734,74.85733,5.057596,-0.735866,-0.823878,-0.612678,0.2112
Arab World,70.92336,68.936223,73.112711,4.176488,-0.921266,-0.970743,-0.826599,0.144144


In [42]:
# Render the regional aggregates as English wikitable markup and save it.
table_code_regions = create_table_for_regions(df_regions)
output_table_code(
    table_code_regions,
    file_name='Table code WBG -2020 -z_regions -en.txt',
    destination='file',
)

Data has written to file


In [43]:
# Same regional aggregates, rendered with the Russian labels and decimal commas.
table_code_regions = create_table_for_regions(df_regions, lang='ru')
output_table_code(
    table_code_regions,
    file_name='Table code WBG -2020 -z_regions -ru.txt',
    destination='file',
)

Data has written to file
