[my svg](https://commons.wikimedia.org/wiki/File:Comparison_gender_life_expectancy_WHO.svg)<br>
[original svg](https://commons.wikimedia.org/wiki/File:Comparison_gender_life_expectancy_CIA_factbook.svg)<br>
[List of countries by population](https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations))<br>
[List of countries by life expectancy](https://en.wikipedia.org/wiki/List_of_countries_by_life_expectancy#World_Health_Organization_(2019))<br>
[selection of colors](http://mal-bioit.ru/programs/small/survey-web-colors.html)

In [1]:
import os
from xml.sax.saxutils import escape

import pandas as pd

from eng_rus_dictionary import dd_to_rus, dd_decryption  # dictionary for translation of names and decryption of abbreviation

In [2]:
# Read the UN population table (tab-separated); the first column becomes the index.
pop = pd.read_csv(
    'data/UN_population.csv',
    sep='\t',
    index_col=0,
)  # "pop" is short for population

pop.head(2)

Unnamed: 0_level_0,region,choice,subregion,subregion abbr,code,1950,1951,1952,1953,1954,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
World,-,-,-,-,900,2 536 431,2 584 034,2 630 862,2 677 609,2 724 847,...,6 956 824,7 041 194,7 125 828,7 210 582,7 295 291,7 379 797,7 464 022,7 547 859,7 631 091,7 713 468
Africa,-,-,-,-,903,227 794,232 328,237 097,242 092,247 311,...,1 039 304,1 066 410,1 094 343,1 123 045,1 152 434,1 182 439,1 213 041,1 244 222,1 275 921,1 308 064


In [3]:
# Index labels to remove before plotting: the world/continent aggregates
# and a number of territories and small states.
ls_drop = ['World', 'Africa', 'Asia', 'Europe', 'Latin America and the Caribbean', 'Northern America', 'Oceania',
           'Mayotte', 'Réunion', 'Saint Helena', 'Western Sahara', 'State of Palestine', 'Anguilla', 'Aruba',
           'Bonaire, Sint Eustatius and Saba', 'British Virgin Islands', 'Cayman Islands', 'Curaçao', 'Dominica',
           'Guadeloupe', 'Martinique', 'Montserrat', 'Puerto Rico', 'Saint Barthélemy', 'Saint Kitts and Nevis',
           'Saint Martin (French part)', 'Sint Maarten (Dutch part)', 'Turks and Caicos Islands',
           'United States Virgin Islands', 'Falkland Islands (Malvinas)', 'French Guiana', 'New Caledonia', 'Guam',
           'Marshall Islands', 'Nauru', 'Northern Mariana Islands', 'Palau', 'American Samoa', 'Cook Islands',
           'French Polynesia', 'Niue', 'Tokelau', 'Tuvalu', 'Wallis and Futuna Islands', 'Channel Islands',
           'Faroe Islands', 'Isle of Man', 'Andorra', 'Gibraltar', 'Holy See', 'San Marino', 'Liechtenstein',
           'Monaco', 'Bermuda', 'Greenland', 'Saint Pierre and Miquelon']

# Keep only the 2019 population and the two grouping columns, under shorter names.
pop = (
    pop.drop(index=ls_drop)
       .loc[:, ['2019', 'choice', 'subregion abbr']]
       .rename(columns={
           '2019': 'pop',
           'choice': 'choice_1',
           'subregion abbr': 'choice_2',
       })
)

# The figures are strings with spaces as thousands separators; strip the spaces and
# multiply by 1000 (the source values are presumably expressed in thousands).
pop['pop'] = pop['pop'].map(lambda cell: int(cell.replace(' ', '')) * 1000)

# blank out the index caption so it does not show up in displayed tables
pop.index.name = ''

pop.head(2)

Unnamed: 0,pop,choice_1,choice_2
,,,
Burundi,11531000.0,af,afe
Comoros,851000.0,af,afe


In [4]:
# Fold the separately listed Hong Kong, Macao and Taiwan rows into the China row,
# printing the China total before and after as a quick check.
china_parts = ['China, Hong Kong SAR', 'China, Macao SAR', 'China, Taiwan Province of China']

print('before combining:', pop.loc['China', 'pop'])
pop.loc['China', 'pop'] += pop.loc[china_parts, 'pop'].sum()
pop = pop.drop(index=china_parts)
print('after combining :', pop.loc['China', 'pop'])

before combining: 1433784000
after combining : 1465634000


In [5]:
# WHO life-expectancy table: two header rows, country name in the first column.
le = pd.read_csv(
    'data/WHO_2019_countries.csv',
    header=[0, 1],
    index_col=0,
)
print(le.shape)
le.loc[['Russian Federation']]

(732, 13)


Unnamed: 0_level_0,Unnamed: 1_level_0,Life expectancy at birth (years),Life expectancy at birth (years),Life expectancy at birth (years),Life expectancy at age 60 (years),Life expectancy at age 60 (years),Life expectancy at age 60 (years),Healthy life expectancy (HALE) at birth (years),Healthy life expectancy (HALE) at birth (years),Healthy life expectancy (HALE) at birth (years),Healthy life expectancy (HALE) at age 60 (years),Healthy life expectancy (HALE) at age 60 (years),Healthy life expectancy (HALE) at age 60 (years)
Country,Year,Both sexes,Male,Female,Both sexes,Male,Female,Both sexes,Male,Female,Both sexes,Male,Female
Russian Federation,2019,73.2,68.2,78.0,19.9,16.8,22.2,64.2,60.7,67.5,15.0,12.8,16.7
Russian Federation,2015,71.3,65.8,76.6,19.1,15.9,21.4,62.7,58.8,66.5,14.4,12.1,16.1
Russian Federation,2010,68.8,62.9,74.7,17.8,14.5,20.2,60.5,56.2,64.9,13.4,11.0,15.1
Russian Federation,2000,65.3,59.0,72.3,16.4,13.3,18.7,57.3,52.4,62.8,12.3,9.9,14.0


In [6]:
# WHO country names that are replaced by shorter, commonly used ones
dd_replace_name = {
    'Russian Federation': 'Russia',
    'Republic of Korea': 'South Korea',
    'United Kingdom of Great Britain and Northern Ireland': 'United Kingdom',
    'Iran (Islamic Republic of)': 'Iran',
    'Venezuela (Bolivarian Republic of)': 'Venezuela',
    'Republic of Moldova': 'Moldova',
    'Syrian Arab Republic': 'Syria',
    "Democratic People's Republic of Korea": "North Korea",
    'Bolivia (Plurinational State of)': 'Bolivia',
    "Lao People's Democratic Republic": "Laos",
    'United Republic of Tanzania': 'Tanzania',
    'Congo': 'Congo, Republic of',
    'Democratic Republic of the Congo': 'Congo, Democratic Republic of',
    'Micronesia (Federated States of)': 'Federated States of Micronesia',
    'Brunei Darussalam': 'Brunei',
    'United States of America': 'USA'
}

# take only the first 4 columns (year + life expectancy at birth: both sexes, male, female);
# the explicit copy keeps the following column assignment off the originally loaded frame
le = le.iloc[:, :4].copy()

# replace the two-level header with flat names
le.columns = ['year', 'all', 'male', 'female']

# Keep the 2019 records, drop the now constant 'year' column and apply the renames.
# Each step returns a new frame instead of mutating a filtered one in place, which
# avoids pandas' SettingWithCopyWarning and keeps the steps readable as one chain.
le = (
    le[le['year'] == 2019]
    .drop(columns='year')
    .rename(index=dd_replace_name)
)

print(le.shape)
le.head(2)

(183, 3)


Unnamed: 0,all,male,female
Afghanistan,63.2,63.3,63.2
Albania,78.0,76.3,79.9


In [7]:
# Sanity check: both tables must cover exactly the same set of countries before joining.
mismatched = set(pop.index).symmetric_difference(le.index)
assert not mismatched, 'Indexes of dataFrames are not identical'

In [8]:
# Join population and life expectancy side by side (aligned on the country index),
# order by population (largest first) and put the columns in presentation order.
df = (
    pd.concat([pop, le], axis=1)
      .sort_values('pop', ascending=False)
      [['pop', 'all', 'male', 'female', 'choice_1', 'choice_2']]
)

# the source frames are not needed any more
del pop, le

# additional correction of names for proper presentation
df = df.rename(index={
    'Congo, Democratic Republic of': 'DR Congo',
    'Congo, Republic of': 'Republic of Congo',
    'Viet Nam': 'Vietnam',
    'United Kingdom': 'UK',
    'Federated States of Micronesia': 'Micronesia',
    'United Arab Emirates': 'UAE',
    'Central African Republic': 'CAR',
})   # 'Timor-Leste': 'East Timor'

# empty placeholders for the label placement, filled in separately for every chart
df = df.assign(label_x=None, label_y=None, label_position=None)
df.head(2)

Unnamed: 0,pop,all,male,female,choice_1,choice_2,label_x,label_y,label_position
China,1465634000,77.4,74.7,80.5,ase,ase,,,
India,1366418000,70.8,69.5,72.2,ass,ass,,,


<br />
<br />

In [9]:
# load instructions how data should be handled (the index holds the region abbreviations)
handling = pd.read_csv('data/WHO_handling.csv', index_col=0)

# Split the table into two abbreviation-keyed dictionaries. The first column is taken
# as the colours and the second as the region names, so the CSV column order matters.
colors_all, regions_all = (handling[column].to_dict() for column in handling.columns)
del handling
colors_all

{'euw': '#6600cc',
 'eun': '#6495ed',
 'eus': '#cc00cc',
 'eue': '#008000',
 'asc': '#993300',
 'asw': '#004800',
 'ass': '#d2691e',
 'ase': '#ff0000',
 'asse': '#808000',
 'oc': '#0000ff',
 'na': '#0099cc',
 'sa': '#32cd32',
 'af': '#696969',
 '-': '#999999',
 'nam': '#0000ff',
 'car': '#ba55d3',
 'ca': '#d2691e',
 'afn': '#008000',
 'afw': '#4169e1',
 'afm': '#696969',
 'afe': '#dc143c',
 'afs': '#a0522d'}

In [10]:
# display the mapping from region abbreviation to region name
regions_all

{'euw': 'Europe Western',
 'eun': 'Europe Northern',
 'eus': 'Europe Southern',
 'eue': 'Europe Eastern',
 'asc': 'Asia Central',
 'asw': 'Asia Western',
 'ass': 'Asia South',
 'ase': 'Asia Eastern',
 'asse': 'Asia South-Eastern',
 'oc': 'Oceania',
 'na': 'North America',
 'sa': 'South America',
 'af': 'Africa',
 '-': 'other',
 'nam': 'North America',
 'car': 'Caribbean',
 'ca': 'Central America',
 'afn': 'Africa Northern',
 'afw': 'Africa Western',
 'afm': 'Africa Middle',
 'afe': 'Africa Eastern',
 'afs': 'Africa Southern'}

In [11]:
# print(list(regions.keys()), sep=', ')

<br />
<br />

In [12]:
def create_svg(df, regions, colors, box_width=147, box_height=236, file_name='le_diagram_WHO -test.svg', lang='ru'):
    """Render the male-vs-female life expectancy bubble chart and save it as an SVG file.

    Every country becomes a bubble placed at (male, female) life expectancy whose
    radius grows with the square root of the population. Every region becomes a
    legend entry that references the bubbles of its countries.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per country, indexed by country name. The columns are read by
        position and must be, in this order: population, life expectancy overall,
        male, female, region abbreviation, label x, label y, label text-anchor.
        A row with a text-anchor gets a permanent label at (label x, label y);
        a row without one gets a ``hidden_text`` label next to its bubble instead.
    regions : dict
        Region abbreviation -> region name; one legend entry per item, in dict order.
    colors : dict
        Region abbreviation -> colour used for the bubbles, gradients and legend.
    box_width, box_height : int
        Size of the rectangle drawn around the legend.
    file_name : str
        Name of the file written inside the ``output/`` directory.
    lang : str
        ``'ru'`` translates country and region names through ``dd_to_rus`` and uses
        Russian axis captions; any other value keeps the names as given and uses
        English captions. The tooltip wording itself is always English.

    Raises
    ------
    KeyError
        If a row's region is missing from `regions` or `colors`, or, for
        ``lang='ru'``, a country or region name is missing from ``dd_to_rus``.
    """
    # per-region list of <use> references, later embedded into the legend entries
    outs = {k: '' for k in regions.keys()}

    out_defs = out_graph = ''

    for i in range(len(df)):
        row = df.iloc[i]

        # extract single values for country (the columns are taken by position)
        population, le_a, le_m, le_f, region, x_text, y_text, text_anchor = row

        # translate name of country to Russian if this is required
        name = dd_to_rus[row.name] if lang == 'ru' else row.name

        # The tooltip uses the spelled-out form of an abbreviated name when one is known.
        # Both variants are XML-escaped so that characters such as '&' or '<' in a name
        # cannot produce an invalid SVG document.
        tooltip_name = escape(str(dd_decryption.get(name, name)))
        name = escape(str(name))

        # chart coordinates: 10 units per year, y negated because the SVG y axis points down
        x = int(le_m * 10)
        y = int(le_f * -10)
        radius = round(population ** (1/2) / 800, 2)
        color = colors[region]

        if pd.notna(text_anchor):  # if there is label for country
            out_label_defs = f'\n   <text x="{x_text}" y="{y_text}" dy="0.7ex" text-anchor="{text_anchor}" stroke="none">{name}</text>'
            out_label_active = ""
        else:
            # no fixed label: place a hidden one up and to the left of the bubble centre
            x_text = int(x - radius * 0.8)
            y_text = int(y - radius * 0.8)
            out_label_defs = ""
            out_label_active = f'\n   <text x="{x_text}" y="{y_text}" dy="0.7ex" class="hidden_text" stroke="{color}">{name}</text>'

        # thousands separated by spaces, e.g. 1 465 634 000
        population = f"{population:,}".replace(',', ' ')

        out_defs += (
            f'\n  <g id="bubble_{i}" fill="{color}">{out_label_defs}'
            f'\n   <circle cx="{x}" cy="{y}" r="1"/>'
            f'\n   <circle cx="{x}" cy="{y}" r="{radius}" stroke="{color}" fill="url(#grad_{region})"/>'
            f'\n  </g>')

        out_graph += (
            f'\n  <g class="active">'
            f'\n   <use xlink:href="#bubble_{i}"/>{out_label_active}'
            f'\n   <title>{tooltip_name} (population: {population})\nmale: {le_m}, female: {le_f}, overall: {le_a} years</title>'
            f'\n  </g>')

        outs[region] += f'\n    <use xlink:href="#bubble_{i}"/>'

    ##########
    # gradients and legend entries, one per region, stacked 18 units apart
    out_grad = out_legend = ''
    y = -886

    for abbr, region in regions.items():
        color = colors[abbr]

        # translate name of region to Russian if this is required
        region = dd_to_rus[region] if lang == 'ru' else region
        region = escape(str(region))

        out_grad += (
            f'\n  <radialGradient id="grad_{abbr}" cx="50%" cy="50%" r="50%" fx="25%" fy="25%">'
            f'\n   <stop offset="50%" stop-color="{color}" stop-opacity="0"/>'
            f'\n   <stop offset="70%" stop-color="{color}" stop-opacity="0.05"/>'
            f'\n   <stop offset="99%" stop-color="{color}" stop-opacity="0.25"/>'
            f'\n  </radialGradient>')

        # the text is shifted 5 units down relative to the centre of the legend circle
        text_y = y + 5
        out_legend += (
            f'\n  <g class="active">'
            f'\n    <text x="425" y="{text_y}" fill="{color}">{region}</text>'
            f'\n    <circle cx="415" cy="{y}" r="8" fill="url(#grad_{abbr})"/>'
            f'\n    <circle cx="415" cy="{y}" r="1" fill="{color}"/>'
            + outs[abbr] +
            f'\n  </g>')

        y += 18

    #######
    # numeric tick labels: 45..90 along the x axis, 50..90 along the y axis
    out_ticks = ''

    for tick in range(45, 91, 5):
        x = tick * 10
        out_ticks += f'\n        <text x="{x}" y="-483">{tick}</text>'

    for tick in range(50, 91, 5):
        y = tick * -10 + 6
        out_ticks += f'\n        <text x="385" y="{y}">{tick}</text>'

    ######
    svg_code = """<?xml version="1.0" encoding="utf-8"?>
    <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="100%" height="100%" viewBox="350 -908 560 445">
     <title>Comparison gender life expectancy WHO-2019</title>
     <desc>Comparison of countries by life expectancy at birth according to estimation of the World Health Organization for 2019 (https://apps.who.int/gho/data/node.main.688). Squares of bubles are proportional to the population of countries, according to estimation of the United Nations for 2019 (https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/EXCEL_FILES/1_Population/WPP2019_POP_F01_1_TOTAL_POPULATION_BOTH_SEXES.xlsx). Grouping of countries to subregions is according to the classification of the United Nations.</desc>
     <style type="text/css">
      #main         { font-family:Helvetica,Arial,sans-serif; font-size:14px; text-anchor:middle;
                      stroke-opacity:0; fill-opacity:0.5; cursor:default; }
      #main:hover   { stroke-opacity:0; fill-opacity:0.1; }
      .nofade,
      .active:hover { stroke-opacity:1; fill-opacity:1; }
      .hidden_text  { font-size:14px; text-anchor:end; fill:none; pointer-events:none; }
     </style>
     <defs>

      <!-- determine appearance of circles -->""" + \
        out_grad + \
    """

      <!-- determine each country -->""" + \
        out_defs + \
    """

      <!-- vertical grid -->
      <path id="grid_x" d="M 0 -500 V -900"/>
      <g id="grid_x_5">
       <use xlink:href="#grid_x"/>
       <use xlink:href="#grid_x" transform="translate(10,0)"/>
       <use xlink:href="#grid_x" transform="translate(20,0)"/>
       <use xlink:href="#grid_x" transform="translate(30,0)"/>
       <use xlink:href="#grid_x" transform="translate(40,0)"/>
       <use xlink:href="#grid_x" transform="translate(50,0)"/>
      </g>

      <!-- horizontal grid -->
      <path id="grid_y" d="M 400 0 H 900"/>
      <g id="grid_y_5">
       <use xlink:href="#grid_y"/>
       <use xlink:href="#grid_y" transform="translate(0,-10)"/>
       <use xlink:href="#grid_y" transform="translate(0,-20)"/>
       <use xlink:href="#grid_y" transform="translate(0,-30)"/>
       <use xlink:href="#grid_y" transform="translate(0,-40)"/>
       <use xlink:href="#grid_y" transform="translate(0,-50)"/>
      </g>
     </defs>



     <g id="main">
      <g class="nofade">
       <!-- background for the whole diagram -->
       <circle cx="0" cy="0" r="99999" fill="#ffffff"/>

       <!-- draw grid -->
       <g stroke-opacity="0.05" stroke="#000000">
        <use xlink:href="#grid_x_5" transform="translate(400,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(450,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(500,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(550,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(600,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(650,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(700,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(750,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(800,0)"/>
        <use xlink:href="#grid_x_5" transform="translate(850,0)"/>
        <use xlink:href="#grid_y_5" transform="translate(0,-500)"/>
        <use xlink:href="#grid_y_5" transform="translate(0,-550)"/>
        <use xlink:href="#grid_y_5" transform="translate(0,-600)"/>
        <use xlink:href="#grid_y_5" transform="translate(0,-650)"/>
        <use xlink:href="#grid_y_5" transform="translate(0,-700)"/>
        <use xlink:href="#grid_y_5" transform="translate(0,-750)"/>
        <use xlink:href="#grid_y_5" transform="translate(0,-800)"/>
        <use xlink:href="#grid_y_5" transform="translate(0,-850)"/>
       </g>

       <!-- labels for axes -->
       <g fill="#999999">  <!-- color of font -->""" + \
    ("""
        <text x="650" y="-467" style="font-size: 12px">Ожидаемая при рождении продолжительность жизни для мужчин</text>
        <text transform="rotate(-90)" x="695" y="365" style="font-size: 12px">Ожидаемая при рождении продолжительность жизни для женщин</text>""" if lang=='ru' else 
     """
        <text x="650" y="-467">Male life expectancy at birth in years</text>
        <text transform="rotate(-90)" x="705" y="365">Female life expectancy at birth in years</text>""") + \
        out_ticks + \
    """
       </g>

       <!-- diagonal line -->
       <path d="M 500 -500 L 900 -900" stroke="#00cc00" stroke-width="2" stroke-dasharray="2,1"/>
      </g>

      <!-- menu -->
      <g text-anchor="start">

       <!-- rectangle around items -->
       <rect class="nofade" x="403" y="-896" width=\"""" + str(box_width) + """\" height=\"""" + str(box_height) + """\" rx="5" ry="5" stroke="#999999" fill="#ffffff"/>

       <!-- single items -->""" + \
        out_legend + \
    """
      </g>

      <!-- pop-up notes -->""" + \
        out_graph + \
    """
     </g>
    </svg>"""

    # create the target directory on demand so a fresh checkout does not fail on open()
    out_path = 'output/' + file_name
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    with open(out_path, 'w', encoding="utf-8") as fh:
        fh.write(svg_code)

    print(f"Done. Number of processed countries: {len(df)}")

<br />
<br />
<br />

In [13]:
# Statistics for the world, excluding countries with population less than 1_000_000
df_selected = df[df['pop'] >= 1_000_000]  \
              .drop(columns='choice_2')   \
              .rename(columns={'choice_1': 'region'})

df_selected.loc['South Korea', ['label_x', 'label_y', 'label_position']] = (797, -885, 'end')
df_selected.loc['France', ['label_x', 'label_y', 'label_position']] = (785, -871, 'end')
df_selected.loc['Germany', ['label_x', 'label_y', 'label_position']] = (770, -860, 'end')
df_selected.loc['China', ['label_x', 'label_y', 'label_position']] = (708, -849, 'end')
df_selected.loc['Mexico', ['label_x', 'label_y', 'label_position']] = (696, -833, 'end')
df_selected.loc['Brazil', ['label_x', 'label_y', 'label_position']] = (692, -819, 'end')
df_selected.loc['Vietnam', ['label_x', 'label_y', 'label_position']] = (682, -805, 'end')
df_selected.loc['Russia', ['label_x', 'label_y', 'label_position']] = (661, -790, 'end')
df_selected.loc['Ukraine', ['label_x', 'label_y', 'label_position']] = (661, -773, 'end')
df_selected.loc['Philippines', ['label_x', 'label_y', 'label_position']] = (651, -755, 'end')
df_selected.loc['Mongolia', ['label_x', 'label_y', 'label_position']] = (633, -738, 'end')
df_selected.loc['Uganda', ['label_x', 'label_y', 'label_position']] = (620, -715, 'end')
df_selected.loc['DR Congo', ['label_x', 'label_y', 'label_position']] = (576, -651, 'end')
df_selected.loc['Mozambique', ['label_x', 'label_y', 'label_position']] = (531, -618, 'end')
df_selected.loc['Somalia', ['label_x', 'label_y', 'label_position']] = (528, -595, 'end')
df_selected.loc['Japan', ['label_x', 'label_y', 'label_position']] = (823, -889, 'start')
df_selected.loc['Spain', ['label_x', 'label_y', 'label_position']] = (833, -854, 'start')
df_selected.loc['Australia', ['label_x', 'label_y', 'label_position']] = (827, -840, 'start')
df_selected.loc['Canada', ['label_x', 'label_y', 'label_position']] = (820, -826, 'start')
df_selected.loc['UK', ['label_x', 'label_y', 'label_position']] = (814, -812, 'start')
df_selected.loc['USA', ['label_x', 'label_y', 'label_position']] = (804, -797, 'start')
df_selected.loc['Algeria', ['label_x', 'label_y', 'label_position']] = (798, -782, 'start')
df_selected.loc['UAE', ['label_x', 'label_y', 'label_position']] = (791, -766, 'start')
df_selected.loc['Saudi Arabia', ['label_x', 'label_y', 'label_position']] = (765, -751, 'start')
df_selected.loc['Bangladesh', ['label_x', 'label_y', 'label_position']] = (752, -738, 'start')
df_selected.loc['Morocco', ['label_x', 'label_y', 'label_position']] = (748, -724, 'start')
df_selected.loc['Indonesia', ['label_x', 'label_y', 'label_position']] = (748, -710, 'start')
df_selected.loc['India', ['label_x', 'label_y', 'label_position']] = (740, -690, 'start')
df_selected.loc['Ethiopia', ['label_x', 'label_y', 'label_position']] = (682, -668, 'start')
df_selected.loc['Pakistan', ['label_x', 'label_y', 'label_position']] = (670, -653, 'start')
df_selected.loc['Haiti', ['label_x', 'label_y', 'label_position']] = (650, -640, 'start')
df_selected.loc['Afghanistan', ['label_x', 'label_y', 'label_position']] = (647, -624, 'start')
df_selected.loc['Nigeria', ['label_x', 'label_y', 'label_position']] = (616, -608, 'start')
df_selected.loc['Chad', ['label_x', 'label_y', 'label_position']] = (580, -598, 'start')

ls_selected = ['euw', 'eun', 'eus', 'eue', 'asc', 'asw', 'ass', 'ase', 'asse', 'oc', 'na', 'sa', 'af']
regions = {k:regions_all[k] for k in ls_selected}
colors = {k:colors_all[k] for k in ls_selected}

create_svg(df_selected, regions, colors, box_width=147, box_height=236, file_name='Comparison gender life expectancy WHO.svg', lang='en')

Done. Number of processed countries: 155


In [14]:
# Russian version of the world chart: some labels are removed, added or moved
label_cols = ['label_x', 'label_y', 'label_position']

label_changes = {
    'Brazil': (None, None, None),
    'Vietnam': (None, None, None),
    'Morocco': (None, None, None),
    'Algeria': (None, None, None),
    'Mexico': (None, None, None),
    'Poland': (708, -849, 'end'),
    'China': (696, -833, 'end'),
    'Belarus': (692, -819, 'end'),
    'Kazakhstan': (686, -805, 'end'),
    'Uzbekistan': (748, -724, 'start'),
    'UK': (810, -812, 'start'),
    'Turkey': (805, -797, 'start'),
    'USA': (797, -782, 'start'),
}
for country, placement in label_changes.items():
    df_selected.loc[country, label_cols] = placement

create_svg(
    df_selected, regions, colors,
    box_width=158,
    box_height=236,
    file_name='Comparison gender life expectancy WHO -ru.svg',
    lang='ru',
)

Done. Number of processed countries: 155


<br />
<br />
<br />

In [15]:
# Statistics for Asia
additional_countries = ['Russia', 'Egypt']

df_selected = df[df['choice_2'].isin(['asc', 'asw', 'ass', 'ase', 'asse', 'oc']) | (df.index.isin(additional_countries))]  \
              .drop(columns='choice_1')   \
              .rename(columns={'choice_2': 'region'})

df_selected.loc[additional_countries, 'region'] = '-'

df_selected.loc['South Korea', ['label_x', 'label_y', 'label_position']] = (792, -878, 'end')
df_selected.loc['Kuwait', ['label_x', 'label_y', 'label_position']] = (784, -862, 'end')
df_selected.loc['Thailand', ['label_x', 'label_y', 'label_position']] = (710, -849, 'end')
df_selected.loc['China', ['label_x', 'label_y', 'label_position']] = (696, -833, 'end')
df_selected.loc['Kazakhstan', ['label_x', 'label_y', 'label_position']] = (692, -819, 'end')
df_selected.loc['Vietnam', ['label_x', 'label_y', 'label_position']] = (687, -805, 'end')
df_selected.loc['Russia', ['label_x', 'label_y', 'label_position']] = (661, -790, 'end')
df_selected.loc['Iraq', ['label_x', 'label_y', 'label_position']] = (661, -773, 'end')
df_selected.loc['Philippines', ['label_x', 'label_y', 'label_position']] = (649, -755, 'end')
df_selected.loc['Mongolia', ['label_x', 'label_y', 'label_position']] = (632, -738, 'end')
df_selected.loc['Myanmar', ['label_x', 'label_y', 'label_position']] = (630, -718, 'end')
df_selected.loc['Yemen', ['label_x', 'label_y', 'label_position']] = (615, -695, 'end')
df_selected.loc['Japan', ['label_x', 'label_y', 'label_position']] = (823, -889, 'start')
df_selected.loc['Singapore', ['label_x', 'label_y', 'label_position']] = (838, -869, 'start')
df_selected.loc['Cyprus', ['label_x', 'label_y', 'label_position']] = (834, -854, 'start')
df_selected.loc['Australia', ['label_x', 'label_y', 'label_position']] = (828, -840, 'start')
df_selected.loc['Israel', ['label_x', 'label_y', 'label_position']] = (820, -827, 'start')
df_selected.loc['New Zealand', ['label_x', 'label_y', 'label_position']] = (807, -812, 'start')
df_selected.loc['Turkey', ['label_x', 'label_y', 'label_position']] = (804, -797, 'start')
df_selected.loc['Iran', ['label_x', 'label_y', 'label_position']] = (801, -782, 'start')
df_selected.loc['UAE', ['label_x', 'label_y', 'label_position']] = (791, -766, 'start')
df_selected.loc['Saudi Arabia', ['label_x', 'label_y', 'label_position']] = (763, -751, 'start')
df_selected.loc['Bangladesh', ['label_x', 'label_y', 'label_position']] = (752, -738, 'start')
df_selected.loc['Uzbekistan', ['label_x', 'label_y', 'label_position']] = (748, -724, 'start')
df_selected.loc['Indonesia', ['label_x', 'label_y', 'label_position']] = (748, -710, 'start')
df_selected.loc['India', ['label_x', 'label_y', 'label_position']] = (738, -690, 'start')
df_selected.loc['Pakistan', ['label_x', 'label_y', 'label_position']] = (675, -660, 'start')
df_selected.loc['Afghanistan', ['label_x', 'label_y', 'label_position']] = (649, -630, 'start')

ls_selected = ['asc', 'asw', 'ass', 'ase', 'asse', 'oc', '-']
regions = {k:regions_all[k] for k in ls_selected}
colors = {k:colors_all[k] for k in ls_selected}

create_svg(df_selected, regions, colors, box_width=147, box_height=128, file_name='Comparison gender life expectancy WHO -Asia.svg', lang='en')

Done. Number of processed countries: 59


In [16]:
# Russian version of the Asia chart (same data and labels, different legend box width)
create_svg(
    df_selected, regions, colors,
    box_width=159,
    box_height=128,
    file_name='Comparison gender life expectancy WHO -Asia -ru.svg',
    lang='ru',
)

Done. Number of processed countries: 59


<br />
<br />
<br />

In [17]:
# Statistics for America
df_selected = df[df['choice_2'].isin(['nam', 'ca', 'car', 'sa'])]  \
              .drop(columns='choice_1')   \
              .rename(columns={'choice_2': 'region'})

df_selected.loc['Canada', ['label_x', 'label_y', 'label_position']] = (835, -860, 'end')
df_selected.loc['Costa Rica', ['label_x', 'label_y', 'label_position']] = (774, -858, 'end')
df_selected.loc['Chile', ['label_x', 'label_y', 'label_position']] = (766, -842, 'end')
df_selected.loc['Cuba', ['label_x', 'label_y', 'label_position']] = (732, -830, 'end')
df_selected.loc['Mexico', ['label_x', 'label_y', 'label_position']] = (707, -819, 'end')
df_selected.loc['Brazil', ['label_x', 'label_y', 'label_position']] = (694, -805, 'end')
df_selected.loc['Venezuela', ['label_x', 'label_y', 'label_position']] = (681, -786, 'end')
df_selected.loc['Dominican Republic', ['label_x', 'label_y', 'label_position']] = (675, -765, 'end')
df_selected.loc['Guatemala', ['label_x', 'label_y', 'label_position']] = (670, -750, 'end')
df_selected.loc['Honduras', ['label_x', 'label_y', 'label_position']] = (670, -735, 'end')
df_selected.loc['Haiti', ['label_x', 'label_y', 'label_position']] = (621, -651, 'end')


df_selected.loc['Peru', ['label_x', 'label_y', 'label_position']] = (807, -801, 'start')
df_selected.loc['USA', ['label_x', 'label_y', 'label_position']] = (792, -785, 'start')
df_selected.loc['Argentina', ['label_x', 'label_y', 'label_position']] = (773, -768, 'start')


ls_selected = ['nam', 'ca', 'car', 'sa']
regions = {k:regions_all[k] for k in ls_selected}
colors = {k:colors_all[k] for k in ls_selected}

create_svg(df_selected, regions, colors, box_width=147, box_height=76, file_name='Comparison gender life expectancy WHO -America.svg', lang='en')

Done. Number of processed countries: 33


In [18]:
# Russian version of the America chart (same data and labels, different legend box width)
create_svg(
    df_selected, regions, colors,
    box_width=171,
    box_height=76,
    file_name='Comparison gender life expectancy WHO -America -ru.svg',
    lang='ru',
)

Done. Number of processed countries: 33


<br />
<br />
<br />

In [19]:
# Statistics for Europe
additional_countries = ['Armenia', 'Azerbaijan', 'Cyprus', 'Georgia', 'Kazakhstan', 'Turkey']

df_selected = df[df['choice_2'].isin(['euw', 'eun', 'eus', 'eue']) | (df.index.isin(additional_countries))]  \
              .drop(columns='choice_1')   \
              .rename(columns={'choice_2': 'region'})

df_selected.loc[additional_countries, 'region'] = '-'

df_selected.loc['France', ['label_x', 'label_y', 'label_position']] = (790, -871, 'end')
df_selected.loc['Germany', ['label_x', 'label_y', 'label_position']] = (769, -860, 'end')
df_selected.loc['Portugal', ['label_x', 'label_y', 'label_position']] = (764, -846, 'end')
df_selected.loc['Poland', ['label_x', 'label_y', 'label_position']] = (720, -832, 'end')
df_selected.loc['Romania', ['label_x', 'label_y', 'label_position']] = (701, -819, 'end')
df_selected.loc['Belarus', ['label_x', 'label_y', 'label_position']] =  (686, -805, 'end')
df_selected.loc['Russia', ['label_x', 'label_y', 'label_position']] = (661, -790, 'end')
df_selected.loc['Ukraine', ['label_x', 'label_y', 'label_position']] = (661, -773, 'end')
df_selected.loc['Spain', ['label_x', 'label_y', 'label_position']] = (798, -881, 'start')
df_selected.loc['Switzerland', ['label_x', 'label_y', 'label_position']] = (830, -868, 'start')
df_selected.loc['Italy', ['label_x', 'label_y', 'label_position']] = (834, -854, 'start')
df_selected.loc['Sweden', ['label_x', 'label_y', 'label_position']] = (829, -840, 'start')
df_selected.loc['Netherlands', ['label_x', 'label_y', 'label_position']] = (822, -827, 'start')
df_selected.loc['UK', ['label_x', 'label_y', 'label_position']] = (813, -812, 'start') # (812, -827, 'start')
df_selected.loc['Belgium', ['label_x', 'label_y', 'label_position']] = (803, -797, 'start')
df_selected.loc['Turkey', ['label_x', 'label_y', 'label_position']] = (783, -784, 'start')
df_selected.loc['Serbia', ['label_x', 'label_y', 'label_position']] = (753, -772, 'start')

ls_selected = ['euw', 'eun', 'eus', 'eue', '-']
regions = {k:regions_all[k] for k in ls_selected}
colors = {k:colors_all[k] for k in ls_selected}

create_svg(df_selected, regions, colors, box_width=147, box_height=96, file_name='Comparison gender life expectancy WHO -Europe.svg', lang='en')

Done. Number of processed countries: 45


In [20]:
# Russian version of the Europe chart: one label is nudged and one is added
label_cols = ['label_x', 'label_y', 'label_position']

label_changes = {
    'Turkey': (784, -785, 'start'),
    'Kazakhstan': (743, -756, 'start'),
}
for country, placement in label_changes.items():
    df_selected.loc[country, label_cols] = placement

create_svg(
    df_selected, regions, colors,
    box_width=147,
    box_height=96,
    file_name='Comparison gender life expectancy WHO -Europe -ru.svg',
    lang='ru',
)

Done. Number of processed countries: 45


<br />
<br />
<br />

In [21]:
# Statistics for Africa
df_selected = df[df['choice_2'].isin(['afn', 'afw', 'afm', 'afe', 'afs'])]  \
              .drop(columns='choice_1')   \
              .rename(columns={'choice_2': 'region'})

df_selected.loc['Tunisia', ['label_x', 'label_y', 'label_position']] = (735, -805, 'end')
df_selected.loc['Mauritius', ['label_x', 'label_y', 'label_position']] = (690, -794, 'end')
df_selected.loc['Cabo Verde', ['label_x', 'label_y', 'label_position']] = (683, -781, 'end')
df_selected.loc['Seychelles', ['label_x', 'label_y', 'label_position']] = (683, -767, 'end')
df_selected.loc['Egypt', ['label_x', 'label_y', 'label_position']] = (670, -748, 'end')
df_selected.loc['Rwanda', ['label_x', 'label_y', 'label_position']] = (652, -725, 'end')
df_selected.loc['Uganda', ['label_x', 'label_y', 'label_position']] = (613, -706, 'end')
df_selected.loc['South Africa', ['label_x', 'label_y', 'label_position']] = (590, -690, 'end')
df_selected.loc['Angola', ['label_x', 'label_y', 'label_position']] = (582, -665, 'end')
df_selected.loc['DR Congo', ['label_x', 'label_y', 'label_position']] = (579, -651, 'end')
df_selected.loc['Eswatini', ['label_x', 'label_y', 'label_position']] = (524, -635, 'end')
df_selected.loc['Mozambique', ['label_x', 'label_y', 'label_position']] = (531, -617, 'end')
df_selected.loc['Somalia', ['label_x', 'label_y', 'label_position']] = (527, -595, 'end')
df_selected.loc['CAR', ['label_x', 'label_y', 'label_position']] = (490, -565, 'end')
df_selected.loc['Lesotho', ['label_x', 'label_y', 'label_position']] = (467, -545, 'end')
df_selected.loc['Algeria', ['label_x', 'label_y', 'label_position']] = (788, -779, 'start')
df_selected.loc['Libya', ['label_x', 'label_y', 'label_position']] = (775, -764, 'start')
df_selected.loc['Morocco', ['label_x', 'label_y', 'label_position']] = (743, -734, 'start')
df_selected.loc['Sudan', ['label_x', 'label_y', 'label_position']] = (713, -705, 'start')
df_selected.loc['Ethiopia', ['label_x', 'label_y', 'label_position']] = (697, -688, 'start')
df_selected.loc['Republic of Congo', ['label_x', 'label_y', 'label_position']] = (667, -656, 'start')
df_selected.loc['Niger', ['label_x', 'label_y', 'label_position']] = (646, -635, 'start')
df_selected.loc['Nigeria', ['label_x', 'label_y', 'label_position']] = (631, -620, 'start')
df_selected.loc['Chad', ['label_x', 'label_y', 'label_position']] = (579, -598, 'start')


ls_selected = ['afn', 'afw', 'afm', 'afe', 'afs']
regions = {k:regions_all[k] for k in ls_selected}
colors = {k:colors_all[k] for k in ls_selected}

create_svg(df_selected, regions, colors, box_width=147, box_height=96, file_name='Comparison gender life expectancy WHO -Africa.svg', lang='en')

Done. Number of processed countries: 54


In [22]:
# Russian version of the Africa chart (same data and labels, different legend box width)
create_svg(
    df_selected, regions, colors,
    box_width=167,
    box_height=96,
    file_name='Comparison gender life expectancy WHO -Africa -ru.svg',
    lang='ru',
)

Done. Number of processed countries: 54
