# raw, strength and specialization

In [1]:
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
import plotly.graph_objects as go

from plotly.subplots import make_subplots
import plotly

In [2]:
df_sdg = pd.read_pickle("../data/dataframes/SDG/all_sdg_fixed_dst.pkl")
df_digital = pd.read_pickle("../data/dataframes/DT/all_digital.pkl")

# Filtering for Journal only
df_sdg = df_sdg[df_sdg.PT == 'J']
df_digital = df_digital[df_digital.PT == 'J']

In [3]:
eu_countries = pd.read_excel("../data/countries_eu.xlsx", sheet_name='EU').Country.to_list()
other_countries = pd.read_excel("../data/countries_eu.xlsx", sheet_name='rest').Country.to_list()
gdp = pd.read_csv("../data/countriesgdp_pop.csv", index_col="ranking")
gdp = gdp[gdp.Country.isin(eu_countries + other_countries)].set_index("Country")
gdp.loc["EU", "gdppc"] = 44024
gdp.loc["EU", "pop2022"] = 446800
gdp.loc["EU", "gdp"] = 446800*44024
gdp

Unnamed: 0_level_0,gdppc,pop2022,gdp
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Luxembourg,118001.0,647.599,76417330.0
Ireland,94392.0,5023.109,474141300.0
Switzerland,72874.0,8740.472,636953200.0
Norway,65800.0,5434.319,357578200.0
United States,63416.0,338289.857,21452990000.0
Denmark,58932.0,5882.261,346653400.0
Netherlands,57534.0,17564.014,1010528000.0
Austria,55218.0,8939.617,493627800.0
Sweden,54146.0,10549.347,571204900.0
Germany,54076.0,83369.843,4508308000.0


In [4]:
# Digital

dic_countries = {country: [0, 0, 0]  for country in gdp.index}
for row in df_digital.itertuples(index=False, name=None):
    year = int(row[9])
    cn = row[12]
    lst_actors = cn.split(", ")
    size_actors = len(lst_actors)
    for actor in lst_actors:
        if actor in dic_countries.keys():
            # Increment for digital
            dic_countries[actor][0] += 1 / size_actors
            if actor in eu_countries:
                dic_countries['EU'][0] += 1 / size_actors

# SDG & SDG-DT
for row in df_sdg.itertuples(index=False, name=None):
    year = int(row[9])
    cn = row[12]
    lst_actors = cn.split(", ")
    size_actors = len(lst_actors)
    for actor in lst_actors:
        if actor in dic_countries.keys():
            # Increment for SDG
            dic_countries[actor][1] += 1 / size_actors
            if actor in eu_countries:
                dic_countries['EU'][1] += 1 / size_actors
            if row[-1]:
                # Increment for SDG-DT
                dic_countries[actor][2] += 1 / size_actors
                if actor in eu_countries:
                    dic_countries['EU'][2] += 1 / size_actors


In [5]:
df_country = pd.DataFrame.from_dict(data=dic_countries, orient="index", columns=['SDG', 'DT', 'SDG-DT'])
df_country_eu = df_country.loc[eu_countries, :]
df_country_eu.index.names = ['Country']

df_country_eu = df_country_eu.merge(gdp, left_index=True, right_index=True)
df_country_eu.loc[:, 'SDG-pop'] = df_country_eu.loc[:, 'SDG'] /  df_country_eu.loc[:, 'pop2022']
df_country_eu.loc[:, 'DT-pop'] = df_country_eu.loc[:, 'DT'] /  df_country_eu.loc[:, 'pop2022']
df_country_eu.loc[:, 'SDG-DT-pop'] = df_country_eu.loc[:, 'SDG-DT'] /  df_country_eu.loc[:, 'pop2022']

df_country_eu.loc[:, 'SDG-gdp'] = df_country_eu.loc[:, 'SDG']*1000000 /  df_country_eu.loc[:, 'gdp']
df_country_eu.loc[:, 'DT-gdp'] = df_country_eu.loc[:, 'DT']*1000000 /  df_country_eu.loc[:, 'gdp']
df_country_eu.loc[:, 'SDG-DT-gdp'] = df_country_eu.loc[:, 'SDG-DT']*1000000 /  df_country_eu.loc[:, 'gdp']

df_country_eu

Unnamed: 0_level_0,SDG,DT,SDG-DT,gdppc,pop2022,gdp,SDG-pop,DT-pop,SDG-DT-pop,SDG-gdp,DT-gdp,SDG-DT-gdp
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Austria,3195.657344,5372.635315,138.258313,55218.0,8939.617,493627800.0,0.357471,0.600992,0.015466,6.47382,10.883981,0.280086
Belgium,4694.626639,8371.507113,181.901241,51096.0,11655.93,595571400.0,0.402767,0.718219,0.015606,7.882559,14.056261,0.305423
Bulgaria,543.927915,1009.169897,24.554932,23817.0,6781.953,161525800.0,0.080202,0.148802,0.003621,3.367437,6.247733,0.152019
Croatia,1215.733007,2621.667351,78.026927,27717.0,4030.358,111709400.0,0.301644,0.65048,0.01936,10.882993,23.46863,0.698481
Cyprus,528.202551,851.738882,41.854211,40107.0,1251.488,50193430.0,0.42206,0.680581,0.033444,10.523341,16.969131,0.833858
Czech Republic,2618.025604,4618.636023,107.792072,40618.0,10493.986,426244700.0,0.249479,0.440122,0.010272,6.142072,10.835644,0.252888
Denmark,3031.371471,7977.961969,143.020285,58932.0,5882.261,346653400.0,0.515341,1.356275,0.024314,8.744675,23.014232,0.412574
Estonia,433.400506,1238.384147,31.906383,38834.0,1326.062,51496290.0,0.326833,0.933881,0.024061,8.41615,24.048026,0.619586
Finland,3549.391121,7719.220726,200.26744,49853.0,5540.745,276222800.0,0.640598,1.393174,0.036144,12.849742,27.945636,0.725022
France,15689.475027,23290.960538,551.407769,46062.0,64626.628,2976832000.0,0.242771,0.360393,0.008532,5.270528,7.824077,0.185233


In [6]:
df_country_eu.loc['max']= df_country_eu.max()
df_country_eu

Unnamed: 0_level_0,SDG,DT,SDG-DT,gdppc,pop2022,gdp,SDG-pop,DT-pop,SDG-DT-pop,SDG-gdp,DT-gdp,SDG-DT-gdp
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Austria,3195.657344,5372.635315,138.258313,55218.0,8939.617,493627800.0,0.357471,0.600992,0.015466,6.47382,10.883981,0.280086
Belgium,4694.626639,8371.507113,181.901241,51096.0,11655.93,595571400.0,0.402767,0.718219,0.015606,7.882559,14.056261,0.305423
Bulgaria,543.927915,1009.169897,24.554932,23817.0,6781.953,161525800.0,0.080202,0.148802,0.003621,3.367437,6.247733,0.152019
Croatia,1215.733007,2621.667351,78.026927,27717.0,4030.358,111709400.0,0.301644,0.65048,0.01936,10.882993,23.46863,0.698481
Cyprus,528.202551,851.738882,41.854211,40107.0,1251.488,50193430.0,0.42206,0.680581,0.033444,10.523341,16.969131,0.833858
Czech Republic,2618.025604,4618.636023,107.792072,40618.0,10493.986,426244700.0,0.249479,0.440122,0.010272,6.142072,10.835644,0.252888
Denmark,3031.371471,7977.961969,143.020285,58932.0,5882.261,346653400.0,0.515341,1.356275,0.024314,8.744675,23.014232,0.412574
Estonia,433.400506,1238.384147,31.906383,38834.0,1326.062,51496290.0,0.326833,0.933881,0.024061,8.41615,24.048026,0.619586
Finland,3549.391121,7719.220726,200.26744,49853.0,5540.745,276222800.0,0.640598,1.393174,0.036144,12.849742,27.945636,0.725022
France,15689.475027,23290.960538,551.407769,46062.0,64626.628,2976832000.0,0.242771,0.360393,0.008532,5.270528,7.824077,0.185233


In [7]:
df_country_eu.columns

Index(['SDG', 'DT', 'SDG-DT', 'gdppc', 'pop2022', 'gdp', 'SDG-pop', 'DT-pop',
       'SDG-DT-pop', 'SDG-gdp', 'DT-gdp', 'SDG-DT-gdp'],
      dtype='object')

In [8]:
for cat in ['SDG', 'DT', 'SDG-DT', 'SDG-pop', 'DT-pop','SDG-DT-pop', 'SDG-gdp', 'DT-gdp', 'SDG-DT-gdp']:
    tit = cat.replace("-pop", "").replace("-gdp", "")
    print(tit)

SDG
DT
SDG-DT
SDG
DT
SDG-DT
SDG
DT
SDG-DT


## Maps for EU

In [65]:
for cat in ['SDG', 'DT', 'SDG-DT', 'SDG-pop', 'DT-pop','SDG-DT-pop', 'SDG-gdp', 'DT-gdp', 'SDG-DT-gdp']:
    # tit = cat.replace("-pop", "").replace("-gdp", "")
    if 'pop' in cat:
        spec = "/pop*"
        annot= "*Normalised by population"
    elif "gdp" in cat:
        spec = "/gdp*"
        annot = "*Normalised by GDP (in M$)"
    else:
        spec = ""
        annot = ""
    tit = f"Nb pubs{spec}"
    layout = go.Layout(
        annotations=[
            dict(
                x=1.05,
                y=1,
                align="right",
                valign="top",
                text=tit,
                showarrow=False,
                xref="paper",
                yref="paper",
                xanchor="center",
                yanchor="top",
                font_size=22,
                ),
            dict(
                x=0.25,
                y=-.01,
                align="right",
                valign="top",
                text=annot,
                showarrow=False,
                xref="paper",
                yref="paper",
                xanchor="center",
                yanchor="top",
                font_size=30,
                ),
            ],
        )
    fig = go.Figure(
            data=go.Choropleth(
                locations=df_country_eu.index,
                z=df_country_eu[cat],
                text=df_country_eu.index,
                locationmode="country names",
                colorscale='Blues',
                autocolorscale=False,
                colorbar=dict(
                    len=0.9,
                    y=0.5
                )
            ),
            layout=layout,
        )

    fig.update_layout(
        template='simple_white',
        font=dict(size=30),
        showlegend=True,
        geo=go.layout.Geo(
            scope="world",
            landcolor='lightgray',
            projection_scale=float(6.1),
            center=dict(lon=15, lat=52),
            projection_type="azimuthal equal area",
            showland=True,
            showcountries=False,
            showframe=False
        ),
        height=1080,
        width=1200


    )
    fig.write_image(f"../img/Commission/maps/EU_{cat}.jpg")
    # pio.write_image(fig, folder, format='png')

## Same on WORLD

In [33]:
%%time
set_countries = set()
for row in df_sdg.itertuples(index=False, name=None):
    cn = row[12]
    lst_actors = cn.split(", ")
    for actor in lst_actors:
        set_countries.add(actor)
set_countries.add('EU')

CPU times: user 3.32 s, sys: 3.57 ms, total: 3.32 s
Wall time: 3.32 s


In [34]:
set_countries

{'Afghanistan',
 'Albania',
 'Algeria',
 'Andorra',
 'Angola',
 'Anguilla',
 'Antigua and Barbuda',
 'Argentina',
 'Armenia',
 'Aruba',
 'Australia',
 'Austria',
 'Azerbaijan',
 'Bahamas',
 'Bahrain',
 'Bangladesh',
 'Barbados',
 'Belarus',
 'Belgium',
 'Belize',
 'Benin',
 'Bermuda',
 'Bhutan',
 'Bolivia',
 'Bosnia and Herzegovina',
 'Botswana',
 'Brazil',
 'British Indian Ocean Territory',
 'British Virgin Islands',
 'Brunei',
 'Bulgaria',
 'Burkina Faso',
 'Burundi',
 'Cambodia',
 'Cameroon',
 'Canada',
 'Cape Verde',
 'Cayman Islands',
 'Central African Republic',
 'Chad',
 'Chile',
 'China',
 'Colombia',
 'Comoros',
 'Congo [DRC]',
 'Cook Islands',
 'Costa Rica',
 "Cote d'Ivoire",
 'Croatia',
 'Cuba',
 'Curacao',
 'Cyprus',
 'Czech Republic',
 'Denmark',
 'Djibouti',
 'Dominica',
 'Dominican Republic',
 'EU',
 'Ecuador',
 'Egypt',
 'El Salvador',
 'Equatorial Guinea',
 'Eritrea',
 'Estonia',
 'Eswatini',
 'Ethiopia',
 'Falkland Islands',
 'Faroe Islands',
 'Fiji',
 'Finland',
 'Fr

In [35]:
# Digital

dic_countries = {country: [0, 0, 0]  for country in set_countries}

for row in df_digital.itertuples(index=False, name=None):
    year = int(row[9])
    cn = row[12]
    lst_actors = cn.split(", ")
    size_actors = len(lst_actors)
    for actor in lst_actors:
        # Increment for digital
        if actor in dic_countries:
            dic_countries[actor][0] += 1 / size_actors
            if actor in eu_countries:
                dic_countries['EU'][0] += 1 / size_actors

# SDG & SDG-DT
for row in df_sdg.itertuples(index=False, name=None):
    year = int(row[9])
    cn = row[12]
    lst_actors = cn.split(", ")
    size_actors = len(lst_actors)
    for actor in lst_actors:
        if actor in dic_countries:
            # Increment for SDG
            dic_countries[actor][1] += 1 / size_actors
            if actor in eu_countries:
                dic_countries['EU'][1] += 1 / size_actors
            if row[-1]:
                # Increment for SDG-DT
                dic_countries[actor][2] += 1 / size_actors
                if actor in eu_countries:
                    dic_countries['EU'][2] += 1 / size_actors


In [36]:
dic_countries

{'Austria': [3195.6573442546473, 5372.635314992437, 138.25831274620577],
 'Colombia': [1204.4622840269724, 3128.0903074657067, 77.37229690387599],
 'Morocco': [1470.3580801038522, 1556.9364458500509, 86.57241508173362],
 'Somalia': [1.9857142857142858, 20.084785958384344, 0.6666666666666666],
 'Tuvalu': [0, 0.9332547855145108, 0],
 'Cape Verde': [0.5333333333333333, 24.45411191850047, 0],
 'Togo': [2.042058516196447, 71.64050996155208, 1.9166666666666667],
 'Denmark': [3031.371470987053, 7977.961968500382, 143.02028501831475],
 'Nigeria': [822.4640553792981, 4875.144073653777, 85.45068089871612],
 'Turks and Caicos Islands': [0, 4.939151068024308, 0],
 'Guinea': [3.2928160919540215, 38.614420526720004, 0.39999999999999997],
 'Taiwan': [0, 10225.048637645823, 592.2434024738548],
 'Niger': [7.353744052881982, 79.27135397294336, 1.5736263736263734],
 'Zimbabwe': [31.52573260073258, 639.3795708072912, 7.286111111111111],
 'Yemen': [79.46929736929754, 96.77856380366788, 3.523809523809524],


In [37]:
dic_countries_to_rename = {"Palestine": "Palestinian Territory",
                                   "Cote d'Ivoire": "Ivory Coast",
                                   "Congo[DRC]": "Democratic Republic of the Congo",
                                   "Congo, Dem.Rep.": "Democratic Republic of the Congo",
                                   "Eswatini": "Swaziland",
                                   "Guinea Bissau": "Guinea-Bissau",
                                   "Timor-Leste": "East Timor",
                                   }
for country in dic_countries.copy():
    if country in dic_countries_to_rename:
        dic_countries[dic_countries_to_rename[country]] = dic_countries[country]

In [38]:
gdp_pop_world = pd.read_csv("../data/countriesgdp_pop.csv", index_col=1)
gdp_pop_world


Unnamed: 0_level_0,ranking,gdppc,pop2022,gdp
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Luxembourg,1,118001,647.599,7.641733e+07
Singapore,2,97057,5975.689,5.799824e+08
Ireland,3,94392,5023.109,4.741413e+08
Qatar,4,93508,2695.122,2.520155e+08
Switzerland,5,72874,8740.472,6.369532e+08
...,...,...,...,...
Malawi,190,993,20405.317,2.026248e+07
Central African Republic,191,979,5579.144,5.461982e+06
Somalia,192,925,17597.511,1.627770e+07
South Sudan,193,791,10913.164,8.632313e+06


In [39]:
df_country = pd.DataFrame.from_dict(data=dic_countries, orient="index", columns=['SDG', 'DT', 'SDG-DT'])

df_country.index.names = ['Country']
dic_countries_to_rename = {"Palestine": "Palestinian Territory",
                                   "Cote d'Ivoire": "Ivory Coast",
                                   "Congo [DRC]": "Democratic Republic of the Congo",
                                   "Congo, Dem.Rep.": "Democratic Republic of the Congo",
                                   "Eswatini": "Swaziland",
                                   "Guinea Bissau": "Guinea-Bissau",
                                   "Timor-Leste": "East Timor",
                                   }
df_country.rename(index=dic_countries_to_rename, inplace=True)
df_country

Unnamed: 0_level_0,SDG,DT,SDG-DT
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Austria,3195.657344,5372.635315,138.258313
Colombia,1204.462284,3128.090307,77.372297
Morocco,1470.358080,1556.936446,86.572415
Somalia,1.985714,20.084786,0.666667
Tuvalu,0.000000,0.933255,0.000000
...,...,...,...
Central African Republic,0.142857,11.904878,0.000000
Guinea-Bissau,0.000000,17.696378,0.142857
Ivory Coast,31.382757,242.738054,3.795635
Palestinian Territory,0.000000,241.647418,5.746429


In [40]:
df_country = df_country.merge(gdp_pop_world, left_index=True, right_index=True)
df_country

Unnamed: 0_level_0,SDG,DT,SDG-DT,ranking,gdppc,pop2022,gdp
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Afghanistan,4.494884,78.130248,0.250000,174,2390,41128.771,9.829776e+07
Albania,33.306126,249.054814,2.220635,90,14218,2842.321,4.041212e+07
Algeria,2165.284091,1425.219143,94.420238,109,11112,44903.225,4.989646e+08
Angola,3.292100,35.749375,0.000000,127,6932,35588.987,2.467029e+08
Antigua and Barbuda,0.000000,5.189912,0.000000,73,18618,93.763,1.745680e+06
...,...,...,...,...,...,...,...
Venezuela,97.623511,311.525328,5.410439,143,5178,28301.696,1.465462e+08
Vietnam,2290.628672,2762.141449,134.434118,112,10869,98186.856,1.067193e+09
Yemen,79.469297,96.778564,3.523810,181,1927,33696.614,6.493338e+07
Zambia,28.898967,321.160727,3.622222,162,3342,20017.675,6.689907e+07


In [41]:
df_country.loc[:, 'SDG-pop'] = df_country.loc[:, 'SDG'] /  df_country.loc[:, 'pop2022']
df_country.loc[:, 'DT-pop'] = df_country.loc[:, 'DT'] /  df_country.loc[:, 'pop2022']
df_country.loc[:, 'SDG-DT-pop'] = df_country.loc[:, 'SDG-DT'] /  df_country.loc[:, 'pop2022']

df_country.loc[:, 'SDG-gdp'] = df_country.loc[:, 'SDG']*1000000 /  df_country.loc[:, 'gdp']
df_country.loc[:, 'DT-gdp'] = df_country.loc[:, 'DT']*1000000 /  df_country.loc[:, 'gdp']
df_country.loc[:, 'SDG-DT-gdp'] = df_country.loc[:, 'SDG-DT']*1000000 /  df_country.loc[:, 'gdp']

df_country

Unnamed: 0_level_0,SDG,DT,SDG-DT,ranking,gdppc,pop2022,gdp,SDG-pop,DT-pop,SDG-DT-pop,SDG-gdp,DT-gdp,SDG-DT-gdp
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Afghanistan,4.494884,78.130248,0.250000,174,2390,41128.771,9.829776e+07,0.000109,0.001900,0.000006,0.045727,0.794832,0.002543
Albania,33.306126,249.054814,2.220635,90,14218,2842.321,4.041212e+07,0.011718,0.087624,0.000781,0.824162,6.162874,0.054950
Algeria,2165.284091,1425.219143,94.420238,109,11112,44903.225,4.989646e+08,0.048221,0.031740,0.002103,4.339554,2.856353,0.189232
Angola,3.292100,35.749375,0.000000,127,6932,35588.987,2.467029e+08,0.000093,0.001005,0.000000,0.013344,0.144909,0.000000
Antigua and Barbuda,0.000000,5.189912,0.000000,73,18618,93.763,1.745680e+06,0.000000,0.055351,0.000000,0.000000,2.973004,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela,97.623511,311.525328,5.410439,143,5178,28301.696,1.465462e+08,0.003449,0.011007,0.000191,0.666162,2.125783,0.036920
Vietnam,2290.628672,2762.141449,134.434118,112,10869,98186.856,1.067193e+09,0.023329,0.028131,0.001369,2.146405,2.588231,0.125970
Yemen,79.469297,96.778564,3.523810,181,1927,33696.614,6.493338e+07,0.002358,0.002872,0.000105,1.223859,1.490429,0.054268
Zambia,28.898967,321.160727,3.622222,162,3342,20017.675,6.689907e+07,0.001444,0.016044,0.000181,0.431979,4.800676,0.054145


In [63]:
for cat in ['SDG', 'DT', 'SDG-DT', 'SDG-pop', 'DT-pop','SDG-DT-pop', 'SDG-gdp', 'DT-gdp', 'SDG-DT-gdp']:
    # tit = cat.replace("-pop", "").replace("-gdp", "")
    if 'pop' in cat:
        spec = "/pop*"
        annot= "*Normalised by population"
    elif "gdp" in cat:
        spec = "/gdp*"
        annot = "*Normalised by GDP (in M$)"
    else:
        spec = ""
        annot = ""
    tit = f"Nb pubs{spec}"

    layout = go.Layout(
        annotations=[
            dict(
                x=1,
                y=0.9,
                align="right",
                valign="top",
                text=tit,
                showarrow=False,
                xref="paper",
                yref="paper",
                xanchor="center",
                yanchor="top",
                font_size=22,
                ),
            dict(
                x=0.25,
                y=-.01,
                align="right",
                valign="top",
                text=annot,
                showarrow=False,
                xref="paper",
                yref="paper",
                xanchor="center",
                yanchor="top",
                font_size=30,
                ),
            ],
        )
    fig = go.Figure(
            data=go.Choropleth(
                locations=df_country.index,
                z=df_country[cat],
                text=df_country.index,
                locationmode="country names",
                colorscale='Blues',
                autocolorscale=False,
                colorbar=dict(
                    len=0.8,
                    y=0.45,
                    x= 0.95
                )
            ),
            layout=layout,
        )

    fig.update_layout(
        template='simple_white',
        font=dict(size=25),
        showlegend=True,

        geo=go.layout.Geo(
            scope="world",
            landcolor='lightgray',
            projection_scale=1,
            center=dict(lon=20, lat=30),
            projection_type="kavrayskiy7",

            showland=True,
            showcountries=False,
            showframe=False
        ),
        height=900,
        width=1400
    )

    fig.write_image(f"../img/Commission/maps/World/{cat}.jpg")
    # pio.write_image(fig, folder, format='png')

ValueError: Invalid property specified for object of type plotly.graph_objs.Layout: 'showborder'

Did you mean "boxmode"?

    Valid properties:
        activeselection
            :class:`plotly.graph_objects.layout.Activeselection`
            instance or dict with compatible properties
        activeshape
            :class:`plotly.graph_objects.layout.Activeshape`
            instance or dict with compatible properties
        annotations
            A tuple of
            :class:`plotly.graph_objects.layout.Annotation`
            instances or dicts with compatible properties
        annotationdefaults
            When used in a template (as
            layout.template.layout.annotationdefaults), sets the
            default property values to use for elements of
            layout.annotations
        autosize
            Determines whether or not a layout width or height that
            has been left undefined by the user is initialized on
            each relayout. Note that, regardless of this attribute,
            an undefined layout width or height is always
            initialized on the first call to plot.
        autotypenumbers
            Using "strict" a numeric string in trace data is not
            converted to a number. Using *convert types* a numeric
            string in trace data may be treated as a number during
            automatic axis `type` detection. This is the default
            value; however it could be overridden for individual
            axes.
        bargap
            Sets the gap (in plot fraction) between bars of
            adjacent location coordinates.
        bargroupgap
            Sets the gap (in plot fraction) between bars of the
            same location coordinate.
        barmode
            Determines how bars at the same location coordinate are
            displayed on the graph. With "stack", the bars are
            stacked on top of one another With "relative", the bars
            are stacked on top of one another, with negative values
            below the axis, positive values above With "group", the
            bars are plotted next to one another centered around
            the shared location. With "overlay", the bars are
            plotted over one another, you might need to reduce
            "opacity" to see multiple bars.
        barnorm
            Sets the normalization for bar traces on the graph.
            With "fraction", the value of each bar is divided by
            the sum of all values at that location coordinate.
            "percent" is the same but multiplied by 100 to show
            percentages.
        boxgap
            Sets the gap (in plot fraction) between boxes of
            adjacent location coordinates. Has no effect on traces
            that have "width" set.
        boxgroupgap
            Sets the gap (in plot fraction) between boxes of the
            same location coordinate. Has no effect on traces that
            have "width" set.
        boxmode
            Determines how boxes at the same location coordinate
            are displayed on the graph. If "group", the boxes are
            plotted next to one another centered around the shared
            location. If "overlay", the boxes are plotted over one
            another, you might need to set "opacity" to see them
            multiple boxes. Has no effect on traces that have
            "width" set.
        calendar
            Sets the default calendar system to use for
            interpreting and displaying dates throughout the plot.
        clickmode
            Determines the mode of single click interactions.
            "event" is the default value and emits the
            `plotly_click` event. In addition this mode emits the
            `plotly_selected` event in drag modes "lasso" and
            "select", but with no event data attached (kept for
            compatibility reasons). The "select" flag enables
            selecting single data points via click. This mode also
            supports persistent selections, meaning that pressing
            Shift while clicking, adds to / subtracts from an
            existing selection. "select" with `hovermode`: "x" can
            be confusing, consider explicitly setting `hovermode`:
            "closest" when using this feature. Selection events are
            sent accordingly as long as "event" flag is set as
            well. When the "event" flag is missing, `plotly_click`
            and `plotly_selected` events are not fired.
        coloraxis
            :class:`plotly.graph_objects.layout.Coloraxis` instance
            or dict with compatible properties
        colorscale
            :class:`plotly.graph_objects.layout.Colorscale`
            instance or dict with compatible properties
        colorway
            Sets the default trace colors.
        computed
            Placeholder for exporting automargin-impacting values
            namely `margin.t`, `margin.b`, `margin.l` and
            `margin.r` in "full-json" mode.
        datarevision
            If provided, a changed value tells `Plotly.react` that
            one or more data arrays has changed. This way you can
            modify arrays in-place rather than making a complete
            new copy for an incremental change. If NOT provided,
            `Plotly.react` assumes that data arrays are being
            treated as immutable, thus any data array with a
            different identity from its predecessor contains new
            data.
        dragmode
            Determines the mode of drag interactions. "select" and
            "lasso" apply only to scatter traces with markers or
            text. "orbit" and "turntable" apply only to 3D scenes.
        editrevision
            Controls persistence of user-driven changes in
            `editable: true` configuration, other than trace names
            and axis titles. Defaults to `layout.uirevision`.
        extendfunnelareacolors
            If `true`, the funnelarea slice colors (whether given
            by `funnelareacolorway` or inherited from `colorway`)
            will be extended to three times its original length by
            first repeating every color 20% lighter then each color
            20% darker. This is intended to reduce the likelihood
            of reusing the same color when you have many slices,
            but you can set `false` to disable. Colors provided in
            the trace, using `marker.colors`, are never extended.
        extendiciclecolors
            If `true`, the icicle slice colors (whether given by
            `iciclecolorway` or inherited from `colorway`) will be
            extended to three times its original length by first
            repeating every color 20% lighter then each color 20%
            darker. This is intended to reduce the likelihood of
            reusing the same color when you have many slices, but
            you can set `false` to disable. Colors provided in the
            trace, using `marker.colors`, are never extended.
        extendpiecolors
            If `true`, the pie slice colors (whether given by
            `piecolorway` or inherited from `colorway`) will be
            extended to three times its original length by first
            repeating every color 20% lighter then each color 20%
            darker. This is intended to reduce the likelihood of
            reusing the same color when you have many slices, but
            you can set `false` to disable. Colors provided in the
            trace, using `marker.colors`, are never extended.
        extendsunburstcolors
            If `true`, the sunburst slice colors (whether given by
            `sunburstcolorway` or inherited from `colorway`) will
            be extended to three times its original length by first
            repeating every color 20% lighter then each color 20%
            darker. This is intended to reduce the likelihood of
            reusing the same color when you have many slices, but
            you can set `false` to disable. Colors provided in the
            trace, using `marker.colors`, are never extended.
        extendtreemapcolors
            If `true`, the treemap slice colors (whether given by
            `treemapcolorway` or inherited from `colorway`) will be
            extended to three times its original length by first
            repeating every color 20% lighter then each color 20%
            darker. This is intended to reduce the likelihood of
            reusing the same color when you have many slices, but
            you can set `false` to disable. Colors provided in the
            trace, using `marker.colors`, are never extended.
        font
            Sets the global font. Note that fonts used in traces
            and other layout components inherit from the global
            font.
        funnelareacolorway
            Sets the default funnelarea slice colors. Defaults to
            the main `colorway` used for trace colors. If you
            specify a new list here it can still be extended with
            lighter and darker colors, see
            `extendfunnelareacolors`.
        funnelgap
            Sets the gap (in plot fraction) between bars of
            adjacent location coordinates.
        funnelgroupgap
            Sets the gap (in plot fraction) between bars of the
            same location coordinate.
        funnelmode
            Determines how bars at the same location coordinate are
            displayed on the graph. With "stack", the bars are
            stacked on top of one another With "group", the bars
            are plotted next to one another centered around the
            shared location. With "overlay", the bars are plotted
            over one another, you might need to reduce "opacity" to
            see multiple bars.
        geo
            :class:`plotly.graph_objects.layout.Geo` instance or
            dict with compatible properties
        grid
            :class:`plotly.graph_objects.layout.Grid` instance or
            dict with compatible properties
        height
            Sets the plot's height (in px).
        hiddenlabels
            hiddenlabels is the funnelarea & pie chart analog of
            visible:'legendonly' but it can contain many labels,
            and can simultaneously hide slices from several
            pies/funnelarea charts
        hiddenlabelssrc
            Sets the source reference on Chart Studio Cloud for
            `hiddenlabels`.
        hidesources
            Determines whether or not a text link citing the data
            source is placed at the bottom-right cored of the
            figure. Has only an effect only on graphs that have
            been generated via forked graphs from the Chart Studio
            Cloud (at https://chart-studio.plotly.com or on-
            premise).
        hoverdistance
            Sets the default distance (in pixels) to look for data
            to add hover labels (-1 means no cutoff, 0 means no
            looking for data). This is only a real distance for
            hovering on point-like objects, like scatter points.
            For area-like objects (bars, scatter fills, etc)
            hovering is on inside the area and off outside, but
            these objects will not supersede hover on point-like
            objects in case of conflict.
        hoverlabel
            :class:`plotly.graph_objects.layout.Hoverlabel`
            instance or dict with compatible properties
        hovermode
            Determines the mode of hover interactions. If
            "closest", a single hoverlabel will appear for the
            "closest" point within the `hoverdistance`. If "x" (or
            "y"), multiple hoverlabels will appear for multiple
            points at the "closest" x- (or y-) coordinate within
            the `hoverdistance`, with the caveat that no more than
            one hoverlabel will appear per trace. If *x unified*
            (or *y unified*), a single hoverlabel will appear
            multiple points at the closest x- (or y-) coordinate
            within the `hoverdistance` with the caveat that no more
            than one hoverlabel will appear per trace. In this
            mode, spikelines are enabled by default perpendicular
            to the specified axis. If false, hover interactions are
            disabled.
        iciclecolorway
            Sets the default icicle slice colors. Defaults to the
            main `colorway` used for trace colors. If you specify a
            new list here it can still be extended with lighter and
            darker colors, see `extendiciclecolors`.
        images
            A tuple of :class:`plotly.graph_objects.layout.Image`
            instances or dicts with compatible properties
        imagedefaults
            When used in a template (as
            layout.template.layout.imagedefaults), sets the default
            property values to use for elements of layout.images
        legend
            :class:`plotly.graph_objects.layout.Legend` instance or
            dict with compatible properties
        mapbox
            :class:`plotly.graph_objects.layout.Mapbox` instance or
            dict with compatible properties
        margin
            :class:`plotly.graph_objects.layout.Margin` instance or
            dict with compatible properties
        meta
            Assigns extra meta information that can be used in
            various `text` attributes. Attributes such as the
            graph, axis and colorbar `title.text`, annotation
            `text` `trace.name` in legend items, `rangeselector`,
            `updatemenus` and `sliders` `label` text all support
            `meta`. One can access `meta` fields using template
            strings: `%{meta[i]}` where `i` is the index of the
            `meta` item in question. `meta` can also be an object
            for example `{key: value}` which can be accessed
            %{meta[key]}.
        metasrc
            Sets the source reference on Chart Studio Cloud for
            `meta`.
        minreducedheight
            Minimum height of the plot with margin.automargin
            applied (in px)
        minreducedwidth
            Minimum width of the plot with margin.automargin
            applied (in px)
        modebar
            :class:`plotly.graph_objects.layout.Modebar` instance
            or dict with compatible properties
        newselection
            :class:`plotly.graph_objects.layout.Newselection`
            instance or dict with compatible properties
        newshape
            :class:`plotly.graph_objects.layout.Newshape` instance
            or dict with compatible properties
        paper_bgcolor
            Sets the background color of the paper where the graph
            is drawn.
        piecolorway
            Sets the default pie slice colors. Defaults to the main
            `colorway` used for trace colors. If you specify a new
            list here it can still be extended with lighter and
            darker colors, see `extendpiecolors`.
        plot_bgcolor
            Sets the background color of the plotting area in-
            between x and y axes.
        polar
            :class:`plotly.graph_objects.layout.Polar` instance or
            dict with compatible properties
        scene
            :class:`plotly.graph_objects.layout.Scene` instance or
            dict with compatible properties
        selectdirection
            When `dragmode` is set to "select", this limits the
            selection of the drag to horizontal, vertical or
            diagonal. "h" only allows horizontal selection, "v"
            only vertical, "d" only diagonal and "any" sets no
            limit.
        selectionrevision
            Controls persistence of user-driven changes in selected
            points from all traces.
        selections
            A tuple of
            :class:`plotly.graph_objects.layout.Selection`
            instances or dicts with compatible properties
        selectiondefaults
            When used in a template (as
            layout.template.layout.selectiondefaults), sets the
            default property values to use for elements of
            layout.selections
        separators
            Sets the decimal and thousand separators. For example,
            *. * puts a '.' before decimals and a space between
            thousands. In English locales, dflt is ".," but other
            locales may alter this default.
        shapes
            A tuple of :class:`plotly.graph_objects.layout.Shape`
            instances or dicts with compatible properties
        shapedefaults
            When used in a template (as
            layout.template.layout.shapedefaults), sets the default
            property values to use for elements of layout.shapes
        showlegend
            Determines whether or not a legend is drawn. Default is
            `true` if there is a trace to show and any of these: a)
            Two or more traces would by default be shown in the
            legend. b) One pie trace is shown in the legend. c) One
            trace is explicitly given with `showlegend: true`.
        sliders
            A tuple of :class:`plotly.graph_objects.layout.Slider`
            instances or dicts with compatible properties
        sliderdefaults
            When used in a template (as
            layout.template.layout.sliderdefaults), sets the
            default property values to use for elements of
            layout.sliders
        smith
            :class:`plotly.graph_objects.layout.Smith` instance or
            dict with compatible properties
        spikedistance
            Sets the default distance (in pixels) to look for data
            to draw spikelines to (-1 means no cutoff, 0 means no
            looking for data). As with hoverdistance, distance does
            not apply to area-like objects. In addition, some
            objects can be hovered on but will not generate
            spikelines, such as scatter fills.
        sunburstcolorway
            Sets the default sunburst slice colors. Defaults to the
            main `colorway` used for trace colors. If you specify a
            new list here it can still be extended with lighter and
            darker colors, see `extendsunburstcolors`.
        template
            Default attributes to be applied to the plot. This
            should be a dict with format: `{'layout':
            layoutTemplate, 'data': {trace_type: [traceTemplate,
            ...], ...}}` where `layoutTemplate` is a dict matching
            the structure of `figure.layout` and `traceTemplate` is
            a dict matching the structure of the trace with type
            `trace_type` (e.g. 'scatter'). Alternatively, this may
            be specified as an instance of
            plotly.graph_objs.layout.Template.  Trace templates are
            applied cyclically to traces of each type. Container
            arrays (eg `annotations`) have special handling: An
            object ending in `defaults` (eg `annotationdefaults`)
            is applied to each array item. But if an item has a
            `templateitemname` key we look in the template array
            for an item with matching `name` and apply that
            instead. If no matching `name` is found we mark the
            item invisible. Any named template item not referenced
            is appended to the end of the array, so this can be
            used to add a watermark annotation or a logo image, for
            example. To omit one of these items on the plot, make
            an item with matching `templateitemname` and `visible:
            false`.
        ternary
            :class:`plotly.graph_objects.layout.Ternary` instance
            or dict with compatible properties
        title
            :class:`plotly.graph_objects.layout.Title` instance or
            dict with compatible properties
        titlefont
            Deprecated: Please use layout.title.font instead. Sets
            the title font. Note that the title's font used to be
            customized by the now deprecated `titlefont` attribute.
        transition
            Sets transition options used during Plotly.react
            updates.
        treemapcolorway
            Sets the default treemap slice colors. Defaults to the
            main `colorway` used for trace colors. If you specify a
            new list here it can still be extended with lighter and
            darker colors, see `extendtreemapcolors`.
        uirevision
            Used to allow user interactions with the plot to
            persist after `Plotly.react` calls that are unaware of
            these interactions. If `uirevision` is omitted, or if
            it is given and it changed from the previous
            `Plotly.react` call, the exact new figure is used. If
            `uirevision` is truthy and did NOT change, any
            attribute that has been affected by user interactions
            and did not receive a different value in the new figure
            will keep the interaction value. `layout.uirevision`
            attribute serves as the default for `uirevision`
            attributes in various sub-containers. For finer control
            you can set these sub-attributes directly. For example,
            if your app separately controls the data on the x and y
            axes you might set `xaxis.uirevision=*time*` and
            `yaxis.uirevision=*cost*`. Then if only the y data is
            changed, you can update `yaxis.uirevision=*quantity*`
            and the y axis range will reset but the x axis range
            will retain any user-driven zoom.
        uniformtext
            :class:`plotly.graph_objects.layout.Uniformtext`
            instance or dict with compatible properties
        updatemenus
            A tuple of
            :class:`plotly.graph_objects.layout.Updatemenu`
            instances or dicts with compatible properties
        updatemenudefaults
            When used in a template (as
            layout.template.layout.updatemenudefaults), sets the
            default property values to use for elements of
            layout.updatemenus
        violingap
            Sets the gap (in plot fraction) between violins of
            adjacent location coordinates. Has no effect on traces
            that have "width" set.
        violingroupgap
            Sets the gap (in plot fraction) between violins of the
            same location coordinate. Has no effect on traces that
            have "width" set.
        violinmode
            Determines how violins at the same location coordinate
            are displayed on the graph. If "group", the violins are
            plotted next to one another centered around the shared
            location. If "overlay", the violins are plotted over
            one another, you might need to set "opacity" to see
            them multiple violins. Has no effect on traces that
            have "width" set.
        waterfallgap
            Sets the gap (in plot fraction) between bars of
            adjacent location coordinates.
        waterfallgroupgap
            Sets the gap (in plot fraction) between bars of the
            same location coordinate.
        waterfallmode
            Determines how bars at the same location coordinate are
            displayed on the graph. With "group", the bars are
            plotted next to one another centered around the shared
            location. With "overlay", the bars are plotted over one
            another, you might need to reduce "opacity" to see
            multiple bars.
        width
            Sets the plot's width (in px).
        xaxis
            :class:`plotly.graph_objects.layout.XAxis` instance or
            dict with compatible properties
        yaxis
            :class:`plotly.graph_objects.layout.YAxis` instance or
            dict with compatible properties
        
Did you mean "boxmode"?

Bad property path:
showborder
^^^^^^^^^^

## Specialization

In [5]:
df_country_w_continent = pd.read_excel("../data/countries_w_continent.xlsx", sheet_name=0)
df_country_w_continent

Unnamed: 0.1,Unnamed: 0,Country,Country ISO3,Country Code,Long Name,Income Group,Lending Category,Region,Currency Unit,Other Groups,WTO Member,latitude,longitude
0,0,Afghanistan,AFG,4,Islamic State of Afghanistan,Low income,IDA,South Asia,Afghan afghani,HIPC,No,33.939110,67.709953
1,1,Albania,ALB,8,Republic of Albania,Upper middle income,IBRD,Europe & Central Asia,Albanian lek,,Yes,41.153332,20.168331
2,2,Algeria,DZA,12,People's Democratic Republic of Algeria,Upper middle income,IBRD,Middle East & North Africa,Algerian dinar,,No,28.033886,1.659626
3,3,American Samoa,ASM,16,American Samoa,Upper middle income,,East Asia & Pacific,U.S. dollar,,No,-14.270972,-170.132217
4,4,Andorra,AND,20,,Others,,Europe & Central Asia,,,No,42.546245,1.601554
...,...,...,...,...,...,...,...,...,...,...,...,...,...
222,265,Venezuela,VEN,862,República Bolivariana de Venezuela,High income: nonOECD,IBRD,Latin America & Caribbean,Venezuelan bolivar fuerte,,Yes,6.423750,-66.589730
223,266,Vietnam,VNM,704,Socialist Republic of Vietnam,Lower middle income,Blend,East Asia & Pacific,Vietnamese dong,,Yes,14.058324,108.277199
224,270,Yemen,YEM,887,Republic of Yemen,Lower middle income,IDA,Middle East & North Africa,Yemeni rial,,No,15.552727,48.516388
225,273,Zambia,ZMB,894,Republic of Zambia,Lower middle income,IDA,Sub-Saharan Africa,New Zambian kwacha,HIPC,Yes,-13.133897,27.849332


In [5]:
lst_sdg = ["SDG" + str(i) for i in range(1, 18)]
lst_cat = ["Environment", "Society", "Economy"]
lst_dt_shortened = ['AI', 'robotics', 'IOT']
lst_dt = ['AI', 'big_data', 'IOT', 'computing_infrastructure', 'blockchain', 'robotics',
          'additive_manufacturing']
lst_cat_dt = ['Environment-AI', 'Environment-robotics', 'Environment-IOT',
              'Society-AI', 'Society-robotics', 'Society-IOT',
              'Economy-AI', 'Economy-robotics', 'Economy-IOT']
df_sdg = pd.read_pickle("../data/dataframes/SDG/all_sdg_fixed_dst.pkl")
df_dt = pd.read_pickle("../data/dataframes/DT/all_digital.pkl")

# Filtering for Journal only
df_sdg = df_sdg[df_sdg.PT == 'J']
df_dt = df_dt[df_dt.PT == 'J']
df_sdg_dt = df_sdg[df_sdg['DST']]

In [7]:
df_sdg_dt

Unnamed: 0,PT,AU,TI,SO,DE,AB,C1,EM,TC,PY,...,IOT,computing_infrastructure,blockchain,robotics,additive_manufacturing,Society,Economy,Environment,EU,DST
131,J,"Nasseri, A; Jamshidi, S; Yazdifar, H; Percy, D...",Evaluating the performances of over-the-counte...,JOURNAL OF APPLIED ACCOUNTING RESEARCH,Developing countries; Portfolio optimization; ...,"Purpose With suitable optimization criteria, h...","[Nasseri, Ahmad; Jamshidi, Sajad] Univ Sistan ...",nasseri@acc.usb.ac.ir; usb.sajad@gmail.com; hy...,0,2020,...,False,False,False,False,False,False,True,False,True,True
347,J,"Khalifa, N; Abd Elghany, M; Abd Elghany, M",Exploratory research on digitalization transfo...,COGENT BUSINESS & MANAGEMENT,Artificial intelligence; supply chain; supply ...,"With no doubt, the adoption of Artificial Inte...","[Khalifa, Nermin; Abd Elghany, Marwa] Arab Aca...",nermine_khalifa@aast.edu; monam@aast.edu; marw...,0,2021,...,False,False,False,False,True,False,True,False,False,True
356,J,"Emmanuel, O; Ananya, M; Misra, S; Koyuncu, M",A Deep Neural Network-Based Advisory Framework...,SUSTAINABILITY,sustainability development goals; predictive a...,"Research in sustainable development, program d...","[Emmanuel, Okewu] Univ Lagos, Ctr Informat & T...",eokewu@unilag.edu.ng; ge25daj@mytum.de; Sanjay...,1,2020,...,False,False,False,False,False,True,True,False,True,True
437,J,"Kharal, AY; Khalid, HA; Gastli, A; Guerrero, JM",A Novel Features-Based Multivariate Gaussian D...,IEEE ACCESS,Developing countries; Meters; Companies; Tarif...,"According to statistics, developing countries ...","[Kharal, Ammar Yousaf; Khalid, Hassan Abdullah...",adel.gastli@qu.edu.qa,0,2021,...,False,False,False,False,False,False,True,False,True,True
438,J,"Hoekman, B; Shepherd, B",Services Trade Policies and Economic Integrati...,WORLD TRADE REVIEW,International trade; trade in services; machin...,This paper applies machine learning to recreat...,"[Hoekman, Bernard] European Univ Inst, Fiesole...",bernard.hoekman@eui.eu; ben@developing-trade.com,1,2021,...,False,False,False,False,False,False,True,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1199147,J,"Herrera-Ibata, DM; Orbegozo-Medina, RA; Gonzal...",Multiscale Mapping of AIDS in US Countries vs ...,CURRENT BIOINFORMATICS,Anti-HIV drugs; AIDS in US at county level; Gi...,"In this work, we reviewed different aspects ab...","[Maria Herrera-Ibata, Diana] Univ A Coruna UDC...",diana.herrera@udc.es,5,2015,...,False,False,False,False,False,True,False,False,True,True
1199205,J,"Feng, XP; Zhu, J; Lin, PS; Steen-Adams, MM",Composite likelihood estimation for models of ...,ENVIRONMETRICS,Gaussian latent variable; Godambe information;...,"In this paper, we consider a spatial ordered p...","[Feng, Xiaoping; Zhu, Jun] Univ Wisconsin, Dep...",jzhu@stat.wisc.edu,7,2014,...,False,True,False,False,False,True,False,False,False,True
1199228,J,"Herrera-Ibata, DM; Pazos, A; Orbegozo-Medina, ...",Mapping networks of anti-HIV drug cocktails vs...,CHEMOMETRICS AND INTELLIGENT LABORATORY SYSTEMS,ChEMBL; AIDSVu; anti-HIV drug cocktails; HAART...,The implementation of the highly active antire...,"[Maria Herrera-Ibata, Diana; Pazos, Alejandro]...",diana.herrera@udc.es; humberto.gonzalezdiaz@eh...,9,2014,...,False,False,False,False,False,True,False,False,True,True
1200469,J,"Wolfson, M; Wallace, SE; Masca, N; Rowe, G; Sh...",DataSHIELD: resolving a conflict in contempora...,INTERNATIONAL JOURNAL OF EPIDEMIOLOGY,Pooling; analysis; meta-analysis; individual-l...,Methods Data aggregation through anonymous sum...,"[Masca, Nicholas; Sheehan, Nuala A.; Tobin, Ma...",pb51@le.ac.uk,86,2010,...,False,True,False,False,False,True,False,False,True,True


In [8]:
idx_sdg = {name: i for i, name in enumerate(list(df_sdg), start=0)}
idx_dt = {name: i for i, name in enumerate(list(df_dt), start=0)}
idx_sdg_dt = {name: i for i, name in enumerate(list(df_sdg_dt), start=0)}


In [11]:
%%time
import itertools
from tqdm import tqdm
dic = {c:{elem:0 for elem in lst_sdg+lst_cat+lst_dt+lst_cat_dt} for c in set_countries}
dic_world = {elem:0 for elem in lst_sdg+lst_cat+lst_dt+lst_cat_dt}

# SDG dataframe
for row in tqdm(df_sdg.itertuples(index=False, name=None), total=df_sdg.shape[0]):
    lst_actors = row[idx_sdg['CN']].split(", ")
    size_actors = len(lst_actors)
    sdgs_in_pubs = []
    # Country count
    for sdg in lst_sdg:
        if row[idx_sdg[sdg]]:
            sdgs_in_pubs += sdg
            for actor in lst_actors:
                dic_world[sdg] += 1
                dic[actor][sdg] += 1
    for cat in lst_cat:
        if row[idx_sdg[cat]]:
            for actor in lst_actors:
                dic_world[cat] += 1
                dic[actor][cat] += 1


# DT dataframe
for row in tqdm(df_dt.itertuples(index=False, name=None), total=df_dt.shape[0]):
    lst_actors = row[idx_dt['CN']].split(", ")
    # Country count
    for dt in lst_dt:
        if row[idx_dt[dt]]:
            for actor in lst_actors:
                if actor in dic:
                    dic[actor][dt] += 1
                    dic_world[dt] += 1

# Intersection
for row in tqdm(df_sdg_dt.itertuples(index=False, name=None), total=df_sdg_dt.shape[0]):
    # Country count
    lst_actors = row[idx_sdg_dt['CN']].split(", ")
    for cat in lst_cat:
        if row[idx_sdg_dt[cat]]:
            for dt in lst_dt:
                if row[idx_sdg_dt[dt]]:
                    for actor in lst_actors:
                        dic_world[str(cat) + "-" + str(dt)] += 1
                        dic[actor][str(cat) + "-" + str(dt)] += 1


100%|██████████| 1072858/1072858 [00:06<00:00, 175293.32it/s]
100%|██████████| 744201/744201 [00:02<00:00, 302921.79it/s]
  0%|          | 1/33097 [00:00<00:01, 21290.88it/s]


KeyError: 'Economy-additive_manufacturing'

In [30]:
%%time
# Fractional version

import itertools
from tqdm import tqdm
dic = {c:{elem:0 for elem in lst_sdg+lst_cat+lst_dt+lst_cat_dt} for c in set_countries}
dic_world = {elem:0 for elem in lst_sdg+lst_cat+lst_dt+lst_cat_dt}

# SDG dataframe
for row in tqdm(df_sdg.itertuples(index=False, name=None), total=df_sdg.shape[0]):
    lst_actors = row[idx_sdg['CN']].split(", ")
    size_actors = len(lst_actors)
    sdgs_in_pub = []
    cats_in_pub = []
    # Country count
    for sdg in lst_sdg:
        if row[idx_sdg[sdg]]:
            sdgs_in_pub.append(sdg)
    for cat in lst_cat:
        if row[idx_sdg[cat]]:
            cats_in_pub.append(cat)
    tot_sdg_in_pub = len(sdgs_in_pub)
    tot_cat_in_pub = len(cats_in_pub)
    for sdg in sdgs_in_pub:
        dic_world[sdg] += 1/tot_sdg_in_pub
    for cat in cats_in_pub:
        dic_world[cat] += 1/tot_cat_in_pub

    for actor in lst_actors:
        for sdg in sdgs_in_pub:
            dic[actor][sdg] += 1/(size_actors*tot_sdg_in_pub)
        for cat in cats_in_pub:
            dic[actor][cat] += 1/(size_actors*tot_cat_in_pub)


# DT dataframe
for row in tqdm(df_dt.itertuples(index=False, name=None), total=df_dt.shape[0]):
    lst_actors = row[idx_dt['CN']].split(", ")
    tot_actors = len(lst_actors)
    dt_in_pub = []
    # Country count
    for dt in lst_dt:
        if row[idx_dt[dt]]:
            dt_in_pub.append(dt)
    tot_dt = len(dt_in_pub)
    for dt in dt_in_pub:
        dic_world[dt] += 1/tot_dt
    for actor in lst_actors:
        if actor in dic:
            for dt in dt_in_pub:
                dic[actor][dt] += 1/(tot_actors*tot_dt)


# Intersection
for row in tqdm(df_sdg_dt.itertuples(index=False, name=None), total=df_sdg_dt.shape[0]):
    # Country count
    lst_actors = row[idx_sdg_dt['CN']].split(", ")
    tot_actors = len(lst_actors)
    inter_in_pub = []
    for cat in lst_cat:
        if row[idx_sdg_dt[cat]]:
            for dt in lst_dt:
                if row[idx_sdg_dt[dt]]:
                    inter_in_pub.append(str(cat) + "-" + str(dt))
    tot_inter = len(inter_in_pub)
    for inter in inter_in_pub:
        if inter in dic_world:
            dic_world[inter] += 1/tot_inter

    for actor in lst_actors:
        for inter in inter_in_pub:
            if inter in lst_cat_dt:
                dic[actor][inter] += (1 / (tot_inter*tot_actors))


100%|██████████| 1072858/1072858 [00:06<00:00, 164869.97it/s]
100%|██████████| 744201/744201 [00:02<00:00, 280970.37it/s]
100%|██████████| 33097/33097 [00:00<00:00, 188167.50it/s]

CPU times: user 9.38 s, sys: 9.22 ms, total: 9.39 s
Wall time: 9.38 s





In [59]:
df_spec_raw = pd.DataFrame.from_dict(data=dic, orient="index", columns=lst_sdg+lst_cat+lst_dt+lst_cat_dt)
df_spec_raw

Unnamed: 0,SDG1,SDG2,SDG3,SDG4,SDG5,SDG6,SDG7,SDG8,SDG9,SDG10,...,additive_manufacturing,Environment-AI,Environment-robotics,Environment-IOT,Society-AI,Society-robotics,Society-IOT,Economy-AI,Economy-robotics,Economy-IOT
Costa Rica,11.620346,4.846962,22.184238,6.491811,17.698195,40.498821,5.932318,6.536299,2.530253,5.525912,...,4.120177,1.581685,1.354762,0.000000,0.266917,1.230769,1.000000,0.690115,0.000000,0.000000
Djibouti,0.000000,0.000000,0.497182,0.000000,1.750000,1.333333,1.133333,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
British Virgin Islands,0.000000,0.000000,0.000000,0.208333,0.319444,0.000000,0.000000,0.000000,0.444444,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Pakistan,232.625673,272.863896,1278.458961,271.031851,178.267194,1133.430620,283.967963,167.659723,218.210217,89.803850,...,76.671853,35.479886,9.130907,6.552943,83.426089,6.378739,60.752050,14.271337,1.155732,10.940575
Peru,40.573444,24.037914,151.416516,27.188956,46.775481,79.062414,10.970984,28.194388,23.493587,14.024855,...,6.734921,4.709991,2.295112,0.285714,6.846755,1.029412,5.400000,1.666667,0.043478,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Dominican Republic,1.449713,1.701008,9.261524,2.510606,2.458589,2.353968,2.457540,0.762500,1.366667,1.225000,...,0.000000,0.000000,0.000000,0.000000,0.052632,0.000000,0.000000,0.000000,0.000000,0.000000
Chile,109.974318,48.649704,272.237137,136.228628,182.708862,409.047202,123.273027,71.322740,76.836620,56.892057,...,65.717615,22.455288,2.785915,2.559524,20.814520,3.105577,6.952778,2.159890,2.029814,1.603175
Suriname,0.583333,1.689286,3.205776,2.677778,0.200000,0.341982,0.298413,0.500000,0.000000,0.000000,...,0.000000,0.000000,0.200000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
United Kingdom,2463.008787,718.270979,6569.767232,3233.334322,2529.853424,2521.011413,1612.875329,1637.611038,932.211677,1892.000897,...,2544.575995,214.494374,109.164275,38.340099,344.949860,70.249021,122.012536,78.389966,20.766258,38.675685


In [60]:
df_spec_raw.loc['world', :] = df_spec_raw.sum(axis=0)
df_spec_raw

Unnamed: 0,SDG1,SDG2,SDG3,SDG4,SDG5,SDG6,SDG7,SDG8,SDG9,SDG10,...,additive_manufacturing,Environment-AI,Environment-robotics,Environment-IOT,Society-AI,Society-robotics,Society-IOT,Economy-AI,Economy-robotics,Economy-IOT
Costa Rica,11.620346,4.846962,22.184238,6.491811,17.698195,40.498821,5.932318,6.536299,2.530253,5.525912,...,4.120177,1.581685,1.354762,0.000000,0.266917,1.230769,1.000000,0.690115,0.000000,0.000000
Djibouti,0.000000,0.000000,0.497182,0.000000,1.750000,1.333333,1.133333,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
British Virgin Islands,0.000000,0.000000,0.000000,0.208333,0.319444,0.000000,0.000000,0.000000,0.444444,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Pakistan,232.625673,272.863896,1278.458961,271.031851,178.267194,1133.430620,283.967963,167.659723,218.210217,89.803850,...,76.671853,35.479886,9.130907,6.552943,83.426089,6.378739,60.752050,14.271337,1.155732,10.940575
Peru,40.573444,24.037914,151.416516,27.188956,46.775481,79.062414,10.970984,28.194388,23.493587,14.024855,...,6.734921,4.709991,2.295112,0.285714,6.846755,1.029412,5.400000,1.666667,0.043478,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Chile,109.974318,48.649704,272.237137,136.228628,182.708862,409.047202,123.273027,71.322740,76.836620,56.892057,...,65.717615,22.455288,2.785915,2.559524,20.814520,3.105577,6.952778,2.159890,2.029814,1.603175
Suriname,0.583333,1.689286,3.205776,2.677778,0.200000,0.341982,0.298413,0.500000,0.000000,0.000000,...,0.000000,0.000000,0.200000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
United Kingdom,2463.008787,718.270979,6569.767232,3233.334322,2529.853424,2521.011413,1612.875329,1637.611038,932.211677,1892.000897,...,2544.575995,214.494374,109.164275,38.340099,344.949860,70.249021,122.012536,78.389966,20.766258,38.675685
Netherlands,522.432262,306.952476,1447.455326,956.151140,547.425423,1113.766029,524.692707,411.364796,283.222451,474.680714,...,619.051493,55.353642,18.160269,2.643754,85.560965,26.556285,18.998476,17.413233,8.926885,6.320833


In [39]:
df_world = pd.DataFrame.from_dict(data=dic_world, orient="index", columns=['tot']).transpose()
df_world

Unnamed: 0,SDG1,SDG2,SDG3,SDG4,SDG5,SDG6,SDG7,SDG8,SDG9,SDG10,...,additive_manufacturing,Environment-AI,Environment-robotics,Environment-IOT,Society-AI,Society-robotics,Society-IOT,Economy-AI,Economy-robotics,Economy-IOT
tot,27851.0,15794.709524,113784.730159,45341.452381,33099.368254,101124.846825,39971.106349,20183.583333,16413.189683,16117.011905,...,46532.880952,6175.722222,1686.088889,853.005556,8843.188889,1512.597222,3839.655556,1648.088889,348.697222,609.838889


In [64]:
# Keeping only some relevant countries
all_pub_wos = pd.read_excel("../data/total_number_pubs_2010_2022.xlsx", index_col=0)
tot_pub_countries = all_pub_wos.loc['TOT', :]
index_countries = list(tot_pub_countries.index)[2:]
lst_eu = ["Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic", "Denmark",
          "Estonia", "Finland", "France", "Germany", "Greece", "Hungary", "Ireland", "Italy",
          "Latvia", "Lithuania", "Luxembourg", "Malta", "Netherlands", "Poland",
          "Portugal", "Romania" ,"Slovakia", "Slovenia", "Spain", "Sweden"]
lst_eu.sort()
rest = sorted(list(set(index_countries).difference(set(lst_eu))))
rest.remove('Liechtenstein')
rest

['Australia',
 'Canada',
 'China',
 'Iceland',
 'Israel',
 'Japan',
 'Norway',
 'South Korea',
 'Switzerland',
 'United Kingdom',
 'United States']

In [58]:
df_spec_raw_categories = df_spec_raw.loc[:, ['Environment', 'Society', 'Economy']]
df_spec_raw_categories.loc[:, 'tot'] = df_spec_raw_categories.sum(axis=1)


KeyError: "None of [Index(['Environment', 'Society', 'Economy'], dtype='object')] are in the [columns]"

In [41]:
df_spec_raw_dt = df_spec_raw.loc[:, lst_dt]
df_spec_raw_dt.loc[:, 'tot'] = df_spec_raw_dt.sum(axis=1)
df_spec_raw_dt

Unnamed: 0,AI,big_data,IOT,computing_infrastructure,blockchain,robotics,additive_manufacturing,tot
Costa Rica,42.452743,2.651587,5.570238,7.728227,0.000000,15.655597,4.120177,78.178570
Djibouti,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
British Virgin Islands,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Pakistan,3286.726553,259.439355,896.176491,681.182289,103.892222,434.254503,76.671853,5738.343267
Peru,158.713209,7.177040,11.364881,16.900621,3.375000,22.645160,6.734921,226.910831
...,...,...,...,...,...,...,...,...
Chile,900.570854,74.875789,64.964029,68.248133,9.453571,167.676834,65.717615,1351.506824
Suriname,2.450000,0.000000,0.000000,0.000000,0.000000,0.200000,0.000000,2.650000
United Kingdom,15597.854576,2133.699738,2702.992236,2245.691577,415.988247,4752.807304,2544.575995,30393.609672
Netherlands,4355.906330,522.020797,638.041692,412.758596,91.634319,1418.260439,619.051493,8057.673666


In [42]:
df_spec_raw_sdg = df_spec_raw.loc[:, lst_sdg]
df_spec_raw_sdg.loc[:, 'tot'] = df_spec_raw_sdg.sum(axis=1)
df_spec_raw_sdg

Unnamed: 0,SDG1,SDG2,SDG3,SDG4,SDG5,SDG6,SDG7,SDG8,SDG9,SDG10,SDG11,SDG12,SDG13,SDG14,SDG15,SDG16,SDG17,tot
Costa Rica,11.620346,4.846962,22.184238,6.491811,17.698195,40.498821,5.932318,6.536299,2.530253,5.525912,20.046035,24.704008,132.536268,138.149292,65.740721,15.020039,14.905047,5.349666e+02
Djibouti,0.000000,0.000000,0.497182,0.000000,1.750000,1.333333,1.133333,0.000000,0.000000,0.000000,0.000000,0.000000,0.983631,4.213889,0.447917,0.250000,0.083333,1.069262e+01
British Virgin Islands,0.000000,0.000000,0.000000,0.208333,0.319444,0.000000,0.000000,0.000000,0.444444,0.000000,0.000000,0.000000,0.143478,0.569444,0.208333,0.000000,0.000000,1.893478e+00
Pakistan,232.625673,272.863896,1278.458961,271.031851,178.267194,1133.430620,283.967963,167.659723,218.210217,89.803850,411.148771,625.825926,1258.063896,393.294999,420.742353,266.016252,346.303137,7.847715e+03
Peru,40.573444,24.037914,151.416516,27.188956,46.775481,79.062414,10.970984,28.194388,23.493587,14.024855,36.538973,46.574410,206.667291,162.426031,88.703927,38.998254,56.931063,1.082578e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Chile,109.974318,48.649704,272.237137,136.228628,182.708862,409.047202,123.273027,71.322740,76.836620,56.892057,255.245626,228.418731,1109.285515,1272.279206,280.048366,118.513986,114.234930,4.865197e+03
Suriname,0.583333,1.689286,3.205776,2.677778,0.200000,0.341982,0.298413,0.500000,0.000000,0.000000,1.100000,0.083333,1.840668,2.966847,1.261155,1.346561,1.755952,1.985108e+01
United Kingdom,2463.008787,718.270979,6569.767232,3233.334322,2529.853424,2521.011413,1612.875329,1637.611038,932.211677,1892.000897,2954.232053,2504.358263,16259.382303,6124.986704,2723.655900,4128.571649,2400.787567,6.120592e+04
Netherlands,522.432262,306.952476,1447.455326,956.151140,547.425423,1113.766029,524.692707,411.364796,283.222451,474.680714,1132.491660,1087.581950,4432.123599,1566.230074,786.829078,1084.449138,655.935762,1.733378e+04


In [43]:
df_spec_raw_sdg_dt = df_spec_raw.loc[:, lst_cat_dt]
df_spec_raw_sdg_dt.loc[:, 'tot'] = df_spec_raw_sdg_dt.sum(axis=1)
df_spec_raw_sdg_dt

Unnamed: 0,Environment-AI,Environment-robotics,Environment-IOT,Society-AI,Society-robotics,Society-IOT,Economy-AI,Economy-robotics,Economy-IOT,tot
Costa Rica,1.581685,1.354762,0.000000,0.266917,1.230769,1.000000,0.690115,0.000000,0.000000,6.124249
Djibouti,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
British Virgin Islands,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Pakistan,35.479886,9.130907,6.552943,83.426089,6.378739,60.752050,14.271337,1.155732,10.940575,228.088259
Peru,4.709991,2.295112,0.285714,6.846755,1.029412,5.400000,1.666667,0.043478,0.000000,22.277129
...,...,...,...,...,...,...,...,...,...,...
Chile,22.455288,2.785915,2.559524,20.814520,3.105577,6.952778,2.159890,2.029814,1.603175,64.466481
Suriname,0.000000,0.200000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.200000
United Kingdom,214.494374,109.164275,38.340099,344.949860,70.249021,122.012536,78.389966,20.766258,38.675685,1037.042075
Netherlands,55.353642,18.160269,2.643754,85.560965,26.556285,18.998476,17.413233,8.926885,6.320833,239.934343


['Australia',
 'Canada',
 'China',
 'Iceland',
 'Israel',
 'Japan',
 'Liechtenstein',
 'Norway',
 'South Korea',
 'Switzerland',
 'United Kingdom',
 'United States']

In [65]:
lst_df_spec = []
for df_spec_raw in [df_spec_raw_categories,df_spec_raw_sdg, df_spec_raw_dt, df_spec_raw_sdg_dt]:
    df_spec = df_spec_raw.copy().iloc[:-1, :-1].loc[lst_eu+rest, :]
    for cat in df_spec.columns:
        for country in df_spec.index:
            df_spec.loc[country,cat] = (df_spec_raw.loc[country,cat] / df_spec_raw.loc[country, 'tot']) / (df_spec_raw.loc["world", cat] / df_spec_raw.loc['world', 'tot'])
    lst_df_spec.append(df_spec)


                 Environment       Society       Economy
Austria          2577.619824   1953.081453    841.934038
Belgium          3367.682342   3671.939373   1331.885397
Bulgaria          498.386897    381.683896    129.099104
Croatia          1211.941616   1000.270243    409.455492
Cyprus            284.492136    428.381636    138.865110
Czech Republic   2178.121783   1687.507668    753.006572
Denmark          3672.461444   3176.451521   1129.049004
Estonia           691.806510    401.529104    145.048533
Finland          3711.514002   2946.200896   1061.505828
France          13282.584572   7437.898539   2570.477427
Germany         18017.923878  13044.787452   5020.884601
Greece           3303.587274   3049.769150    944.688646
Hungary          1179.920775   1196.883137    390.733839
Ireland          1686.258362   2070.466163    724.481379
Italy           16574.557903  15429.183025   5288.428966
Latvia            269.004360    204.459507    108.659105
Lithuania         736.049719   

[                Environment   Society   Economy
 Austria            1.125941  0.822581  1.187497
 Belgium            0.944086  0.992516  1.205605
 Bulgaria           1.159008  0.855826  0.969395
 Croatia            1.084896  0.863347  1.183507
 Cyprus             0.783877  1.138073  1.235460
 Czech Republic     1.106757  0.826757  1.235455
 Denmark            1.080312  0.900940  1.072415
 Estonia            1.311032  0.733681  0.887564
 Finland            1.128396  0.863644  1.042055
 France             1.338379  0.722618  0.836312
 Germany            1.171869  0.818037  1.054417
 Greece             1.062339  0.945597  0.980898
 Hungary            1.000560  0.978597  1.069866
 Ireland            0.883107  1.045488  1.225107
 Italy              1.043058  0.936205  1.074610
 Latvia             1.084498  0.794764  1.414468
 Lithuania          0.985790  0.851112  1.544488
 Luxembourg         0.650181  1.093532  1.816316
 Malta              1.052169  0.910061  1.132741
 Netherlands        

In [68]:
with pd.ExcelWriter('output.xlsx') as writer:
     for i, df in enumerate(lst_df_spec):
        df.to_excel(writer, sheet_name=f'Sheet{i+1}')

## New Specialization graphs

In [77]:
def transform_discrete_value(value):
    if value < 0.75:
        return "Under-specialized (RTA < 0.75)"
    elif 0.75 <= value <= 1.25:
        return "Not-specialized (0.75 <= RTA <= 1.25)"
    elif value > 1.25:
        return "Specialized (RTA > 1.25)"
    else:
        return "error"

In [141]:
def reorder_values(dataframe, col):
    """

    Args:
        dataframe:
        col:

    Returns:
        The dataframe reordered so that Specialized is on top, Not specialized in the middle, and under specialized is
        at the bottom of the legend
    """
    lst_index = list(dataframe.index)

    bottom_index = dataframe.loc[dataframe[col] == "Under-specialized (RTA < 0.75)"].index
    middle_index = dataframe.loc[dataframe[col] == "Not-specialized (0.75 <= RTA <= 1.25)"].index
    top_index = dataframe.loc[dataframe[col] == "Specialized (RTA > 1.25)"].index

    new_order = [top_index, middle_index, bottom_index]
    new_order_single_elem = []
    for elem in new_order:
        if len(elem) > 0:
            new_order_single_elem.append(elem[0])
            lst_index.remove(elem[0])

    return new_order_single_elem + lst_index

### CATGORIES SPEC GRAPHS

In [150]:
import plotly.express as px
# df_spec_cat = pd.read_excel("../img/Commission/spec_fractional.xlsx", sheet_name="cat", index_col=0)
df_spec_cat = pd.read_excel("../img/old_commission/specialization copy.xlsx", sheet_name="categories", index_col=0)
df_spec_cat = df_spec_cat.loc[lst_eu, :]
df_spec_cat = df_spec_cat.apply(np.vectorize(transform_discrete_value))


for cat in lst_cat:
    ordered_index = reorder_values(df_spec_cat, cat)
    fig = px.choropleth(
        # locations=df_spec_cat.loc[lst_eu, :].index,
        locations=ordered_index,
        color=df_spec_cat.loc[ordered_index, cat],
        color_discrete_map={"Under-specialized (RTA < 0.75)":"#2ECBE9", "Not-specialized (0.75 <= RTA <= 1.25)":"#128FC8", "Specialized (RTA > 1.25)":"#00468B"},
        locationmode="country names",
    )

    fig.update_layout(
        template='simple_white',
        font=dict(size=18),
        showlegend=True,
        legend=dict
            (
            title="Specialization",
            yanchor="top",
            y=1,
            xanchor="left",
            x=0.048,
            bordercolor="Black",
            borderwidth=2
            ),
        geo=go.layout.Geo(
            scope="world",
            landcolor='lightgray',
            projection_scale=float(6.1),
            center=dict(lon=15, lat=52),
            projection_type="azimuthal equal area",
            showland=True,
            showcountries=False,
        ),
        height=1080,
        width=1200


    )
    fig.write_image(f"../img/Commission/maps/specialization/full_count/{cat}.jpg")
    # pio.write_image(fig, folder, format='png')

### DT SPEC GRAPSH

In [151]:
import plotly.express as px
# df_spec_dt =pd.read_excel("../img/Commission/spec_fractional.xlsx", sheet_name="dt", index_col=0)
df_spec_dt =pd.read_excel("../img/old_commission/specialization copy.xlsx", sheet_name="dt", index_col=0)
df_spec_dt = df_spec_dt.loc[lst_eu, :]
df_spec_dt = df_spec_dt.apply(np.vectorize(transform_discrete_value))
for dt in ['AI', 'IOT', 'robotics']:
    ordered_index = reorder_values(df_spec_dt, dt)
    fig = px.choropleth(
        locations=ordered_index,
        color=df_spec_dt.loc[ordered_index, dt],
        color_discrete_map={"Under-specialized (RTA < 0.75)":"#2ECBE9", "Not-specialized (0.75 <= RTA <= 1.25)":"#128FC8", "Specialized (RTA > 1.25)":"#00468B"},
        locationmode="country names",
    )

    fig.update_layout(
        template='simple_white',
        font=dict(size=18),
        showlegend=True,

        legend=dict
            (
            title="Specialization",
            yanchor="top",
            y=1,
            xanchor="left",
            x=0.048,
            bordercolor="Black",
            borderwidth=2
            ),
        geo=go.layout.Geo(
            scope="world",
            landcolor='lightgray',
            projection_scale=float(6.1),
            center=dict(lon=15, lat=52),
            projection_type="azimuthal equal area",
            showland=True,
            showcountries=False,
        ),
        height=1080,
        width=1200


    )
    fig.write_image(f"../img/Commission/maps/specialization/full_count//{dt}.jpg")
    # pio.write_image(fig, folder, format='png')

## CAT DT SPEC GRAPHS

In [153]:
import plotly.express as px
# df_spec_cat_dt = pd.read_excel("../img/Commission/spec_fractional.xlsx", sheet_name="cat-dt", index_col=0)
df_spec_cat_dt = pd.read_excel("../img/old_commission/specialization copy.xlsx", sheet_name="cat-dt", index_col=0)
df_spec_cat_dt = df_spec_cat_dt.fillna(0)
df_spec_cat_dt = df_spec_cat_dt.loc[lst_eu, :]
df_spec_cat_dt = df_spec_cat_dt.apply(np.vectorize(transform_discrete_value))

for dt in df_spec_cat_dt.columns:
    ordered_index = reorder_values(df_spec_cat_dt, dt)
    fig = px.choropleth(
        locations=ordered_index,
        color=df_spec_cat_dt.loc[ordered_index, dt],
        color_discrete_map={"Under-specialized (RTA < 0.75)":"#2ECBE9", "Not-specialized (0.75 <= RTA <= 1.25)":"#128FC8", "Specialized (RTA > 1.25)":"#00468B"},
        locationmode="country names",
    )

    fig.update_layout(
        template='simple_white',
        font=dict(size=18),
        showlegend=True,

        legend=dict
            (
            title="Specialization",
            yanchor="top",
            y=1,
            xanchor="left",
            x=0.048,
            bordercolor="Black",
            borderwidth=2
            ),
        geo=go.layout.Geo(
            scope="world",
            landcolor='lightgray',
            projection_scale=float(6.1),
            center=dict(lon=15, lat=52),
            projection_type="azimuthal equal area",
            showland=True,
            showcountries=False,
        ),
        height=1080,
        width=1200


    )
    fig.write_image(f"../img/Commission/maps/specialization/full_count/{dt}.jpg")
    # pio.write_image(fig, folder, format='png')

## SDG SPEC GRAPHS

In [155]:
import plotly.express as px
# df_spec_sdg = pd.read_excel("../img/Commission/spec_fractional.xlsx", sheet_name="sdg", index_col=0)
df_spec_sdg = pd.read_excel("../img/old_commission/specialization copy.xlsx", sheet_name="sdg", index_col=0)
df_spec_sdg = df_spec_sdg.fillna(0)
df_spec_sdg = df_spec_sdg.loc[lst_eu, :]
df_spec_sdg = df_spec_sdg.apply(np.vectorize(transform_discrete_value))

for sdg in lst_sdg:
    ordered_index = reorder_values(df_spec_sdg, sdg)
    fig = px.choropleth(
        locations=ordered_index,
        color=df_spec_sdg.loc[ordered_index, sdg],
        color_discrete_map={"Under-specialized (RTA < 0.75)":"#2ECBE9", "Not-specialized (0.75 <= RTA <= 1.25)":"#128FC8", "Specialized (RTA > 1.25)":"#00468B"},
        locationmode="country names",
    )

    fig.update_layout(
        template='simple_white',
        font=dict(size=18),
        showlegend=True,

        legend=dict
            (
            title="Specialization",
            yanchor="top",
            y=1,
            xanchor="left",
            x=0.048,
            bordercolor="Black",
            borderwidth=2
            ),
        geo=go.layout.Geo(
            scope="world",
            landcolor='lightgray',
            projection_scale=float(6.1),
            center=dict(lon=15, lat=52),
            projection_type="azimuthal equal area",
            showland=True,
            showcountries=False,
        ),
        height=1080,
        width=1200


    )
    fig.write_image(f"../img/Commission/maps/specialization/full_count/SDG/{sdg}.jpg")
    # pio.write_image(fig, folder, format='png')