In [1]:
import pandas as pd
import plotly.graph_objects as go
from itertools import combinations
from collections import Counter


In [2]:
# Reminder: how to build a dict from two DataFrame columns
# dic = dict(zip(df_continent.Country, df_continent.Continent))

In [3]:
def unique_actors(df, map_filter='World', df_country=None):
    """Return the distinct actors (countries or continents) mentioned in ``df``.

    Every value of the ``CN`` column is split on ``", "`` to obtain the
    country names it mentions.

    Args:
        df: DataFrame with a ``CN`` column of ``", "``-separated country names.
        map_filter: Selects what is returned:
            ``'World'`` -- every country name found in ``CN``;
            ``'Europe'`` -- only the countries whose continent is ``'Europe'``;
            ``'Continent'`` -- the continents of the countries found.
            Any other value yields an empty list.
        df_country: Optional country reference table with ``name`` and
            ``continent`` columns. When omitted, the table is read from
            ``../data/country_continent.csv`` (tab-separated), and only when
            the chosen filter actually needs it.

    Returns:
        A sorted list of unique, non-empty names. Countries that are missing
        from the reference table, or whose continent is missing, are skipped
        by the ``'Europe'`` and ``'Continent'`` filters.
    """
    if map_filter not in ('World', 'Europe', 'Continent'):
        return []

    # Collect every distinct name once instead of querying the reference
    # table again for each mention.
    mentioned = {name for cell in df["CN"] for name in cell.split(", ")}
    mentioned.discard("")

    if map_filter == 'World':
        return sorted(mentioned)

    if df_country is None:
        df_country = pd.read_csv("../data/country_continent.csv", sep='\t')

    # name -> continent lookup; the first known continent wins for a name that
    # appears more than once, and rows without a continent are ignored.
    continent_of = {}
    for name, continent in zip(df_country["name"], df_country["continent"]):
        if pd.notna(continent) and continent != "":
            continent_of.setdefault(name, continent)

    if map_filter == 'Europe':
        return sorted(
            country for country in mentioned
            if continent_of.get(country) == 'Europe'
        )

    # map_filter == 'Continent'
    return sorted(
        {continent_of[country] for country in mentioned if country in continent_of}
    )

In [4]:
# Country reference table (tab-separated); later cells read its name,
# continent, latitude and longitude columns.
df_lst_country = pd.read_csv("../data/country_continent.csv", sep='\t')

# Main dataset; later cells read its CN column.
# NOTE: read_pickle can execute arbitrary code -- only load trusted files.
df = pd.read_pickle("../data/output/WoS_SDG.pkl")
# For quick experiments, work on a slice instead: df = df.iloc[1:1000]

In [5]:
# Lookup: country name -> continent, taken row by row from the reference table.
dic = {
    country: continent
    for country, continent in zip(df_lst_country["name"], df_lst_country["continent"])
}
dic

{'Andorra': 'Europe',
 'United Arab Emirates': 'Asia',
 'Afghanistan': 'Asia',
 'Antigua and Barbuda': 'North America',
 'Anguilla': nan,
 'Albania': 'Europe',
 'Armenia': 'Europe',
 'Netherlands Antilles': nan,
 'Angola': 'Africa',
 'Antarctica': nan,
 'Argentina': 'South America',
 'American Samoa': nan,
 'Austria': 'Europe',
 'Australia': 'Oceania',
 'Aruba': nan,
 'Azerbaijan': 'Europe',
 'Bosnia and Herzegovina': 'Europe',
 'Barbados': 'North America',
 'Bangladesh': 'Asia',
 'Belgium': 'Europe',
 'Burkina Faso': nan,
 'Bulgaria': 'Europe',
 'Bahrain': 'Asia',
 'Burundi': 'Africa',
 'Benin': 'Africa',
 'Bermuda': nan,
 'Brunei': 'Asia',
 'Bolivia': 'South America',
 'Brazil': 'South America',
 'Bahamas': 'North America',
 'Bhutan': 'Asia',
 'Bouvet Island': nan,
 'Botswana': 'Africa',
 'Belarus': 'Europe',
 'Belize': 'North America',
 'Canada': 'North America',
 'Cocos [Keeling] Islands': nan,
 'Congo [DRC]': nan,
 'Central African Republic': 'Africa',
 'Congo [Republic]': nan,
 '

In [6]:
def give_lat_long(country: str, df_country=None) -> tuple:
    """Look up the coordinates of a country in the country reference table.

    Args:
        country: Country name, matched exactly against the ``name`` column.
        df_country: Optional reference table with ``name``, ``latitude`` and
            ``longitude`` columns. Defaults to the notebook-level
            ``df_lst_country``.

    Returns:
        a tuple (latitude, longitude) taken from the first matching row.

    Raises:
        IndexError: if ``country`` is not present in the table.
    """
    table = df_lst_country if df_country is None else df_country
    # A boolean mask, unlike a string-built query, is safe for names that
    # contain quote characters.
    match = table.loc[table["name"] == country, ["latitude", "longitude"]]
    if match.empty:
        raise IndexError("country %r not found in the country table" % country)
    return tuple(match.values[0])

In [7]:
# Node table: one row per distinct country mentioned anywhere in df.CN.
df_unique_actors = pd.DataFrame(
    unique_actors(df, map_filter='World'), columns=["Country"]
).sort_values(by="Country")

# Count every mention of every country over all rows.
total_mention = Counter()
for affiliations in df["CN"]:
    total_mention.update(sorted(affiliations.split(", ")))
df_unique_actors['total'] = df_unique_actors['Country'].map(
    lambda country: total_mention[country]
)

# Attach latitude/longitude/continent from the country reference table, then
# drop the helper columns that the merge brings along.
df_unique_actors = (
    df_unique_actors
    .merge(df_lst_country, left_on="Country", right_on='name')
    .drop(columns=['name', 'country', 'Unnamed: 0'])
)

# Despite its name, max_total is the SUM of all mention counts; percentage is
# therefore each country's share of all mentions.
max_total = df_unique_actors["total"].sum()
df_unique_actors['percentage'] = df_unique_actors['total'].map(
    lambda count: round(100*float(count)/max_total, 2)
)
df_unique_actors

Unnamed: 0,Country,total,latitude,longitude,continent,percentage
0,Afghanistan,316,33.93911,67.709953,Asia,0.01
1,Albania,804,41.153332,20.168331,Europe,0.02
2,Algeria,4274,28.033886,1.659626,Africa,0.13
3,Andorra,56,42.546245,1.601554,Europe,0.00
4,Angola,147,-11.202692,17.873887,Africa,0.00
...,...,...,...,...,...,...
181,Venezuela,1126,6.42375,-66.589730,South America,0.03
182,Vietnam,11725,14.058324,108.277199,Asia,0.35
183,Yemen,420,15.552727,48.516388,Asia,0.01
184,Zambia,1376,-13.133897,27.849332,Africa,0.04


In [8]:
# EDGE
# Count co-occurrences of country pairs within each row's CN field.
# Names are sorted first, so a pair is always stored as (smaller, larger).
# A country listed several times in one row contributes one count per
# occurrence; pairs of a country with itself are ignored.
total_edge = Counter()
for affiliations in df["CN"]:
    lst_actor_in_row = sorted(affiliations.split(", "))
    for c1, c2 in combinations(lst_actor_in_row, 2):
        if c1 != c2:
            total_edge[(c1, c2)] += 1

# One row per pair: the count plus the two endpoints.
df_edge = pd.DataFrame(
    [(count, c1, c2) for (c1, c2), count in total_edge.items()],
    columns=["edge", "c1", "c2"],
)

# Keep only the most-mentioned countries and the edges between them.
N_TOP_COUNTRIES = 100
df_unique_actors = df_unique_actors.nlargest(N_TOP_COUNTRIES, "total").reset_index(drop=True)
top_countries = set(df_unique_actors["Country"])
in_top = df_edge["c1"].isin(top_countries) & df_edge["c2"].isin(top_countries)
# Take an explicit copy so the column assignments below write to df_edge
# itself rather than to a slice of the unfiltered frame.
df_edge = df_edge.loc[in_top].copy().reset_index(drop=True)

# Resolve coordinates once per distinct country instead of four times per edge.
coords = {
    country: give_lat_long(country)
    for country in set(df_edge["c1"]) | set(df_edge["c2"])
}
df_edge['latitude_c1'] = df_edge['c1'].map(lambda country: coords[country][0])
df_edge['longitude_c1'] = df_edge['c1'].map(lambda country: coords[country][1])
df_edge['latitude_c2'] = df_edge['c2'].map(lambda country: coords[country][0])
df_edge['longitude_c2'] = df_edge['c2'].map(lambda country: coords[country][1])

# Largest pair count; the plotting cell uses it to scale edge opacity.
max_total_edge = df_edge["edge"].max()
print(max_total_edge)

358522


In [9]:
df_edge

Unnamed: 0,edge,c1,c2,latitude_c1,longitude_c1,latitude_c2,longitude_c2
0,1465,Canada,Saudi Arabia,56.130366,-106.346771,23.885942,45.079162
1,145355,Canada,United States,56.130366,-106.346771,37.09024,-95.712891
2,5753,Saudi Arabia,United States,23.885942,45.079162,37.09024,-95.712891
3,20056,Canada,Spain,56.130366,-106.346771,40.463667,-3.749220
4,315,Canada,Tunisia,56.130366,-106.346771,33.886917,9.537499
...,...,...,...,...,...,...,...
4803,1,Cuba,Kazakhstan,21.521757,-77.781167,48.019573,66.923684
4804,1,Egypt,Madagascar,26.820553,30.802498,-18.766947,46.869107
4805,1,Madagascar,Sudan,-18.766947,46.869107,12.862807,30.217636
4806,1,Malawi,Tunisia,-13.254308,34.301525,33.886917,9.537499


In [15]:
# Plotting
fig = go.Figure()

# Nodes: a single trace already carries every country, so it is added once
# (adding it once per row only stacks identical traces on top of each other).
fig.add_trace(
    go.Scattergeo(
        locationmode="country names",
        text=df_unique_actors.percentage,
        lon=df_unique_actors.longitude,
        lat=df_unique_actors.latitude,
        marker=dict(
            # Marker area grows with the country's share of mentions.
            size=5 * df_unique_actors.percentage,
            line_width=0,
            showscale=True,
            # colorscale options
            # 'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
            # 'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
            # 'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
            colorscale='YlGnBu',
            reversescale=True,
            # NOTE(review): the colour array is empty, so the colour bar has
            # no values to map -- decide which metric should drive the colour.
            color=[],
            colorbar=dict(
                thickness=15,
                # title.side replaces the deprecated `titleside` attribute.
                title=dict(text='Node Connections', side='right'),
                xanchor='left',
            ),
        ),
    )
)

# Edges: one line trace per country pair so that each can carry its own
# opacity, scaled by the strongest edge.
for edge in df_edge.itertuples(index=False):
    fig.add_trace(
        go.Scattergeo(
            locationmode='country names',
            lon=[edge.longitude_c1, edge.longitude_c2],
            lat=[edge.latitude_c1, edge.latitude_c2],
            mode="lines",
            line=dict(width=5, color='red'),
            opacity=edge.edge / max_total_edge,
            # Keep the per-edge traces out of the legend.
            showlegend=False,
        )
    )

fig.update_layout(
    template='plotly',
    title_text="Country analysis",
    showlegend=True,
    geo=go.layout.Geo(
        scope="world",
        projection_scale=1,
        center=dict(lon=0, lat=0),
        projection_type="natural earth",
        showland=True,
        landcolor='rgb(243, 243, 243)',
        countrycolor='rgb(204, 204, 204)',
        showcountries=True,
    ),
    height=1080,
    width=1980,
)
fig.show()

# NOTE(review): absolute, machine-specific output location -- consider moving
# it to a configurable path.
OUTPUT_DIR = "/media/kevin-main/My Passport/SDG/img/network_map/"
name = "test"
fig.write_image(OUTPUT_DIR + name + ".jpeg")