In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import networkx as nx

from sklearn.manifold import TSNE, MDS
from sklearn.decomposition import PCA

In [None]:
# Names of 41 European countries, in alphabetical order.
europe = [
    "Albania",
    "Austria",
    "Belarus",
    "Belgium",
    "Bosnia and Herzegovina",
    "Bulgaria",
    "Croatia",
    "Cyprus",
    "Czechia",
    "Denmark",
    "Estonia",
    "Finland",
    "France",
    "Germany",
    "Greece",
    "Hungary",
    "Iceland",
    "Ireland",
    "Italy",
    "Kosovo",
    "Latvia",
    "Lithuania",
    "Luxembourg",
    "Malta",
    "Moldova",
    "Montenegro",
    "Netherlands",
    "North Macedonia",
    "Norway",
    "Poland",
    "Portugal",
    "Romania",
    "Russia",
    "Serbia",
    "Slovakia",
    "Slovenia",
    "Spain",
    "Sweden",
    "Switzerland",
    "Ukraine",
    "United Kingdom",
]

In [None]:
# Read the poverty-share CSV and replace its header with four short column
# names (the file is expected to have exactly four columns, otherwise the
# relabelling raises).
poverty_data = pd.read_csv(
    "../data/poverty-share-on-less-than-30-per-day.csv"
).set_axis(["Entity", "Code", "Year", "Percentage"], axis="columns")

In [None]:
# Reshape the long table into a wide one: one row per entity, one column per
# year, cells holding the percentage. Rows with a missing value in any year
# are dropped so that every remaining entity has a complete series.
# An optional restriction to European countries is currently disabled:
#   poverty_data[poverty_data["Entity"].isin(europe)]
df = (
    poverty_data
    .pivot(index="Entity", columns="Year", values="Percentage")
    .dropna()
)

df.head()

In [None]:
# Standardize every year column to zero mean and unit sample standard
# deviation (pandas' default ddof=1, the same as the per-column computation).
# df.mean() and df.std() are Series indexed by column label, so the
# arithmetic broadcasts column-wise and no explicit loop is needed.
df_std = (df - df.mean()) / df.std()

df_std.head()

In [None]:
# Run PCA on the raw and on the standardized data and plot every row
# (entity) at its coordinates on the first two principal components.
pca = PCA(n_components=df.shape[1])
dfs = [df, df_std]
fig, ax = plt.subplots(1, 2, figsize=(12, 5))

for i, dataframe in enumerate(dfs):

    # fit() returns the estimator itself; transform() then yields the scores
    # with shape (n_samples, n_components).
    res = pca.fit(dataframe)
    scores = res.transform(dataframe)

    # components_ has shape (n_components, n_features); transposed, each row
    # is an input column and each column a principal component.
    loadings = pd.DataFrame(
        res.components_.T,
        columns=["PC%s" % k for k in range(len(res.components_))],
        index=dataframe.columns,
    )

    #ax[i].plot(pca.explained_variance_ratio_)
    # Plot the projected samples. Indexing components_[:, 0] / [:, 1] would
    # instead plot each component's weight on the first two input columns.
    ax[i].scatter(scores[:, 0], scores[:, 1])
    ax[i].set_xlabel("PC0")
    ax[i].set_ylabel("PC1")

ax[0].set_title("Original data")
ax[1].set_title("Standardized data")
plt.suptitle("PCA with 2 components")
plt.show()

In [None]:
# Embed the rows of df in two dimensions with MDS, once per random seed,
# and show the two embeddings side by side.
random_states = [0, 42]
fig, ax = plt.subplots(1, 2, figsize=(12, 5))

for panel, seed in zip(ax, random_states):
    mds = MDS(n_components=2, random_state=seed)
    X = mds.fit_transform(df)

    # First embedding coordinate on x, second on y.
    panel.scatter(X[:, 0], X[:, 1])
    panel.set_title(f"Random Seed = {seed}")

fig.suptitle("MDS with 2 components")
plt.show()

In [None]:
# Grid of t-SNE embeddings of df: one row of panels per random seed, one
# column per perplexity value.
# NOTE(review): scikit-learn requires perplexity to be smaller than the
# number of rows in df - confirm the largest value (50) is valid for the data.
fig, ax = plt.subplots(2, 3, figsize=(14, 10))
perplex = [5, 30, 50]
random_states = [0, 42]

for i, seed in enumerate(random_states):
    for j, p in enumerate(perplex):
        tsne = TSNE(n_components=2, perplexity=p, random_state=seed, init="random")
        X = tsne.fit_transform(df)
        x, y = X[:, 0], X[:, 1]

        ax[i, j].scatter(x, y)

        # Label perplexity once per column (top row) and the seed once per
        # row (left column).
        if i == 0:
            ax[i, j].set_title(f"Perplexity = {p}")
        if j == 0:
            ax[i, j].set_ylabel(f"Random seed = {seed}")

fig.suptitle("t-SNE on 2 components")
# Render explicitly, like the other plotting cells, so the cell does not echo
# the Text object returned by suptitle.
plt.show()

In [None]:
# Square adjacency matrix over the 17 SDG nodes (0-based indices), initially
# all zeros.
n_sdgs = 17
sdg_network = np.zeros(shape=(n_sdgs, n_sdgs))

# Edge list as (row, col) index pairs, one source node per line. Every pair
# has row < col, so only the upper triangle of the matrix is populated.
idx_mask = np.array(
    [
        (0, 1), (0, 2), (0, 3), (0, 5), (0, 9),
        (1, 2), (1, 5), (1, 9),
        (2, 3), (2, 5), (2, 16),
        (3, 4), (3, 7), (3, 8),
        (4, 7), (4, 9), (4, 16),
        (5, 9),
        (6, 10), (6, 11), (6, 12), (6, 13), (6, 14),
        (7, 8), (7, 11),
        (8, 10),
        (9, 16),
        (10, 15),
        (11, 12), (11, 13), (11, 14),
        (12, 13), (12, 14), (12, 16),
        (13, 14),
        (15, 16),
    ]
)

# Mark each listed (row, col) cell with a 1 via integer-array indexing.
sdg_network[tuple(idx_mask.T)] = 1
# Build the graph from the adjacency matrix, and map every 0-based node id to
# the 1-based number used as its label when drawing.
G = nx.from_numpy_array(sdg_network)
l = dict(enumerate(range(1, 18)))

In [None]:
# Draw the graph with networkx's circular layout, labelling nodes with the
# 1-based numbers from `l`.
radial = nx.circular_layout(G)

fig, ax = plt.subplots(1, figsize=(8, 6))
nx.draw(G, pos=radial, ax=ax, labels=l, with_labels=True)
ax.set_title("Radial layout")
plt.show()

In [None]:
# Kamada-Kawai layouts started from two different seeded random placements,
# drawn side by side.
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
seeds = [0, 42]

for panel, s in zip(ax, seeds):
    # The seed only controls the random starting positions handed to the
    # layout through `pos`.
    kk = nx.kamada_kawai_layout(G, pos=nx.random_layout(G, seed=s))
    nx.draw(G, pos=kk, ax=panel, labels=l)
    panel.set_title(f"seed = {s}")

plt.suptitle("Kamada-Kawai layout with two different seeds")
plt.show()

In [None]:
# Fruchterman-Reingold (spring) layouts computed with two different seeds,
# drawn side by side.
seeds = [0, 42]

fig, ax = plt.subplots(1, 2, figsize=(12, 6))

for i, s in enumerate(seeds):
    # Hand the seed to the layout itself rather than reseeding NumPy's global
    # RNG, so the cell is reproducible without changing random state that
    # other cells may rely on.
    fr = nx.fruchterman_reingold_layout(G, seed=s)
    nx.draw(G, labels=l, ax=ax[i], pos=fr)

    ax[i].set_title(f"seed = {s}")

# Title spelling corrected: the algorithm is named Fruchterman-Reingold.
fig.suptitle("Fruchterman-Reingold layout with two different seeds")
plt.show()