In [None]:
import matplotlib.pyplot as plt
from matplotlib import colormaps
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px

from sklearn.manifold import TSNE, MDS
from sklearn.decomposition import PCA

In [None]:
# European country names, listed alphabetically (one initial letter per line).
# In the visible code this list is only referenced by an optional, currently
# disabled filter on the "Entity" column.
europe = [
    "Albania", "Austria",
    "Belarus", "Belgium", "Bosnia and Herzegovina", "Bulgaria",
    "Croatia", "Cyprus", "Czechia",
    "Denmark",
    "Estonia",
    "Finland", "France",
    "Germany", "Greece",
    "Hungary",
    "Iceland", "Ireland", "Italy",
    "Kosovo",
    "Latvia", "Lithuania", "Luxembourg",
    "Malta", "Moldova", "Montenegro",
    "Netherlands", "North Macedonia", "Norway",
    "Poland", "Portugal",
    "Romania", "Russia",
    "Serbia", "Slovakia", "Slovenia", "Spain", "Sweden", "Switzerland",
    "Ukraine", "United Kingdom",
]

In [None]:
# Read the poverty-share CSV and relabel its columns positionally with short
# names. The relabel requires the file to have exactly four columns; a
# different column count raises a length-mismatch error.
poverty_data = pd.read_csv(
    "../data/poverty-share-on-less-than-30-per-day.csv"
).set_axis(["Entity", "Code", "Year", "Percentage"], axis="columns")

In [None]:
# Reshape the long table to wide form: one row per entity, one column per
# year, each cell holding that entity's percentage for that year. Entities
# with a missing value in any year are then dropped.
# `pivot` returns a new frame, so `poverty_data` itself is left untouched.
#
# Optional (disabled): restrict to European countries before pivoting, i.e.
# start from poverty_data[poverty_data["Entity"].isin(europe)] instead.
df = poverty_data.pivot(
    index="Entity",
    columns="Year",
    values="Percentage",
).dropna()

df.head()

In [None]:
# Standardize (z-score) every year column: subtract the column mean and divide
# by the column standard deviation (pandas' default sample std, ddof=1).
# `df.mean()` and `df.std()` return one value per column, and the arithmetic
# broadcasts them across rows, so this single vectorised expression does the
# same work as a per-column Python loop. `df` itself is not modified.
df_std = (df - df.mean()) / df.std()

df_std.head()

In [None]:
# Fit a PCA separately on the raw and on the standardized table and, for each,
# scatter the loadings of the year columns on the first two principal
# components (left panel: raw, right panel: standardized).
dfs = [df, df_std]
panel_titles = ["Raw percentages", "Standardized"]
fig, ax = plt.subplots(1, 2, figsize=(12, 5))

for i, dataframe in enumerate(dfs):
    # PCA accepts at most min(n_samples, n_features) components; using that
    # bound keeps every available component without failing when the table
    # has fewer rows than columns.
    pca = PCA(n_components=min(dataframe.shape))
    pca.fit(dataframe)

    # `components_` is (n_components, n_features). Transposing yields one row
    # per original column (year) and one column per principal component.
    loadings = pd.DataFrame(
        pca.components_.T,
        columns=[f"PC{k}" for k in range(len(pca.components_))],
        index=dataframe.columns,
    )

    # Plot the PC0/PC1 loadings. Indexing `components_[:, 0]` / `[:, 1]`
    # instead would select the first two *features* across all components,
    # which is not a loading plot.
    ax[i].scatter(loadings["PC0"], loadings["PC1"])
    ax[i].set_title(panel_titles[i])
    ax[i].set_xlabel("PC0 loading")
    ax[i].set_ylabel("PC1 loading")

plt.show()

In [None]:
# Project the rows of `df` to two dimensions with MDS, once per random seed,
# and draw the two embeddings side by side so they can be compared.
random_states = [0, 42]
fig, ax = plt.subplots(1, 2, figsize=(12, 5))

for panel, seed in zip(ax, random_states):
    mds = MDS(n_components=2, random_state=seed)
    X = mds.fit_transform(df)

    # First embedding coordinate on the x-axis, second on the y-axis.
    panel.scatter(X[:, 0], X[:, 1])
    panel.set_title(f"Random Seed = {seed}")

plt.show()

In [None]:
# Grid of 2-D t-SNE embeddings of `df`: one row of panels per random seed,
# one column of panels per perplexity value.
random_states = [0, 42]
perplex = [5, 30, 50]
fig, ax = plt.subplots(2, 3, figsize=(14, 10))

for r, seed in enumerate(random_states):
    for c, p in enumerate(perplex):
        panel = ax[r, c]

        tsne = TSNE(n_components=2, perplexity=p, random_state=seed)
        X = tsne.fit_transform(df)
        panel.scatter(X[:, 0], X[:, 1])

        # Only the top row carries the perplexity as a title and only the
        # left column carries the seed as a y-label.
        if r == 0:
            panel.set_title(f"Perplexity = {p}")
        if c == 0:
            panel.set_ylabel(f"Random seed = {seed}")