## Setup

In [1]:
import numpy as np
import pandas as pd
import sklearn as sk
import time

In [30]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import PCA
from sklearn.decomposition import KernelPCA
from sklearn.decomposition import FastICA
from sklearn.decomposition import NMF
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.manifold import MDS
from sklearn.manifold import TSNE
from sklearn.manifold import Isomap
from sklearn.manifold import LocallyLinearEmbedding

In [3]:
%pip install umap-learn
from umap.umap_ import UMAP

Collecting umap-learn
  Downloading umap_learn-0.5.6-py3-none-any.whl.metadata (21 kB)
Collecting pynndescent>=0.5 (from umap-learn)
  Downloading pynndescent-0.5.13-py3-none-any.whl.metadata (6.8 kB)
Downloading umap_learn-0.5.6-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.7/85.7 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pynndescent-0.5.13-py3-none-any.whl (56 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.9/56.9 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pynndescent, umap-learn
Successfully installed pynndescent-0.5.13 umap-learn-0.5.6


In [4]:
from sklearn.metrics import r2_score

In [5]:
# Column labels assigned to the data file when it is read (15 entries).
features = 'gdp ineq sav edu dr rd epi frd ls corr ecf law dem prop lce'.split()

# Three-letter country codes used as row labels (27 entries).
c_codes = (
    'AUT BEL BGR HRV CYP CZE DNK EST FIN '
    'FRA DEU GRC HUN IRL ITA LVA LTU LUX '
    'MLT NLD POL PRT ROU SVK SVN ESP SWE'
).split()

# Display names of the dimensionality-reduction methods.
methods = [
    'TruncatedSVD',
    'PCA',
    'KernelPCA',
    'ICA',
    'NMF',
    'UMAP',
    'ISOMAP',
    'LLE',
    'Hessian Eigenmapping',
]

## Prepare Data  

In [6]:
# Read the raw table. Because `names` is supplied, pandas treats every row of
# the file as data (no header row is consumed) and labels the columns with
# `features`; the rows are then labelled with the country codes.
df = (
    pd.read_csv('data.csv', names=features)
    .set_axis(c_codes, axis='index')
)


## Standardize Data

In [7]:
# Min-max scale every column (MinMaxScaler's default target range is [0, 1]).
sc = MinMaxScaler()
df_scaled = sc.fit_transform(df.to_numpy())

# Labelled copy of the scaled matrix: same values, with country codes as the
# row index and the feature names as columns.
df_scaled_df = pd.DataFrame(df_scaled, index=c_codes, columns=features)


## TruncatedSVD

In [8]:
# Time the projection of the scaled data onto two TruncatedSVD components.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can be too coarse
# for runs that last only a few milliseconds.
st = time.perf_counter()

# TruncatedSVD uses a randomized solver by default, so the seed is fixed to
# make the embedding (and any score derived from it) repeatable across runs.
svd = TruncatedSVD(n_components=2, random_state=42)

svd_result = svd.fit_transform(df_scaled)

et = time.perf_counter()

elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')


Execution time: 0.033203840255737305 seconds


In [9]:
# Map the 2-D SVD embedding back to feature space and score how well the
# reconstruction matches the scaled input.
df_inv_svd = svd.inverse_transform(svd_result)
r2_svd = r2_score(y_true=df_scaled, y_pred=df_inv_svd)
print("coefficient_of_dermination ", r2_svd)

coefficient_of_dermination  0.65175874243164


In [45]:
# Running total of the variance share explained by the SVD components.
print(np.cumsum(svd.explained_variance_ratio_))

[0.49686185 0.70368308]


# PCA

In [10]:
# Time the projection of the scaled data onto two principal components.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can be too coarse
# for runs that last only a few milliseconds.
st = time.perf_counter()

pca = PCA(n_components=2)

pca_result = pca.fit_transform(df_scaled)

et = time.perf_counter()

elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')


Execution time: 0.00770258903503418 seconds


In [11]:
# Map the 2-D PCA embedding back to feature space and score the reconstruction
# against the scaled input.
df_inv_pca = pca.inverse_transform(pca_result)
r2_pca = r2_score(y_true=df_scaled, y_pred=df_inv_pca)
print("coefficient_of_dermination ", r2_pca)

# Running total of the variance share explained by the principal components.
print(np.cumsum(pca.explained_variance_ratio_))

coefficient_of_dermination  0.6989125532430349
[0.6615273  0.74208863]


# Kernel PCA

In [16]:
# Time a two-component RBF Kernel PCA fit on the scaled data.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can be too coarse
# for runs that last only a few milliseconds.
st = time.perf_counter()

# fit_inverse_transform=True makes the estimator also learn the mapping back
# to feature space, which inverse_transform() requires.
# NOTE(review): gamma=0.04 is hard-coded with no stated rationale — confirm
# how it was chosen.
pca_kernel = KernelPCA(n_components=2, kernel="rbf", gamma=0.04, fit_inverse_transform=True)

pca_kernel_result = pca_kernel.fit_transform(df_scaled)

et = time.perf_counter()

elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Execution time: 0.01801443099975586 seconds


In [17]:
# Map the 2-D Kernel PCA embedding back to feature space through the learned
# inverse transform and score the reconstruction against the scaled input.
df_inv_pca_kernel = pca_kernel.inverse_transform(pca_kernel_result)
r2_pca_kernel = r2_score(y_true=df_scaled, y_pred=df_inv_pca_kernel)
print("coefficient_of_dermination ", r2_pca_kernel)


coefficient_of_dermination  0.10221677020672756


# ICA

In [18]:
# Time a two-component FastICA fit on the scaled data.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can be too coarse
# for runs that last only a few milliseconds.
st = time.perf_counter()

# FastICA draws its initial unmixing matrix at random when none is supplied,
# so the seed is fixed to make the estimated components repeatable.
ica = FastICA(n_components=2, random_state=42)

ica_result = ica.fit_transform(df_scaled)

et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')


Execution time: 0.0071108341217041016 seconds


In [19]:
# Map the two independent components back to feature space and score the
# reconstruction against the scaled input.
df_inv_ica = ica.inverse_transform(ica_result)
r2_ica = r2_score(y_true=df_scaled, y_pred=df_inv_ica)
print("coefficient_of_dermination ", r2_ica)

coefficient_of_dermination  0.6989125532430349


# NMF

In [20]:
# Time a two-component NMF factorisation of the scaled data.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can be too coarse
# for runs that last only a few milliseconds.
st = time.perf_counter()

nmf = NMF(n_components=2)

nmf_result = nmf.fit_transform(df_scaled)

et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')


Execution time: 0.006576061248779297 seconds


In [21]:
# Rebuild the feature matrix from the two NMF factors and score the
# reconstruction against the scaled input.
df_inv_nmf = nmf.inverse_transform(nmf_result)
r2_nmf = r2_score(y_true=df_scaled, y_pred=df_inv_nmf)
print("coefficient_of_dermination ", r2_nmf)

coefficient_of_dermination  0.6487691387777057


# UMAP

In [22]:
# Time a two-component UMAP embedding of the scaled data.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals, unlike the wall-clock time.time().
st = time.perf_counter()

# UMAP is stochastic; fixing the seed makes the embedding, and the
# reconstruction score computed from it, repeatable across runs.
umap = UMAP(n_components=2, random_state=42)

umap_result = umap.fit_transform(df_scaled)

et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')


Execution time: 11.789931535720825 seconds


In [23]:
# Map the 2-D UMAP embedding back to feature space and score the
# reconstruction against the scaled input.
df_inv_umap = umap.inverse_transform(umap_result)
r2_umap = r2_score(y_true=df_scaled, y_pred=df_inv_umap)
print("coefficient_of_dermination ", r2_umap)

coefficient_of_dermination  0.1954955237414354


# Isomap

In [24]:
# Time a two-component Isomap embedding of the scaled data.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can be too coarse
# for runs that last only a few milliseconds.
st = time.perf_counter()

isomap = Isomap(n_components=2)

isomap_result = isomap.fit_transform(df_scaled)

et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')


Execution time: 0.007590770721435547 seconds


In [25]:
# Evaluate the reconstruction error once and reuse it; the original called
# reconstruction_error() twice, recomputing the same value.
# NOTE(review): this error is a norm-based quantity rather than a fraction of
# explained variance, so `1 - error` is presumably not directly comparable
# with the R^2 scores reported for the linear methods — confirm before
# ranking methods on it.
isomap_error = isomap.reconstruction_error()
print(isomap_error, 1 - isomap_error, sep='\n')

0.24260365207748058
0.7573963479225194


# LLE

In [26]:
# Time a two-component Locally Linear Embedding of the scaled data.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can be too coarse
# for runs that last only a few milliseconds.
st = time.perf_counter()

lle = LocallyLinearEmbedding(n_components=2)

lle_result = lle.fit_transform(df_scaled)

et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Execution time: 0.00829172134399414 seconds


In [28]:
# Show the fitted LLE reconstruction error and its complement, one per line.
# NOTE(review): reconstruction_error_ is presumably the embedding's raw cost
# value, not a normalised fraction — confirm that `1 - error` is a meaningful
# score before comparing it with the R^2 values of other methods.
print(lle.reconstruction_error_, 1 - lle.reconstruction_error_, sep='\n')

0.027827304515145403
0.9721726954848546


# Hessian Eigenmapping

In [43]:
# Time a two-component Hessian LLE embedding of the scaled data.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() follows the wall clock and can be too coarse
# for runs that last only a few milliseconds.
st = time.perf_counter()

# The Hessian method needs n_neighbors > n_components * (n_components + 3) / 2,
# i.e. more than 5 neighbours for two components; 6 is the smallest valid value.
lle_h = LocallyLinearEmbedding(n_components=2, method='hessian', n_neighbors=6)

lle_h_result = lle_h.fit_transform(df_scaled)

et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Execution time: 0.03451061248779297 seconds


In [44]:
# Show the fitted Hessian LLE reconstruction error and its complement, one per
# line.
# NOTE(review): reconstruction_error_ is presumably the embedding's raw cost
# value, not a normalised fraction — confirm that `1 - error` is a meaningful
# score before comparing it with the R^2 values of other methods.
print(lle_h.reconstruction_error_, 1 - lle_h.reconstruction_error_, sep='\n')

0.01386983477743742
0.9861301652225626
