In [1]:
import os
import sys
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob

import torch
import torch.nn as nn
import torch.nn.functional as F

sys.path.append("../src/SpaceMAP/")
from _spacemap import SpaceMAP

sys.path.append("../run/")
from experimet_config import Config

cfg = Config()

In [2]:
use_columns = ['id', 'folder_name'] + cfg.keyword_attribute

merged_df = pd.read_csv('../data/table/merged.csv')[use_columns].dropna().reset_index(drop=True)
merged_df.head()

Unnamed: 0,id,folder_name,Voluminous_Smart,Powerful_Delicate,Linear_Curvy,Functional_Decorative,Robust_Flexible,Calm_Dynamic,Realistic_Romantic,Elegant_Cute,Sophisticated_Youthful,Luxurious_Approachable,Formal_Everyday,Strict_Friendly,Uniform_Free,Special_Everyday
0,1,001_AR_Stelvio_Q4_2017,4.52,4.48,5.0,4.28,4.48,3.48,4.28,3.16,3.52,3.28,3.96,3.88,4.2,4.16
1,2,002_AR_Tonale_CPT_2019,4.0,3.4,4.65,5.0,4.4,3.85,5.05,3.5,3.25,2.6,3.7,3.35,4.2,3.55
2,4,004_AU_Q4_etron_Sline_2021,2.681818,2.636364,3.681818,3.909091,3.409091,4.545455,4.363636,2.954545,3.318182,2.818182,3.681818,3.454545,3.772727,3.863636
3,5,005_AU_Q7_2019,4.478261,3.956522,4.434783,4.565217,4.347826,2.956522,4.217391,2.782609,3.086957,2.608696,3.217391,3.434783,3.304348,3.608696
4,6,006_BM_X7_2019,2.6,2.95,3.4,3.85,3.2,3.35,3.05,2.85,2.7,2.85,3.75,3.65,3.3,4.15


In [3]:
keyword_array = merged_df[cfg.keyword_attribute].values
img_paths = [f'../../../data/img/{i}.jpg' for i in merged_df['folder_name'].values]


In [6]:
# let's see the decomposed latent space

# 1. PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
reduced_latent_codes = pca.fit_transform(keyword_array)
np.save('../outputs/decompose-analysis/keyword/reduced_keyword-PCA.npy', reduced_latent_codes)

# 2. tSNE
from sklearn.manifold import TSNE
tsne = TSNE(n_components=2, perplexity=30.0, n_iter=1000)
reduced_latent_codes = tsne.fit_transform(keyword_array)
np.save('../outputs/decompose-analysis/keyword/reduced_keyword-tSNE.npy', reduced_latent_codes)

# 3. UMAP
from umap import UMAP
umap = UMAP(n_neighbors=15, min_dist=0.1, metric='euclidean')
reduced_latent_codes = umap.fit_transform(keyword_array)
np.save('../outputs/decompose-analysis/keyword/reduced_keyword-UMAP.npy', reduced_latent_codes)

"""
# 4. SpaceMAP
spacemap = SpaceMAP(verbose=False)
reduced_latent_codes = spacemap.fit_transform(keyword_array)
np.save('../outputs/decompose-analysis/keyword/reduced_keyword-spaceMAP.npy', reduced_latent_codes)
"""

"\n# 4. SpaceMAP\nspacemap = SpaceMAP(verbose=False)\nreduced_latent_codes = spacemap.fit_transform(keyword_array)\nnp.save('../outputs/decompose-analysis/keyword/reduced_keyword-spaceMAP.npy', reduced_latent_codes)\n"

In [7]:
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, CategoricalColorMapper

methods = [
    # 'spaceMAP',
    'UMAP',
    'PCA',
    'tSNE',
]

for method in methods:
    reduced_latent_codes = np.load(f'../outputs/decompose-analysis/keyword/reduced_keyword-{method}.npy')

    digits_df = pd.DataFrame(reduced_latent_codes, columns=['x', 'y'])
    digits_df['image'] = img_paths
    digits_df['digit'] = digits_df['image'].apply(lambda x: x.split('/')[-1].split('.')[0])

    datasource = ColumnDataSource(digits_df)

    #color_mapping = CategoricalColorMapper(factors=[str(9 - x) for x in digits.target_names],
    #                                       palette=Spectral10)

    plot_figure = figure(
        title='{} projection of the Digits dataset'.format(method),
        outer_width=600,
        outer_height=600,
        tools=('pan, wheel_zoom, reset')
    )

    plot_figure.add_tools(HoverTool(tooltips="""
    <div>
        <div>
            <img src='@image' style='float: left; margin: 5px 5px 5px 5px; width:300px;'/>
        </div>
        <div>
            <span style='font-size: 16px; color: #224499'>Digit:</span>
            <span style='font-size: 18px'>@digit</span>
        </div>
    </div>
    """))

    plot_figure.circle(
        'x',
        'y',
        source=datasource,
        # color=dict(field='digit', transform=color_mapping),
        line_alpha=0.6,
        fill_alpha=0.6,
        size=4
    )
    show(plot_figure)

    from bokeh.io import save

    save(plot_figure, '../outputs/decompose-analysis/keyword/keyword-mapping-{}.html'.format(method))

  save(plot_figure, '../outputs/decompose-analysis/keyword/keyword-mapping-{}.html'.format(method))
  save(plot_figure, '../outputs/decompose-analysis/keyword/keyword-mapping-{}.html'.format(method))
  save(plot_figure, '../outputs/decompose-analysis/keyword/keyword-mapping-{}.html'.format(method))
  save(plot_figure, '../outputs/decompose-analysis/keyword/keyword-mapping-{}.html'.format(method))
  save(plot_figure, '../outputs/decompose-analysis/keyword/keyword-mapping-{}.html'.format(method))
  save(plot_figure, '../outputs/decompose-analysis/keyword/keyword-mapping-{}.html'.format(method))
