## Notebook to visualise dimensionality reductions of model activations

In [None]:
import glob
import os

import hdbscan
import numpy as np
import pandas as pd
import umap
from sklearn import manifold
from sklearn.cluster import AgglomerativeClustering
from sklearn.decomposition import FastICA
from sklearn.decomposition import PCA
from sklearn.manifold import LocallyLinearEmbedding
%matplotlib inline
import matplotlib.pyplot as plt

### Reading data

In [None]:
# Reproducibility: seed handed to every estimator that accepts `random_state`.
SEED = 1111

# Which activations to load: <DATA_PATH>/<LANGUAGE>/<MODEL_NAME>/activations*.csv
LANGUAGE = "english"
MODEL_NAME = "bert-base-cased"

# Location of the project on disk and of the stimuli representations inside it.
PROJECT_PATH = "/neurospin/unicog/protocols/IRMf/LePetitPrince_Pallier_2018/LePetitPrince/"
DATA_PATH = os.path.join(
    PROJECT_PATH,
    'data',
    'stimuli-representations',
)

In [None]:
# Collect every activations CSV for the chosen language/model. Sorting makes
# the order in which the files are later concatenated reproducible.
activations_pattern = os.path.join(DATA_PATH, LANGUAGE, MODEL_NAME, 'activations*.csv')
paths = sorted(glob.glob(activations_pattern))
if not paths:
    # Fail here with the offending pattern instead of letting np.vstack
    # complain about an empty sequence further down.
    raise FileNotFoundError(
        'No activation files match {}'.format(activations_pattern)
    )

In [None]:
# Load each activations CSV into its own DataFrame, keeping the order of `paths`.
dataframes = list(map(pd.read_csv, paths))

In [None]:
# Stack the rows of all DataFrames into a single 2-D array
# (all files must therefore share the same number of columns).
data = np.vstack([frame.values for frame in dataframes])

### Plotting function

In [None]:
def _point_density(points, bins=30):
    """Return, for every row of *points*, how many rows share its grid cell.

    The points are binned on a regular grid with ``bins`` cells per axis and
    each point is assigned the count of its own cell, giving a cheap
    numpy-only density estimate usable as a scatter colour.
    """
    points = np.asarray(points, dtype=float)
    counts, edges = np.histogramdd(points, bins=bins)
    # np.digitize is 1-based and puts the maximum one past the last cell,
    # whereas histogramdd counts it in the last cell: shift and clip.
    cell = tuple(
        np.clip(
            np.digitize(points[:, axis], edges[axis]) - 1,
            0,
            counts.shape[axis] - 1,
        )
        for axis in range(points.shape[1])
    )
    return counts[cell]


def plot_reduction(data, plot_type='2D', reduction_type='', **kwargs):
    """Scatter-plot the first two or three columns of a reduced data set.

    Arguments:
        - data: 2-D array-like, one row per sample, one column per component.
        - plot_type: '2D' (columns 0 and 1) or '3D' (columns 0, 1 and 2).
        - reduction_type: name of the reduction method, shown in the title.
        - kwargs: forwarded to the axes' ``scatter`` method, e.g.
            - s=5
            - c='density' (replaced by a per-point density estimate, since
              'density' is not a colour matplotlib understands)
            - cmap='Spectral'

    Raises:
        ValueError: if `plot_type` is neither '2D' nor '3D'.
    """
    if plot_type not in ('2D', '3D'):
        raise ValueError(
            "plot_type must be '2D' or '3D', got {!r}".format(plot_type)
        )
    n_dims = 2 if plot_type == '2D' else 3
    coords = np.asarray(data)[:, :n_dims]

    # isinstance guard: comparing an array of colours with a string would be
    # evaluated element-wise.
    colour = kwargs.get('c')
    if isinstance(colour, str) and colour == 'density':
        kwargs['c'] = _point_density(coords)

    plt.close('all')
    fig = plt.figure()
    if plot_type == '2D':
        ax = fig.add_subplot(111)
        ax.scatter(coords[:, 0], coords[:, 1], **kwargs)
    else:
        # Importing Axes3D registers the '3d' projection on older matplotlib
        # releases; it is harmless on recent ones.
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
        ax = fig.add_subplot(111, projection='3d')
        # The z column must go to 3-D axes: with plt.scatter the third
        # positional argument is the marker size `s`.
        ax.scatter(coords[:, 0], coords[:, 1], coords[:, 2], **kwargs)
    ax.set_title('Activations embedding for {}'.format(reduction_type), fontsize=24)
    plt.show()

In [None]:
# Scatter options shared by every plot_reduction call below.
# NOTE(review): 'density' is not a colour name matplotlib knows; confirm that
# plot_reduction translates it before it reaches the scatter call.
kwargs = {
    's': 5,
    'c': 'density',
    'cmap': 'Spectral',
}

### PCA

In [None]:
# One seeded PCA per target dimensionality (2 and 3 components), each fitted
# on the full activation matrix.
pca_2D, pca_3D = (PCA(n_components=k, random_state=SEED) for k in (2, 3))
pca_result_2D = pca_2D.fit_transform(data)
pca_result_3D = pca_3D.fit_transform(data)

In [None]:
# 2-component PCA projection.
plot_reduction(
    pca_result_2D,
    reduction_type='PCA',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-component PCA projection.
plot_reduction(
    pca_result_3D,
    reduction_type='PCA',
    plot_type='3D',
    **kwargs
)

### ICA

In [None]:
# One seeded FastICA per target dimensionality (2 and 3 components), each
# fitted on the full activation matrix.
ica_2D, ica_3D = (FastICA(n_components=k, random_state=SEED) for k in (2, 3))
ica_result_2D = ica_2D.fit_transform(data)
ica_result_3D = ica_3D.fit_transform(data)

In [None]:
# 2-component ICA projection.
plot_reduction(
    ica_result_2D,
    reduction_type='ICA',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-component ICA projection.
plot_reduction(
    ica_result_3D,
    reduction_type='ICA',
    plot_type='3D',
    **kwargs
)

### UMAP

In [None]:
# One seeded UMAP per target dimensionality (2 and 3 components); both use
# 20 neighbours and a minimum distance of 0.1.
umap_2D, umap_3D = (
    umap.UMAP(n_components=k, random_state=SEED, n_neighbors=20, min_dist=0.1)
    for k in (2, 3)
)
umap_result_2D = umap_2D.fit_transform(data)
umap_result_3D = umap_3D.fit_transform(data)

In [None]:
# 2-component UMAP embedding.
plot_reduction(
    umap_result_2D,
    reduction_type='UMAP',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-component UMAP embedding.
plot_reduction(
    umap_result_3D,
    reduction_type='UMAP',
    plot_type='3D',
    **kwargs
)

### AGGLOMERATIVE CLUSTERING

In [None]:
def _cluster_feature_means(data, n_clusters, linkage):
    """Cluster the columns of *data* and average the columns of each cluster.

    Returns an array of shape (data.shape[0], n_clusters) whose k-th column
    is the row-wise mean of the columns assigned to cluster k.
    """
    # AgglomerativeClustering is deterministic and takes no `random_state`.
    model = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage)
    # Fit on the transpose so that columns (not rows) are what gets clustered.
    labels = model.fit(data.T).labels_
    return np.column_stack(
        [np.mean(data[:, labels == k], axis=1) for k in range(n_clusters)]
    )


def agglomerative_clustering(data, linkage):
    """Reduce *data* to 2 and 3 columns by agglomerating its columns.

    Arguments:
        - data: 2-D numpy array, one row per sample, one column per feature.
        - linkage: linkage criterion forwarded to AgglomerativeClustering
          (the notebook uses "ward", "average", "complete" and "single").

    Returns:
        - (result_2D, result_3D): arrays of shape (n_rows, 2) and (n_rows, 3)
          holding, per row, the mean of the columns in each cluster.
    """
    ac_result_2D = _cluster_feature_means(data, 2, linkage)
    ac_result_3D = _cluster_feature_means(data, 3, linkage)
    return ac_result_2D, ac_result_3D

#### WARD

In [None]:
# Column agglomeration with the "ward" linkage criterion.
ward_result_2D, ward_result_3D = agglomerative_clustering(
    data,
    linkage="ward",
)

In [None]:
# 2-cluster reduction, ward linkage.
plot_reduction(
    ward_result_2D,
    reduction_type='AgglomerativeClustering - WARD',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-cluster reduction, ward linkage.
plot_reduction(
    ward_result_3D,
    reduction_type='AgglomerativeClustering - WARD',
    plot_type='3D',
    **kwargs
)

#### AVERAGE

In [None]:
# Column agglomeration with the "average" linkage criterion.
average_result_2D, average_result_3D = agglomerative_clustering(
    data,
    linkage="average",
)

In [None]:
# 2-cluster reduction, average linkage.
plot_reduction(
    average_result_2D,
    reduction_type='AgglomerativeClustering - AVERAGE',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-cluster reduction, average linkage.
plot_reduction(
    average_result_3D,
    reduction_type='AgglomerativeClustering - AVERAGE',
    plot_type='3D',
    **kwargs
)

#### COMPLETE

In [None]:
# Column agglomeration with the "complete" linkage criterion.
complete_result_2D, complete_result_3D = agglomerative_clustering(
    data,
    linkage="complete",
)

In [None]:
# 2-cluster reduction, complete linkage.
plot_reduction(
    complete_result_2D,
    reduction_type='AgglomerativeClustering - COMPLETE',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-cluster reduction, complete linkage.
plot_reduction(
    complete_result_3D,
    reduction_type='AgglomerativeClustering - COMPLETE',
    plot_type='3D',
    **kwargs
)

#### SINGLE

In [None]:
# Column agglomeration with the "single" linkage criterion.
single_result_2D, single_result_3D = agglomerative_clustering(
    data,
    linkage="single",
)

In [None]:
# 2-cluster reduction, single linkage.
plot_reduction(
    single_result_2D,
    reduction_type='AgglomerativeClustering - SINGLE',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-cluster reduction, single linkage.
plot_reduction(
    single_result_3D,
    reduction_type='AgglomerativeClustering - SINGLE',
    plot_type='3D',
    **kwargs
)

### MANIFOLD REDUCTION

#### ISOMAP

In [None]:
# Isomap embeddings in 2 and 3 dimensions, built on 20-nearest-neighbour graphs.
# The scikit-learn class is `manifold.Isomap`, and it takes no `random_state`.
isomap_2D = manifold.Isomap(n_components=2, n_neighbors=20)
isomap_result_2D = isomap_2D.fit_transform(data)
isomap_3D = manifold.Isomap(n_components=3, n_neighbors=20)
# Store under its own name: the plotting cell reads `isomap_result_3D`, and
# the UMAP result must not be overwritten.
isomap_result_3D = isomap_3D.fit_transform(data)

In [None]:
# 2-component Isomap embedding.
plot_reduction(
    isomap_result_2D,
    reduction_type='ISOMAP',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-component Isomap embedding.
plot_reduction(
    isomap_result_3D,
    reduction_type='ISOMAP',
    plot_type='3D',
    **kwargs
)

#### LOCALLY LINEAR EMBEDDING

In [None]:
# One seeded locally linear embedding per target dimensionality (2 and 3),
# each using 20 neighbours.
# `method` can be 'standard', 'hessian', 'modified' or 'ltsa'.
lle_2D, lle_3D = (
    LocallyLinearEmbedding(n_components=k, random_state=SEED, n_neighbors=20, method='standard')
    for k in (2, 3)
)
lle_result_2D = lle_2D.fit_transform(data)
lle_result_3D = lle_3D.fit_transform(data)

In [None]:
# 2-component locally linear embedding.
plot_reduction(
    lle_result_2D,
    reduction_type='LOCALLY LINEAR EMBEDDING',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-component locally linear embedding.
plot_reduction(
    lle_result_3D,
    reduction_type='LOCALLY LINEAR EMBEDDING',
    plot_type='3D',
    **kwargs
)

#### MDS

In [None]:
# One seeded multidimensional scaling per target dimensionality (2 and 3).
mds_2D, mds_3D = (
    manifold.MDS(n_components=k, random_state=SEED) for k in (2, 3)
)
mds_result_2D = mds_2D.fit_transform(data)
mds_result_3D = mds_3D.fit_transform(data)

In [None]:
# 2-component MDS embedding.
plot_reduction(
    mds_result_2D,
    reduction_type='MDS',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-component MDS embedding.
plot_reduction(
    mds_result_3D,
    reduction_type='MDS',
    plot_type='3D',
    **kwargs
)

#### SPECTRAL EMBEDDING

In [None]:
# One seeded spectral embedding per target dimensionality (2 and 3), each
# using 20 neighbours.
se_2D, se_3D = (
    manifold.SpectralEmbedding(n_components=k, random_state=SEED, n_neighbors=20)
    for k in (2, 3)
)
se_result_2D = se_2D.fit_transform(data)
se_result_3D = se_3D.fit_transform(data)

In [None]:
# 2-component spectral embedding.
plot_reduction(
    se_result_2D,
    reduction_type='SPECTRAL EMBEDDING',
    plot_type='2D',
    **kwargs
)

In [None]:
# 3-component spectral embedding.
plot_reduction(
    se_result_3D,
    reduction_type='SPECTRAL EMBEDDING',
    plot_type='3D',
    **kwargs
)