# Dimensionality Reduction
This notebook contains sample code for dimensionality reduction using PCA and hyperalignment.

## Code for one matrix of data (e.g. one subject)

In [1]:
import numpy as np
from sklearn.decomposition import PCA
import hypertools as hyp
# import matplotlib.pyplot as plt

In [2]:
# Make fake data (or load data): 100 samples of 6 features.
# Every feature has unit variance; features i and i+3 are strongly
# correlated (0.9) and every other pair is weakly correlated (0.1).
SEED = 0  # fixed so that "Restart Kernel -> Run All" reproduces the same samples
rng = np.random.RandomState(SEED)  # local generator: does not touch NumPy's global state
mean = np.array([1.0, 2.0, 3.0, 1.0, 2.0, 3.0])
cov = np.array([[1.0, 0.1, 0.1, 0.9, 0.1, 0.1],
                [0.1, 1.0, 0.1, 0.1, 0.9, 0.1],
                [0.1, 0.1, 1.0, 0.1, 0.1, 0.9],
                [0.9, 0.1, 0.1, 1.0, 0.1, 0.1],
                [0.1, 0.9, 0.1, 0.1, 1.0, 0.1],
                [0.1, 0.1, 0.9, 0.1, 0.1, 1.0]
               ])
X = rng.multivariate_normal(mean, cov, 100)

In [3]:
# Perform PCA, keeping every available component so the full variance
# spectrum can be inspected before choosing how many to keep.
# PCA can return at most min(n_samples, n_features) components; asking for
# X.shape[1] raises a ValueError when there are fewer samples than features,
# so cap the request at the smaller dimension.
pca = PCA(n_components=min(X.shape))
trans_X = pca.fit_transform(X)           # X projected onto the principal components
var_ex = pca.explained_variance_ratio_   # fraction of total variance per component

In [4]:
# Plot the variance explained as a function of component number
# plt.plot(range(1,len(var_ex)+1), var_ex, 'k.-')
# plt.ylabel('Variance Explained')
# plt.xlabel('Component Number')
# plt.show()

In [5]:
# Perform classification on transformed data
# Note: you should slice the data to keep the first k components

## Code for multiple sets of data (e.g. multiple subjects)

In [6]:
# Make fake data (or load data)
# Note: Group data should be stored in a list called "dataset",
#       and each entry should be a t by n matrix, where t is the
#       number of timepoints and n is the number of features.

GROUP_SEED = 1   # fixed so that "Restart Kernel -> Run All" reproduces the same subjects
N_SUBJECTS = 5   # number of simulated subjects
group_rng = np.random.RandomState(GROUP_SEED)  # local generator: leaves global NumPy state alone

mean = np.array([1.0, 2.0, 3.0, 1.0, 2.0, 3.0])
cov = np.array([[1.0, 0.1, 0.1, 0.9, 0.1, 0.1],
                [0.1, 1.0, 0.1, 0.1, 0.9, 0.1],
                [0.1, 0.1, 1.0, 0.1, 0.1, 0.9],
                [0.9, 0.1, 0.1, 1.0, 0.1, 0.1],
                [0.1, 0.9, 0.1, 0.1, 1.0, 0.1],
                [0.1, 0.1, 0.9, 0.1, 0.1, 1.0]
               ])
X = group_rng.multivariate_normal(mean, cov, 100)
n_features = X.shape[1]

# Create "subjects" by applying an independent random linear transform
# (a square standard-normal mixing matrix) to the shared data X, so every
# subject has the same shape as X but lives in a different feature basis.
dataset = [
    X @ group_rng.normal(loc=0.0, scale=1.0, size=[n_features, n_features])
    for _ in range(N_SUBJECTS)
]

### PCA followed by Hyperalignment

In [7]:
# Perform PCA transformation ('T') separately on each subject's matrix,
# keeping every available component.
dataset_T = []
for data in dataset:
    # PCA can return at most min(t, n) components; requesting data.shape[1]
    # raises a ValueError for subjects with fewer timepoints than features.
    pca = PCA(n_components=min(data.shape))
    dataset_T.append(pca.fit_transform(data))

In [8]:
# Perform hyperalignment ('A') iteratively adding PCA dimensions.
# dataset_TA[k] is the result of aligning every subject's first k components.
# Each subject is sliced directly instead of stacking all subjects into a
# 3-D array on every iteration, which also removes the requirement that all
# subjects have identical shapes. k is bounded by the smallest number of
# components any subject has.
max_k = min(subject_T.shape[1] for subject_T in dataset_T)
dataset_TA = {}
for k in range(1, max_k + 1):
    dataset_TA[k] = hyp.align([subject_T[:, :k] for subject_T in dataset_T])

### Hyperalignment followed by PCA

In [9]:
# Perform hyperalignment ('A')
# Aligns the untransformed per-subject matrices in `dataset` with hypertools
# and stores the result in `dataset_A`.
# NOTE(review): presumably one aligned matrix per subject, in the same order
# as `dataset` — confirm against the hypertools documentation.
dataset_A = hyp.align(dataset)

In [10]:
# Stack every subject's aligned data along the first axis so that a single
# PCA can be fit across all subjects at once
all_data_A = np.concatenate(dataset_A, axis=0)

In [11]:
# Perform PCA transformation ('T') once on the pooled, hyperaligned data.
# Cap the component count at min(n_samples, n_features): requesting
# all_data_A.shape[1] raises a ValueError if the pooled data has fewer rows
# than features.
pca = PCA(n_components=min(all_data_A.shape))
all_data_AT = pca.fit_transform(all_data_A)
var_ex = pca.explained_variance_ratio_   # fraction of total variance per component
# Undo the concatenation: cut the rows back into len(dataset) equal-sized
# blocks (np.split raises a ValueError if the rows do not divide evenly).
dataset_AT = np.split(all_data_AT, len(dataset))

In [12]:
# Plot the variance explained as a function of component number
# plt.plot(range(1,len(var_ex)+1), var_ex, 'k.-')
# plt.ylabel('Variance Explained')
# plt.xlabel('Component Number')
# plt.title('PCA on Hyperaligned Data')
# plt.show()

## Perform Classification on Lower Dimensional Data

In [None]:
# Insert classification code here