### Mounting Google Drive

In [None]:
# Colab-only: mount Google Drive at /content/drive so the data paths used below resolve.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd 'drive/MyDrive/IIT J Summer Internship 2022/Code/Neural-Uncertainty-Representation/'

### Importing necessary libraries

In [None]:
!pip install phate
!pip install scprep

In [105]:
import numpy as np
import phate as ph
import pandas as pd
import scprep as scp
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import os

In [10]:
import TPHATE

### Importing Data

#### Young Subject's Data

We import the young subjects' data. For each ROI this is a 3D tensor of dimensions (TR, Voxel, Number of Subjects). The ROIs fall into the following 5 groups:
- **Prefrontal areas:** dlPFC, vlPFC, lOFC, dmPFC, mPFC, mOFC
- **Default-mode areas:** mPFC, ACC, PCC, Precuneus, mOFC
- **Sensory areas:** VC
- **Multisensory area:** pSTS
- **Some other areas:** TP, IPL, mCC, Put, PCG, Nac, INS



In [115]:
# Collect the young group's "npy" file names, skipping any name containing "537" or "(1)".
# The list is sorted because otherwise the young and old file orders differed,
# which caused data from different ROIs to be merged together.
file_names_young = sorted(
    name
    for name in os.listdir('/content/drive/MyDrive/IIT J Summer Internship 2022/Hitchcock/Data/YOUNG/Voxel_BOLD/Numpy')
    if name.endswith("npy") and not ("537" in name or "(1)" in name)
)

In [116]:
# Load every young-group array, in the same order as file_names_young.
data_young = [
    np.load('/content/drive/MyDrive/IIT J Summer Internship 2022/Hitchcock/Data/YOUNG/Voxel_BOLD/Numpy/'+young_file)
    for young_file in file_names_young
]

We now compute, for every voxel and every TR, the BOLD signal averaged over the subjects.

In [117]:
# Average over axis 2 (the subject axis) for each loaded array.
data_avg_young = [roi_tensor.mean(axis=2) for roi_tensor in data_young]

In [118]:
# Sanity check: shape of the last young-group array after averaging over subjects.
data_avg_young[-1].shape

(189, 515)

#### Old Subject's Data

We import the old subjects' data. For each ROI this is a 3D tensor of dimensions (TR, Voxel, Number of Subjects). The ROIs fall into the following 5 groups:
- **Prefrontal areas:** dlPFC, vlPFC, lOFC, dmPFC, mPFC, mOFC
- **Default-mode areas:** mPFC, ACC, PCC, Precuneus, mOFC
- **Sensory areas:** VC
- **Multisensory area:** pSTS
- **Some other areas:** TP, IPL, mCC, Put, PCG, Nac, INS



In [119]:
# Collect the old group's "npy" file names, skipping any name containing "537".
# Sorted so the order lines up with the (also sorted) young-group list.
file_names_old = sorted(
    name
    for name in os.listdir('/content/drive/MyDrive/IIT J Summer Internship 2022/Hitchcock/Data/OLD/Voxel_BOLD/Numpy')
    if name.endswith("npy") and not ("537" in name)
)

In [120]:
# Load every old-group array, in the same order as file_names_old.
data_old = [
    np.load('/content/drive/MyDrive/IIT J Summer Internship 2022/Hitchcock/Data/OLD/Voxel_BOLD/Numpy/'+old_file)
    for old_file in file_names_old
]

In [121]:
# Average over axis 2 (the subject axis) for each loaded array.
data_avg_old = [roi_tensor.mean(axis=2) for roi_tensor in data_old]

In [122]:
# Sanity check: shape of the first old-group array after averaging over subjects.
data_avg_old[0].shape

(189, 1092)

### T-PHATE Representation Compared

Here we will try to cluster the young and old subjects based on their T-PHATE projection values, to see how well T-PHATE separates individuals of the two age groups in an unsupervised way, i.e. without supplying the labels (the age-group membership).

We start by calculating the T-PHATE 2 dimensional embeddings for both young and old subjects.

In [123]:
def phate_rep(data):
  """Return the T-PHATE embedding of `data`.

  A temporal affinity matrix is computed from `data` and passed to the
  TPHATE.PHATE constructor; the resulting operator is then fit on the same
  `data` and its transformed output is returned.

  NOTE(review): the original comment stated that PHATE requires Voxel x TR
  data, while the averaged arrays in this notebook appear to be (TR, Voxel).
  Confirm the expected orientation against TPHATE.py.
  """
  # temporal_affinity_matrix is the last function in the TPHATE.py file.
  temporal_affinity = TPHATE.temporal_affinity_matrix(data)

  # Build the PHATE operator with a fixed seed (43) and verbosity turned off.
  operator = TPHATE.PHATE(temporal_affinity, random_state=43, verbose=0)
  return operator.fit_transform(data)

In [124]:
# One T-PHATE embedding per subject-averaged young array.
phate_embd_young = list(map(phate_rep, data_avg_young))

In [None]:
# One T-PHATE embedding per subject-averaged old array.
phate_embd_old = list(map(phate_rep, data_avg_old))

We will label the young subjects as 1 and the old subjects as 0.

In [None]:
# Append a constant column of 1s (the "young" label) to every embedding.
phate_embd_young_with_label = [
    np.hstack((roi_embd, np.ones((len(roi_embd), 1))))
    for roi_embd in phate_embd_young
]

In [None]:
# Append a constant column of 0s (the "old" label) to every embedding.
phate_embd_old_with_label = [
    np.hstack((roi_embd, np.zeros((len(roi_embd), 1))))
    for roi_embd in phate_embd_old
]

In [None]:
# Stack the labelled young rows on top of the labelled old rows, pairing the lists by position.
merged_data = [
    np.vstack((young_rows, old_rows))
    for young_rows, old_rows in zip(phate_embd_young_with_label, phate_embd_old_with_label)
]

In [None]:
# Derive a display name per file by stripping "data_" and ".npy" from the young file names.
ROI_names = [
    young_file.replace("data_","").replace(".npy","")
    for young_file in file_names_young
]

In [None]:
# Overlay the old and young T-PHATE embeddings, one figure per ROI name.
# The loop variables are deliberately NOT called data_old / data_young: those
# names hold the raw data lists loaded earlier, and overwriting them here would
# break re-running the averaging cells.
tphate_plot_dir = "Plots/BOTH/TPHATE"
os.makedirs(tphate_plot_dir, exist_ok=True)  # savefig raises if the folder is missing
for roi_name, embd_old, embd_young in zip(ROI_names, phate_embd_old, phate_embd_young):
  fig, ax = plt.subplots(figsize=(10,10))
  ax.scatter(embd_old[:,0], embd_old[:,1], label="old")
  ax.scatter(embd_young[:,0], embd_young[:,1], label="young")
  ax.set_title(roi_name)
  ax.set_ylabel('TPHATE 2')
  ax.set_xlabel('TPHATE 1')
  ax.legend()
  fig.savefig(tphate_plot_dir+"/TPHATE-"+roi_name+".png")
  plt.show()      # render the figure in the notebook before releasing it
  plt.close(fig)  # avoid accumulating one open figure per ROI

### PCA Representation Compared

In [None]:
def pca_rep(data, n_comps=2):
  """Return the PCA embedding of `data`.

  Args:
    data: input passed unchanged to PCA.fit_transform.
    n_comps: number of principal components to keep (default 2).

  Returns:
    The output of PCA.fit_transform for a PCA built with random_state=43.
  """
  return PCA(n_components=n_comps, random_state=43).fit_transform(data)

In [None]:
# One 2-component PCA embedding per subject-averaged young array.
pca_embd_young = list(map(pca_rep, data_avg_young))

In [None]:
# One 2-component PCA embedding per subject-averaged old array.
pca_embd_old = list(map(pca_rep, data_avg_old))

In [None]:
# Overlay the old and young PCA embeddings, one figure per ROI name.
# The loop variables are deliberately NOT called data_old / data_young: those
# names hold the raw data lists loaded earlier, and overwriting them here would
# break re-running the averaging cells.
pca_plot_dir = "Plots/BOTH/PCA"
os.makedirs(pca_plot_dir, exist_ok=True)  # savefig raises if the folder is missing
for roi_name, embd_old, embd_young in zip(ROI_names, pca_embd_old, pca_embd_young):
  fig, ax = plt.subplots(figsize=(10,10))
  ax.scatter(embd_old[:,0], embd_old[:,1], label="old")
  ax.scatter(embd_young[:,0], embd_young[:,1], label="young")
  ax.set_title(roi_name)
  ax.set_ylabel('PCA 2')
  ax.set_xlabel('PCA 1')
  ax.legend()
  fig.savefig(pca_plot_dir+"/PCA-"+roi_name+".png")
  plt.show()      # render the figure in the notebook before releasing it
  plt.close(fig)  # avoid accumulating one open figure per ROI

### t-SNE Representation Compared

In [None]:
def tSNE_rep(data, n_comps=2):
  """Return the t-SNE embedding of `data`.

  Args:
    data: input passed unchanged to TSNE.fit_transform.
    n_comps: number of embedding dimensions (default 2).

  Returns:
    The output of TSNE.fit_transform for a TSNE built with
    learning_rate="auto", random_state=43 and init="random".
  """
  embedder = TSNE(
      n_components=n_comps,
      learning_rate="auto",
      random_state=43,
      init="random",
  )
  return embedder.fit_transform(data)

In [None]:
# One 2-dimensional t-SNE embedding per subject-averaged young array.
tSNE_embds_young = list(map(tSNE_rep, data_avg_young))

In [None]:
# One 2-dimensional t-SNE embedding per subject-averaged old array.
tSNE_embds_old = list(map(tSNE_rep, data_avg_old))

In [None]:
# Overlay the old and young t-SNE embeddings, one figure per ROI name.
# The loop variables are deliberately NOT called data_old / data_young: those
# names hold the raw data lists loaded earlier, and overwriting them here would
# break re-running the averaging cells.
tsne_plot_dir = "Plots/BOTH/tSNE"
os.makedirs(tsne_plot_dir, exist_ok=True)  # savefig raises if the folder is missing
for roi_name, embd_old, embd_young in zip(ROI_names, tSNE_embds_old, tSNE_embds_young):
  fig, ax = plt.subplots(figsize=(10,10))
  ax.scatter(embd_old[:,0], embd_old[:,1], label="old")
  ax.scatter(embd_young[:,0], embd_young[:,1], label="young")
  ax.set_title(roi_name)
  ax.set_ylabel('tSNE 2')
  ax.set_xlabel('tSNE 1')
  ax.legend()
  fig.savefig(tsne_plot_dir+"/tSNE-"+roi_name+".png")
  plt.show()      # render the figure in the notebook before releasing it
  plt.close(fig)  # avoid accumulating one open figure per ROI