#Imports

In [None]:
import numpy as np
from scipy.spatial.distance import cdist, squareform
from sklearn.cross_decomposition import CCA
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy.special import softmax

import pandas as pd
import seaborn as sns
from google.colab import drive

# Mount Google Drive at /content/gdrive; force_remount=True remounts even if
# a mount already exists, so re-running the cell is safe.
drive.mount('/content/gdrive',force_remount=True)

# Load Data

In [None]:
# Read the three .npy files from the mounted Drive, in the order:
# questionnaire values, questionnaire column names, model metric table.
_loaded = []
for npy_path in (
    '/content/gdrive/MyDrive/measures_marcos/tabular_data.npy',
    '/content/gdrive/MyDrive/measures_marcos/tabular_names.npy',
    '/content/gdrive/MyDrive/measures_marcos/InterModel_MetricData.npy',
):
    with open(npy_path, 'rb') as f:
        _loaded.append(np.load(f))
questionnaire_data, questionnaire_names, metric_data = _loaded

# Drop rows 28 and 33: subjects that are missing from the study.
questionnaire_data = np.delete(questionnaire_data, [28, 33], axis=0)

In [None]:
# One box per column of metric_data, to inspect the spread of every metric.
plt.boxplot(metric_data)
plt.show()

## Normalize questionnaire data

In [None]:
# Smallest possible value of every questionnaire column.
min_tabular = [1,18,1,0,1,1,1,1,0,1,1,1,1,1,0,1,0,0,1,1,
               1,1,1,1,0,0,1,1,1,1,1,1,0,0,1,1,1,1,1,1,
               0,0,1,1,1,1,1,1,0,0,1,0]

# Largest possible value of every questionnaire column.
max_tabular = [4,33,5,24,5,5,5,5,100,5,5,5,5,5,8,5,6,100,
               5,5,5,5,5,5,4.5,100,5,5,5,5,5,5,4,100,5,5,
               5,5,5,5,5,100,5,5,5,5,5,5,3,100,5,100]

# Min-max scaling with the theoretical bounds above. The division is done
# out of place on float arrays: an in-place `/=` fails with a casting error
# when the loaded questionnaire array has an integer dtype.
_lower = np.asarray(min_tabular, dtype=float)
_span = np.asarray(max_tabular, dtype=float) - _lower
norm_tabular = (questionnaire_data - _lower) / _span

# Adjustment for the coffee question (column 3).
# Not drinking coffee (0) is closer to "24 hours ago" (1) than to having had
# one an hour ago (0.04), so exact zeros are moved to 1.
_no_coffee = norm_tabular[:, 3] == 0
norm_tabular[_no_coffee, 3] += 1

norm_tabular.shape, metric_data.shape

In [None]:
# Subject indices belonging to each of the three groups.
g1 = [0, 2, 3, 4, 5, 9, 12, 13, 22, 25, 27, 32, 34, 38, 40, 41, 43, 44, 45, 46, 47]
g2 = [7, 8, 11, 14, 17, 19, 20, 21, 23, 24, 29, 33, 36, 39, 42, 48, 49]
g3 = [1, 6, 10, 15, 16, 18, 26, 28, 30, 31, 35, 37]
g_ = [g1,g2,g3]

# Column vector with the group label (1, 2 or 3) of each of the 50 subjects.
group_ = np.zeros((50,1))
for _label, _members in enumerate(g_, start=1):
    group_[_members] = _label

In [None]:
# Heatmap of each data matrix (normalised questionnaire first, metrics second),
# each drawn as its own figure with a colour bar.
for _matrix in (norm_tabular, metric_data):
    plt.pcolormesh(_matrix)
    plt.colorbar()
    plt.show()

In [None]:
#np.corrcoef(X[:,0],X[:,2])[0,1]

def fro_p(X,Y): #X, Y \in NxM,
  """Return trace(X^T Y), the Frobenius inner product of two 2-D arrays."""
  product = X.T @ Y
  return np.trace(product)

def cca_M(X,Y): # X \in N x P, Y \in N x M
  """Return ||X^T Y||_F^2 / (||X^T X||_F * ||Y^T Y||_F).

  All three matrices are plain cross-products: no centring or scaling is
  applied to X or Y here.
  """
  gram_x = X.T @ X
  gram_y = Y.T @ Y
  cross = X.T @ Y
  numerator = fro_p(cross, cross)
  denominator = np.sqrt(fro_p(gram_x, gram_x) * fro_p(gram_y, gram_y))
  return numerator / denominator

def sig_feature(X, bias=0):
  """Build one Gaussian kernel per feature, with a median-heuristic bandwidth.

  Parameters
  ----------
  X : ndarray of shape (N, P)
      N samples described by P features.
  bias : float, default 0
      Constant added to every bandwidth.

  Returns
  -------
  K_f : ndarray of shape (N, N, P)
      K_f[:, :, p] = exp(-D_p**2 / (2 * sig_f[p]**2)), where D_p holds the
      pairwise absolute differences of feature p.
  sig_f : ndarray of shape (P,)
      Median pairwise distance of each feature plus ``bias``.

  Notes
  -----
  When a bandwidth is exactly 0 (at least half of the sample pairs share the
  same value and ``bias`` is 0), the Gaussian formula would evaluate 0/0 and
  return NaN. The kernel then falls back to its zero-bandwidth limit: 1 for
  identical values, 0 otherwise.
  """
  N,P = X.shape
  sig_f = np.zeros((P))
  K_f = np.zeros((N,N,P))
  for p in range(P):
    column = X[:,p].reshape(-1,1)
    Df = cdist(column,column)
    # squareform turns the square distance matrix into the vector of distinct
    # pairs; the explicit symmetrisation keeps its symmetry check happy.
    sig_f[p] = np.median(squareform(0.5*(Df+Df.T))) + bias
    if sig_f[p] == 0:
      K_f[:,:,p] = (Df == 0).astype(float)
    else:
      K_f[:,:,p] = np.exp(-Df**2/(2*sig_f[p]**2))
  return K_f, sig_f



def _component_weighted_loadings(cross_cov, rotations):
    """Sum |rotations| over components, weighting each component by the
    matching diagonal entry of ``cross_cov``; returns one score per feature."""
    return np.sum(np.diag(np.diag(cross_cov)).dot(np.abs(rotations.T)), axis=0)


def cca_k(X, Y, n_components='full',normalize_ = True):
    """Run a linear CCA and a per-feature-kernel CCA between X and Y.

    Parameters
    ----------
    X : ndarray of shape (N, P)
    Y : ndarray of shape (N, Q)
    n_components : int or 'full', default 'full'
        Number of canonical components. 'full' uses min(N, P, Q), the largest
        value scikit-learn's CCA accepts.
    normalize_ : bool, default True
        If True, MinMax-scale X and Y before the linear CCA, and the averaged
        kernel features before the kernel CCA.

    Returns
    -------
    XY_c : ndarray (n_components, n_components)
        X_c^T Y_c for the linear canonical scores.
    XY_N : ndarray (N, N)
        X_c Y_c^T for the linear canonical scores.
    rel_x, rel_y : ndarray (P,), (Q,)
        Linear relevance per feature: softmax of the component-weighted
        absolute rotations, divided by its maximum.
    KXY_c, KXY_N : ndarray
        Same as XY_c / XY_N for the kernel CCA.
    Krel_x, Krel_y : ndarray (P,), (Q,)
        Kernel relevance per feature: component-weighted absolute rotations
        divided by their maximum (no softmax is applied here).
    sig_X, sig_Y : ndarray (P,), (Q,)
        Kernel bandwidths returned by ``sig_feature``.

    Raises
    ------
    ValueError
        If X and Y do not have the same number of rows.
    """
    N, P = X.shape
    N_y, Q = Y.shape
    if N != N_y:
        raise ValueError(
            f'X and Y must have the same number of samples, got {N} and {N_y}')

    if n_components == 'full':
        # CCA cannot extract more components than samples, so N bounds it too.
        n_components = min(N, P, Q)

    if normalize_:
        X = MinMaxScaler().fit_transform(X)
        Y = MinMaxScaler().fit_transform(Y)

    # ---- linear CCA ----
    cca_X = CCA(n_components=n_components)
    X_c, Y_c = cca_X.fit_transform(X, Y)  # each N x n_components
    XY_c = X_c.T.dot(Y_c)                 # n_components x n_components
    XY_N = X_c.dot(Y_c.T)                 # N x N

    temp = 1  # softmax temperature
    rel_x = softmax(_component_weighted_loadings(XY_c, cca_X.x_rotations_) / temp)
    rel_y = softmax(_component_weighted_loadings(XY_c, cca_X.y_rotations_) / temp)
    rel_x /= np.max(rel_x)
    rel_y /= np.max(rel_y)

    # ---- kernel CCA ----
    # One N x N kernel per feature; the small bias keeps bandwidths non-zero.
    K_fX, sig_X = sig_feature(X, bias=1e-8)
    K_fY, sig_Y = sig_feature(Y, bias=1e-8)

    # Averaging each kernel over its first axis yields one column per feature.
    K_fXX = K_fX.mean(axis=0)  # N x P
    K_fYY = K_fY.mean(axis=0)  # N x Q

    if normalize_:
        K_fXX = MinMaxScaler().fit_transform(K_fXX)
        K_fYY = MinMaxScaler().fit_transform(K_fYY)

    cca_K = CCA(n_components=n_components)
    KX_c, KY_c = cca_K.fit_transform(K_fXX, K_fYY)  # each N x n_components
    KXY_c = KX_c.T.dot(KY_c)                        # n_components x n_components
    KXY_N = KX_c.dot(KY_c.T)                        # N x N

    Krel_x = _component_weighted_loadings(KXY_c, cca_K.x_rotations_)
    Krel_y = _component_weighted_loadings(KXY_c, cca_K.y_rotations_)
    Krel_x /= np.max(Krel_x)
    Krel_y /= np.max(Krel_y)

    return XY_c, XY_N, rel_x, rel_y, KXY_c, KXY_N, Krel_x, Krel_y, sig_X, sig_Y



In [None]:
# Parameters of two synthetic Gaussians (5-D and 3-D). They are only
# referenced by the commented-out np.random.multivariate_normal alternatives;
# no samples are drawn in this cell.
mean = np.zeros(5)
covariance_matrix = np.array([[1, 0.5, 0.3, 0.2, 0.1],
                              [0.5, 2, 0.4, 0.3, 0.2],
                              [0.3, 0.4, 3, 0.5, 0.3],
                              [0.2, 0.3, 0.5, 0.8, 0.4],
                              [0.1, 0.2, 0.3, 0.4, 0.6]])
mean_3d = np.zeros(3)
covariance_matrix_3d = np.array([[1, 0.5, 0.3],
                                 [0.5, 2, 0.4],
                                 [0.3, 0.4, 3]])

# Model names; `i` is the position of 'EEGNet' in that list.
model_list =  ['EEGNet', 'KREEGNet', 'KCS-FCNet', 'DeepConvNet', 'ShallowConvNet', 'TCNet_fusion']
i = model_list.index('EEGNet')

# First CCA view: a copy of the normalised questionnaire data
# (alternative: np.random.multivariate_normal(mean, covariance_matrix, 100)).
X = np.copy(norm_tabular)

# Second CCA view: a copy of the full metric table, with no per-model slicing
# and no per-column rescaling
# (alternative: np.random.multivariate_normal(mean_3d, covariance_matrix_3d, 100)).
Y = np.copy(metric_data)

In [None]:
# Global minimum and maximum of the metric matrix.
np.min(Y), np.max(Y)

In [None]:
# Linear and kernel CCA between questionnaire (X) and metrics (Y), with all
# available components and without the MinMax rescaling (normalize_=False).
XY_c, XY_N, rel_x, rel_y, KXY_c, KXY_N, Krel_x, Krel_y, sig_X, sig_Y = cca_k(X,Y,n_components='full',normalize_=False)

In [None]:
from matplotlib import colormaps

# 7 colours sampled evenly from Pastel1 and 6 from Pastel2 (RGBA rows).
pastel_colors = colormaps['Pastel1'](np.linspace(0, 1, 7))
pastel_colors2 = colormaps['Pastel2'](np.linspace(0, 1, 6))

# 30 x 4 table: each of the first six Pastel1 colours repeated five times in a row.
model_colors = np.repeat(pastel_colors[:6], 5, axis=0)

In [None]:
# Side-by-side bars comparing linear-CCA and kernel-CCA relevance, first for
# the questionnaire features and then for the performance measures.
bar_width = 0.4
q_ticks = [0,9,19,29,39,49]

# ---------- Figure 1: questionnaire features ----------
xx = np.arange(len(rel_x))
fig, ax = plt.subplots()

# Two bars per feature, offset by half a bar width on either side of the tick.
bar1 = ax.bar(xx - bar_width/2, rel_x, bar_width, label='CCA', color='#1f78b4', edgecolor='k', zorder=3)
bar2 = ax.bar(xx + bar_width/2, Krel_x, bar_width, label='CCA K', color='#fe7f0e', edgecolor='k', zorder=3)
ax.legend()

# Background bands: very thick vertical lines drawn behind the bars, one
# pastel colour per band (centre position, line width in points).
_band_centers = [4.5, 13, 21.5, 29.8, 37.5, 45.5, 53.5]
_band_widths = [75, 65, 58.5, 59, 57, 54, 55]
for _center, _width, _color in zip(_band_centers, _band_widths, pastel_colors):
    plt.axvline(_center, lw=_width, c=_color, zorder=1)

plt.grid(True, zorder=2)
plt.xlim(-1,52)
plt.ylabel('Relevance Value', size=12)
plt.yticks(size=12)
plt.xlabel('Questions', size=12)
# Tick labels are 1-based question numbers.
plt.xticks(q_ticks, [x+1 for x in q_ticks], size=12)
plt.ylim(0,1.0)
plt.savefig('Intermodel_CCA_questions.pdf',bbox_inches='tight')
plt.show()


# ---------- Figure 2: performance measures ----------
yy = np.arange(len(rel_y))
fig, ax = plt.subplots()

bar1 = ax.bar(yy - bar_width/2, rel_y, bar_width, label='CCA', color='#1f78b4', edgecolor='k', zorder=3)
bar2 = ax.bar(yy + bar_width/2, Krel_y, bar_width, label='CCA K', color='#fe7f0e', edgecolor='k', zorder=3)
ax.legend()

plt.grid(True, zorder=2)
# Left and right edge of each block of six consecutive bars (one block per model).
from_ = (yy - bar_width-0.1)[list(range(0,36,6))]
until_ = (yy + bar_width+0.1)[list(range(5,36,6))]
plt.xticks(np.linspace(2.5,32.5,6), ['EEGNet', 'KREEGNet', 'KCS-FCNet', 'DeepConvNet', 'ShallowConvNet', 'TCFusion'], rotation=75, ha='right', size=12)
# Background: one very thick horizontal line per model block, behind the bars.
plt.hlines([0.5]*6,from_, until_, colors=pastel_colors2, linewidths=275, zorder=1)
plt.ylabel('Relevance Value', size=12)
plt.yticks(size=12)
plt.xlabel('Performance Measures', size=12)
plt.xlim(-0.4,35.4)
plt.ylim(bottom=0, top=1)
plt.savefig('Intermodel_CCA.pdf',bbox_inches='tight')
plt.show()

In [None]:
# Names and kernel-CCA relevance of the questions whose relevance is >= 0.5.
questionnaire_names[Krel_x >= 0.5], Krel_x[Krel_x >= 0.5]

In [None]:
# Column index modulo 6 of the metric columns that pass the relevance
# threshold, for kernel CCA (first) and linear CCA (second).
# NOTE(review): the kernel threshold here is 0.05 whereas the linear one (and
# the questionnaire selection) use 0.5 — confirm the difference is intended.
np.where(Krel_y >= 0.05)[0]%6, np.where(rel_y >= 0.5)[0]%6

In [None]:
# Print the trace of each cross-product matrix returned by cca_k.
_matrices = {'XY_c': XY_c, 'KXY_c': KXY_c, 'XY_N': XY_N, 'KXY_N': KXY_N}
for var, info in _matrices.items():
    print(f'np.trace({var}) = {np.trace(info):0.3f}')

In [None]:
# Shape of the linear-CCA component cross-product (n_components x n_components).
XY_c.shape

In [None]:
# Show each CCA output matrix as an image with its colour bar, one figure each:
# component-level matrices first, then the subject-by-subject ones.
for _matrix in (XY_c, KXY_c, XY_N, KXY_N):
    plt.imshow(_matrix)
    plt.colorbar()
    plt.show()



In [None]:
# IPython shell escape: quietly install umap-learn, which provides the `umap` module.
!pip install umap-learn -q

In [None]:
import umap

# XY_N (X_c.dot(Y_c.T) in cca_k) is a subject-by-subject similarity: take its
# magnitude and symmetrise it.
affinity_matrix = np.abs(XY_N)
affinity_matrix = (affinity_matrix + affinity_matrix.T) / 2

# metric='precomputed' makes UMAP read its input as pairwise DISTANCES, so the
# similarity is converted first: the most similar pair gets distance 0, and
# every subject is at distance 0 from itself.
_peak = affinity_matrix.max()
if _peak > 0:
    distance_matrix = 1.0 - affinity_matrix / _peak
else:
    distance_matrix = np.zeros_like(affinity_matrix)
np.fill_diagonal(distance_matrix, 0.0)

# random_state fixes the layout between runs (same seed as the metric-data projection).
reducer = umap.UMAP(n_neighbors=10, n_components=2, metric='precomputed', random_state=5)
embedding = reducer.fit_transform(distance_matrix)

embedding = pd.DataFrame(embedding,columns=['UMAP1','UMAP2'])
embedding['Group'] = group_

# Scatter coloured by the first metric column, with per-group density contours.
plt.figure(figsize=(8, 6))
plt.scatter(embedding.iloc[:, 0], embedding.iloc[:, 1], c=metric_data[:,0], s=20, edgecolor='k')
sns.kdeplot(
    data=embedding, x="UMAP1", y="UMAP2", hue="Group",
    levels=3, thresh=.2,
)
plt.title('2D UMAP Projection CCA')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.colorbar()
plt.show()

In [None]:
# KXY_N (KX_c.dot(KY_c.T) in cca_k) is a subject-by-subject similarity: take
# its magnitude and symmetrise it.
affinity_matrix = np.abs(KXY_N)
affinity_matrix = (affinity_matrix + affinity_matrix.T) / 2

# metric='precomputed' makes UMAP read its input as pairwise DISTANCES, so the
# similarity is converted first: the most similar pair gets distance 0, and
# every subject is at distance 0 from itself.
_peak = affinity_matrix.max()
if _peak > 0:
    distance_matrix = 1.0 - affinity_matrix / _peak
else:
    distance_matrix = np.zeros_like(affinity_matrix)
np.fill_diagonal(distance_matrix, 0.0)

# random_state fixes the layout between runs (same seed as the metric-data projection).
reducer = umap.UMAP(n_neighbors=10, n_components=2, metric='precomputed', random_state=5)
embedding = reducer.fit_transform(distance_matrix)

embedding = pd.DataFrame(embedding,columns=['UMAP1','UMAP2'])
embedding['Group'] = group_

# Scatter coloured by the first metric column, with per-group density contours.
plt.figure(figsize=(8, 6))
plt.scatter(embedding.iloc[:, 0], embedding.iloc[:, 1], c=metric_data[:,0], s=20, edgecolor='k')
sns.kdeplot(
    data=embedding, x="UMAP1", y="UMAP2", hue="Group",
    levels=3, thresh=.2,
)
plt.title('2D UMAP Projection CCA_K')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.colorbar()
plt.show()

In [None]:
# Seeded 2-D UMAP fitted directly on the raw metric table (its rows are the
# points to embed; no precomputed matrix is used here).
reducer = umap.UMAP(
    n_components=2,
    n_neighbors=10,
    random_state=5,
)
embedding = reducer.fit_transform(metric_data)

In [None]:
# Wrap the 2-D coordinates in a DataFrame and attach each subject's group label.
embedding = pd.DataFrame(embedding, columns=['UMAP1', 'UMAP2'])
embedding['Group'] = group_

# Points coloured by the first metric column; contours show per-group density.
plt.figure(figsize=(8, 6))
plt.scatter(
    embedding['UMAP1'], embedding['UMAP2'],
    c=metric_data[:, 0], s=20, edgecolor='k',
)
sns.kdeplot(data=embedding, x="UMAP1", y="UMAP2", hue="Group", levels=3, thresh=.2)
plt.title('2D UMAP Projection Metric Data')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.colorbar()
plt.show()

In [None]:
# 2-D UMAP fitted directly on the normalised questionnaire features (no
# precomputed matrix is involved here).
# random_state makes the layout reproducible; the seed is the same one used
# for the metric-data projection.
reducer = umap.UMAP(n_neighbors=10, n_components=2, random_state=5)

embedding = reducer.fit_transform(norm_tabular)

In [None]:
# Wrap the 2-D coordinates in a DataFrame and attach each subject's group label.
embedding = pd.DataFrame(embedding, columns=['UMAP1', 'UMAP2'])
embedding['Group'] = group_

# Points coloured by the first metric column; contours show per-group density.
plt.figure(figsize=(8, 6))
plt.scatter(
    embedding['UMAP1'], embedding['UMAP2'],
    c=metric_data[:, 0], s=20, edgecolor='k',
)
sns.kdeplot(data=embedding, x="UMAP1", y="UMAP2", hue="Group", levels=3, thresh=.2)
plt.title('2D UMAP Projection Tabular Data')
plt.xlabel('UMAP Dimension 1')
plt.ylabel('UMAP Dimension 2')
plt.colorbar()
plt.show()