# CA3 model

This notebook is for building out the CA3 model as previous implementation doesn't work.

Formula is:
$$
min_{\vec{w}_{x_1}, \vec{w}_{x_2}, \vec{w}_a, \vec{w}_b} \left( -\vec{w}_{x_1}^T S_{xa} \vec{w}_a - \vec{w}_{x_2}^T S_{xb} \vec{w}_b + \sum_{i \in \{x_1, x_2, a, b\}} \frac{1}{2} \lambda_i \left( \vec{w}_i^T S_{ii} \vec{w}_i - 1 \right) + \theta_{rr}(\vec{w}_{x_1}, \vec{w}_{x_2}) \right)
$$


In [1]:
from gemmr.generative_model import GEMMR
from gemmr.estimators import SVDCCA
import numpy as np
from scipy.optimize import minimize

## Generate some data

In [2]:
model_definition = GEMMR('cca', wx=1, wy=1, r_between=0.3)
behavioural_data_study1, imging_data_study1 = model_definition.generate_data(n=200)
behavioural_data_study2, imging_data_study2 = model_definition.generate_data(n=190)
study1 = (imging_data_study1, behavioural_data_study1) 
study2 = (imging_data_study2, behavioural_data_study2)

In [3]:
test= SVDCCA().fit(imging_data_study1, behavioural_data_study1)
test.corrs_

array([0.2994662])

In [5]:
optimum = test.transform(imging_data_study1, behavioural_data_study1)

## Step 1: Covariance matrix

In [27]:
def mean_center(data: np.ndarray) -> np.ndarray:
    """
    Function to demean data.

    Parmeteres
    ----------
    data: np.ndarray
        data to demean

    Returns
    -------
    np.ndarray: array
        demeaned data
    """
    return data - data.mean(axis=0)

In [28]:
def cross_cov(matrix_1: np.ndarray, matrix_2: np.ndarray) -> np.ndarray:
    """
    Function to calculate 
    covariance matrix

    Parameters
    ----------
    matrix_1: np.ndarray
        A matrix tht should 
        correspond to subject by 
        features
    matrix_2: np.ndarray
        A matrix that should 
        correspond to features by
        feautres 

    Returns
    -------
    np.ndarray: array
        array of cross covariance matrix
    """
    return (matrix_1.T @ matrix_2) / matrix_1.shape[0] 

In [29]:
def data_able_to_process(study_pair: tuple, behav_data: np.ndarray, img_data: np.ndarray) -> bool:
    """
    Function to check that data
    is in correct format to be processed

    Parameters
    ----------
     study_pair: tuple, 
         tuple of behavioural data 
         and imging data
     behav_data: np.ndarray
         array of behav_data 
     img_data: np.ndarray
         array of img_data
    
    Returns
    -------
    bool: boolean
        bool of if failed or not
    """
    if not isinstance(study_pair, (tuple, list)) or len(study_pair) != 2:
        print("Given argument isn't a pair of datasets")
        return False
    if not isinstance(behav_data, np.ndarray) or not isinstance(img_data, np.ndarray):
        print("Data provided isn't a numpy array")
        return False
    if behav_data.shape[0] == 0 or img_data.shape[0] == 0 or behav_data.shape[0] != img_data.shape[0]:
        print(f"Mismatch between ({behav_data.shape[0]} and {img_data.shape[0]})")

    return True

In [30]:
def calculate_covariance_matricies(*study_pairs) -> dict:
    """
    Calculates covariance matrices and auto covariance
    matricies

    Parameters
    ----------
    study_pairs: tuple
        a tuple or list containing two numpy arrays:
        (behavioural_data, imaging_data).
        Assumes data is (subjects x features).

    Returns
    -------
    covariance_results: dict
        dictionary of covariance and auto-covariance matrices

    """
    covariance_results = {}
    for idx, study_pair in enumerate(study_pairs):
        img_data, behav_data  = study_pair
        if not data_able_to_process(study_pair, behav_data, img_data):
            continue
        behav_data = mean_center(behav_data)
        img_data = mean_center(img_data)
        study_num = idx + 1
        try:
            covariance_results[f"s_behav{study_num}_behav{study_num}"] = cross_cov(behav_data, behav_data)
            covariance_results[f"s_img{study_num}_img{study_num}"] = cross_cov(img_data, img_data)
            covariance_results[f"s_img{study_num}_behav{study_num}"] = cross_cov(img_data, behav_data)

        except Exception as e:
            print(f"Error calculating covariances for Study {study_num}: {e}")
            return None
    return covariance_results

In [31]:
covariance_mat = calculate_covariance_matricies(study1, study2)

In [32]:
s_x1a = cross_cov(behavioural_data_study1, imging_data_study1)
s_x2b = cross_cov(behavioural_data_study2, imging_data_study2)
s_x1x1 = cross_cov(behavioural_data_study1, behavioural_data_study1)
s_x2x2 = cross_cov(behavioural_data_study2, behavioural_data_study2)
s_aa = cross_cov(imging_data_study1, imging_data_study1)
s_bb = cross_cov(imging_data_study2, imging_data_study2)

## Step 2. Intialization of weights 

In [33]:
def weight_intialization(*weights) -> np.ndarray:
    """
    Define a set of random starting 
    weights
    
    Parameters
    ----------
    weights: tuple(int)
        tuple of set amount
        of int values
    
    Returns
    -------
    np.ndarrray
        array of numpy values
    """
    return np.random.randn(sum(weights))

In [34]:
dim = {
  '1': behavioural_data_study1.shape[1], 
   '2': behavioural_data_study2.shape[1],
    '3': imging_data_study1.shape[1],
    '4': imging_data_study2.shape[1]
 }

In [35]:
weights_0 = weight_intialization(
    behavioural_data_study1.shape[1], 
    behavioural_data_study2.shape[1],
    imging_data_study1.shape[1],
    imging_data_study2.shape[1]
    )

## Step 3. Objective function

In [36]:
dx1_shape = s_x1x1.shape[0]
dx2_shape = s_x2x2.shape[0]
da_shape = s_aa.shape[0]
db_shape = s_bb.shape[0]
dx_shape = dx1_shape + dx2_shape
dac_shape =  dx_shape + da_shape

def get_dimensions(s_x1x1, s_x2x2, s_aa):
    dx1_shape = s_x1x1.shape[0]
    dx2_shape = s_x2x2.shape[0]
    da_shape = s_aa.shape[0]
    dx_shape =  dx1_shape + dx2_shape
    return {
        'dx1_shape': dx1_shape,
        'dx_shape' : dx_shape,
        'dac_shape':  dx_shape + da_shape
    }

def get_weights(weight_array, dx1_shape, dx_shape, dac_shape):
    return {
        "wx1": weight_array[:dx1_shape],
        "wx2": weight_array[dx1_shape:dx_shape],
        "wa": weight_array[dx_shape:dac_shape],
        "wb": weight_array[dac_shape:]
    }


In [37]:
def cross_cov_term(weight_beh, cov_mat, weight_img):
    return -weight_img.T @ (cov_mat @ weight_beh)

In [38]:
def regularization_term(weight, cov_mat):
    return 0.5 * 1.0 * (weight.T @ (cov_mat @ weight) - 1)

In [39]:
def dissimilarity_penality(theta_r, img_weight1, img_weight2):
    return theta_r * 0.5 * np.sum((img_weight1 - img_weight2) ** 2)


In [40]:
def objective_function(weights, s_x1a, s_x2b, s_x1x1, s_x2x2, s_aa, s_bb, theta_r):
    dimensions = get_dimensions(s_x1x1, s_x2x2, s_aa)
    weights = get_weights(
            weights, 
            dimensions['dx1_shape'], 
            dimensions['dx_shape'], 
            dimensions['dac_shape'])
    term1 = cross_cov_term(weights['wx1'],s_x1a, weights['wa'])
    term2 = cross_cov_term(weights['wx2'],s_x2b, weights['wb'])
    reg_x1 = regularization_term(weights['wx1'], s_x1x1)
    reg_x2 = regularization_term(weights['wx2'], s_x2x2)
    reg_a = regularization_term(weights['wa'], s_aa)
    reg_b = regularization_term(weights['wb'], s_bb)
    theta_r = dissimilarity_penality(theta_r, weights['wa'], weights['wb'])
    return term1 + term2 + reg_x1 + reg_x2 + reg_a + reg_b + theta_r


## Step 4: Minimise

In [None]:
best_loss = float('inf')
optimal_theta_r = None
optimium_model = None

for theta_r in np.logspace(-3, 2, 10):
    weights_0 = weight_intialization(
        behavioural_data_study1.shape[1], 
        behavioural_data_study2.shape[1],
        imging_data_study1.shape[1],
        imging_data_study2.shape[1])  # re-init each time
    res = minimize(
        objective_function,
        weights_0,
        args=(s_x1a, s_x2b, s_x1x1, s_x2x2, s_aa, s_bb, theta_r),
        method='L-BFGS-B'
    )
    if res.status !=0:
        print(res.status)
        continue


    if res.fun < best_loss:
        best_loss = res.fun
        best_theta_r = theta_r
        optimium_model = res

print(f"Best θ_r: {best_theta_r}")
print(f"Best loss: {best_loss}")

In [None]:
dimensions = get_dimensions(s_x1x1, s_x2x2, s_aa)
weights = get_weights(optimium_model.x,
            dimensions['dx1_shape'], 
            dimensions['dx_shape'], 
            dimensions['dac_shape'])

In [None]:
# Get projections (scores)
scores_x1 = behavioural_data_study1 @ weights['wx1']
scores_x2 = behavioural_data_study2 @ weights['wx2']
scores_a  = imging_data_study1 @ weights['wa']
scores_b  = imging_data_study2 @ weights['wb']


In [None]:
display(np.corrcoef(scores_x1, scores_a)[0, 1])
display(np.corrcoef(scores_x2, scores_b)[0, 1])
display(np.linalg.norm(weights['wa'] - weights['wb']))

np.float64(0.11538425261898907)

np.float64(0.07889484464506155)

np.float64(0.00014508777456870848)

## Putting it all together

In [6]:
import numpy as np

class GridSearchCA3:
    def __init__(self, l2_values, theta=0, tol=1e-6, maxiter=500, verbose=False):
        """
        Custom grid search to find the best l2 value for the CA3 model.

        Parameters
        ----------
        l2_values : list of float
            The l2 regularization parameters to search over.
        theta : float
            The dissimilarity regularization parameter (shared across all models).
        tol : float
            Tolerance for optimization.
        maxiter : int
            Maximum number of optimization iterations.
        verbose : bool
            If True, print progress during search.
        """
        self.l2_values = l2_values
        self.theta = theta
        self.tol = tol
        self.maxiter = maxiter
        self.verbose = verbose
        self.best_model_ = None
        self.best_score_ = -np.inf
        self.best_l2_ = None
        self.all_results_ = []

    def fit(self, *data_sets):
        """
        Fit CA3 models with each l2 value and track the one with best score.
        """
        for l2 in self.l2_values:
            model = CA3(l2=l2, theta=self.theta, tol=self.tol, maxiter=self.maxiter)
            model.fit(*data_sets)
            score = model._score(*data_sets)

            self.all_results_.append((l2, score))

            if self.verbose:
                print(f"l2: {l2:.4f}, score: {score:.4f}")

            if score > self.best_score_:
                self.best_score_ = score
                self.best_model_ = model
                self.best_l2_ = l2

        if self.verbose:
            print(f"Best l2: {self.best_l2_:.4f}, Best score: {self.best_score_:.4f}")

    def get_best_model(self):
        return self.best_model_

    def get_best_l2(self):
        return self.best_l2_

    def get_all_results(self):
        return self.all_results_


In [4]:
class CA3:
    """
    CA3 class. 
    A class to do CA3 

    Usage
    -----
    ca3 = CA3(l2=0.5, theta=1)
    ca3.fit(study1, study2)
    transformed = ca3.transform(study1, study2)
    """
    def __init__(self, l2: float=1, theta: float=0, tol=1e-6, maxiter=500, normalise_weights=True):
        self.l2_ = l2
        self.theta_ = theta
        self.intial_weights_ = None
        self.dims_ = []
        self.best_loss = float('inf')
        self.weights_ = None
        self.covariances_ = {}
        self.tol_ = tol
        self.maxiter_ = maxiter
        self.normalise_weights = normalise_weights
        self.canonical_correlations_ = None

    def fit(self, *data_sets: tuple) -> None:
        """
        Method to fit the CA3 model to a given 
        set of datasets

        Parameters
        ----------
        data_sets: tuple
            a tuple of X, Y data 
            from an arbituray number of 
            datasets
        
        Returns
        -------
        None
        """
        self._calculate_covariance_matricies(*data_sets)
        self._get_dimensions(*data_sets)
        self._weight_intialization()
        self._optimise()


    def transform(self, *data_sets: tuple) -> list[np.ndarray]:
        """
        Methods to transform data sets into canonical
        projects.

        Parameters
        ----------
        data_sets: tuple
            a tuple of X, Y data 
            from an arbituray number of 
            datasets

        Returns
        --------
        projects: list[np.ndarray]
            conatins a list of the 
            projections of each dataset in 
            ndarry of n_components by n_samples
        """
        assert self.weights_ is not None, "Model must be fitted before transform can be called."
        assert len(data_sets) == len(self.dims_), "Model fitted with different number of datasets."
        
        projects =  [
            np.stack([
                self._normalise(img_data @ wx) if self.normalise_weights else img_data @ wx,
                self._normalise(beh_data @ wb) if self.normalise_weights else beh_data @ wb
            ], axis=0)
            for (img_data, beh_data), (wx, wb) in zip(data_sets, self.weights_)
        ]
        
        self.canonical_correlations_ = [np.corrcoef(data_sets[0], data_sets[1])[0,1] for data_sets in projects]   
        return projects
    
    
    def fit_transform(self, *data_sets) -> list[np.ndarray]:
        """
        Methods to fit a CA3 model and then transform
        the data.

        Parameters
        ----------
        data_sets: tuple
            a tuple of X, Y data 
            from an arbituray number of 
            datasets

        Returns
        --------
        projects: list[np.ndarray]
            conatins a list of the 
            projections of each dataset in 
            ndarry of n_components by n_samples.
        """
        self.fit(*data_sets)
        return self.transform(*data_sets)
    
    def calculate_canonical_correlations(self) -> list[float]:
        """
        Method to obtain the canonical correlations.
        Model must have been fitted and transfomed 
        before.

        Parameters
        ----------
        None

        Returns
        -------
        canonical_correlations: list[float]
            list of canonical correlations
        """
        assert self.canonical_correlations_ is not None, "Model must be fitted and transfomed before correlations can be returned"
        return self.canonical_correlations_
    

    def _weight_intialization(self) -> np.ndarray:
        """
        Method to define a set of random starting 
        weights
        
        Parameters
        ----------
        weights: tuple(int)
            tuple of set amount
            of int values
        
        Returns
        -------
        np.ndarrray
            array of numpy values
        """ 
        init_weights = []

        for idx, _ in enumerate(self.dims_):
            s_xb = self.covariances_[f"s_img{idx+1}_behav{idx+1}"]
            # Perform SVD on the cross-covariance matrix
            try:
                U, _, Vt = np.linalg.svd(s_xb, full_matrices=False)
            except np.linalg.LinAlgError as e:
                raise RuntimeError(f"SVD failed for dataset {idx+1}: {e}")
    

            wx = U[:, 0]
            wb = Vt.T[:, 0]    
            s_xx = self.covariances_[f"s_img{idx+1}_img{idx+1}"]
            s_bb = self.covariances_[f"s_behav{idx+1}_behav{idx+1}"]
    
            wx = wx / np.sqrt(wx.T @ s_xx @ wx)
            wb = wb / np.sqrt(wb.T @ s_bb @ wb)
    
            init_weights.extend(wx)
            init_weights.extend(wb)
    
        self.intial_weights_ = np.array(init_weights)

    def _calculate_covariance_matricies(self, *data_sets) -> dict:
        """
        Calculates covariance and auto covariance
        matricies
    
        Parameters
        ----------
        study_pairs: tuple
            a tuple or list containing two numpy arrays:
            (behavioural_data, imaging_data).
            Assumes data is (subjects x features).
    
        Returns
        -------
        covariance_results: dict
            dictionary of covariance and auto-covariance matrices
    
        """
        for idx, study_pair in enumerate(data_sets):
            img_data, behav_data  = study_pair
            self._data_able_to_process(study_pair)
            behav_data = self._normalise(behav_data)
            img_data = self._normalise(img_data)
            study_num = idx + 1
            try:
                self.covariances_[f"s_behav{study_num}_behav{study_num}"] = self._create_covariance_matrix(behav_data, behav_data)
                self.covariances_[f"s_img{study_num}_img{study_num}"] = self._create_covariance_matrix(img_data, img_data)
                self.covariances_[f"s_img{study_num}_behav{study_num}"] = self._create_covariance_matrix(img_data, behav_data)
    
            except Exception as e:
                print(f"Error calculating covariances for Study {study_num}: {e}")

    def _data_able_to_process(self, study_pair: tuple) -> bool:
        """
        Method to check that data
        is in correct format to be processed
    
        Parameters
        ----------
         study_pair: tuple, 
             tuple of behavioural data 
             and imging data

        
        Returns
        -------
        bool: boolean
            bool of if failed or not
        """
        assert isinstance(study_pair, (tuple, list)) and len(study_pair) == 2, "Given argument isn't a pair of datasets"
        assert isinstance(study_pair[0], np.ndarray) or not isinstance(study_pair[1], np.ndarray), "Data provided ins't numpy array"
        assert (study_pair[0].shape[0] != 0) and (study_pair[1].shape[0] != 0), \
             "Study pairs contains not data"
        assert study_pair[0].shape[0] == study_pair[1].shape[0], \
              f"Mismatch between ({study_pair[0].shape[0]} and {study_pair[1].shape[0]})"
            
    def _optimise(self):
        """
        Method to minimise the 
        objective function

        Parameters
        ----------
        None

        Returns
        --------
        None 
        """
        model = minimize(
            self._objective_function,
            self.intial_weights_,
            options={'gtol': self.tol_, "maxiter": self.maxiter_},
            args=(self.covariances_, self.theta_, self.l2_),
        )
        self.best_loss = model.fun
        self.weights_ = self._split_weights(model.x)

    
    def _get_dimensions(self, *data_sets) -> None:
        """
        Method to check the number of dimensions
        that the 
        """
        self.dims_ = [(behav.shape[1], img.shape[1]) for behav, img in data_sets]
    
    def _split_weights(self, weights: np.ndarray) -> list[np.ndarray]:
        """
        Splits the flat weight vector weights into individual vectors
        for each x and b dataset.

        Parameters
        ----------
        weights: np.ndarray
            flatten numpy array
        
        Returns
        -------
        split_weights: list[np.ndarray]
            list of weights split
            wx and wb 

        """
        offset = 0
        split_weights = []
        for img_dim, behav_dim in self.dims_:
            wx = weights[offset:offset + img_dim]
            offset += img_dim  
            wb = weights[offset:offset + behav_dim]
            offset += behav_dim
            split_weights.append((wx, wb))
        return split_weights 
    
    def _objective_function(self, 
                            weights: np.ndarray, 
                            covariances: dict, 
                            theta: float, 
                            l2: float) -> float:
        """
        Objective function of the CA3 class

        Parameters
        ----------
        weights: np.ndarray
            weights 
        covariances: dict
            dict of cross/auto covariance
            matricies
        theta: float
            theta penality
        l2: float
            regularization penailty
        
        Returns
        -------
        total_loss: float
           total loss of the objective function
        """
        total_loss = 0
        weights_ = self._split_weights(weights)
        for idx, (wx, wb) in enumerate(weights_):
            s_xb = covariances[f"s_img{idx+1}_behav{idx+1}"]
            s_xx = covariances[f"s_img{idx+1}_img{idx+1}"]
            s_bb = covariances[f"s_behav{idx+1}_behav{idx+1}"]
            total_loss += self._cross_cov_term(wb, s_xb, wx) 
            total_loss += self._regularization_term(wx, s_xx, l2)
            total_loss += self._regularization_term(wb, s_bb, l2)
    
        # Similarity penalty across imaging weights
        if theta > 0 and len(weights_) > 1:
            for img_data in range(len(weights_)):
                for next_img_data in range(img_data + 1, len(weights_)):
                    total_loss += self._dissimilarity_penality(theta, weights_[img_data][0], weights_[next_img_data][0])
    
        return total_loss
    
    def _create_covariance_matrix(self, matrix_1: np.ndarray, matrix_2: np.ndarray) -> np.ndarray:
        """
        Function to calculate cross-auto
        covariance matrix
    
        Parameters
        ----------
        matrix_1: np.ndarray
            A matrix tht should 
            correspond to subject by 
            features
        matrix_2: np.ndarray
            A matrix that should 
            correspond to features by
            feautres 
    
        Returns
        -------
        np.ndarray: array
            array of covariance matrix
        """
        return (matrix_1.T @ matrix_2) / matrix_1.shape[0] 

    
    def _normalise(self, data: np.ndarray) -> np.ndarray:
        """
        Function to normalise data.
    
        Parmeteres
        ----------
        data: np.ndarray
            data to demean
    
        Returns
        -------
        np.ndarray: array
            demeaned data
        """
        dmean = data - data.mean(axis=0)
        std = data.std(axis=0, ddof=1)
        std = np.where(std == 0.0, 1.0, std)
        return dmean / std

    def _cross_cov_term(self, weight_beh: np.ndarray, cov_mat: np.ndarray, weight_img: np.ndarray) -> np.ndarray:
        """
        Method to calculate the cross covarance term
        in the objective function

        Parameters
        ----------
        weight_beh: np.ndarray
            set of weights for wb
        cov_mat: np.ndarray
             covariance matrix for 
             wx wb
        weight_img: np.ndarray
            set of weights for wx

        Returns
        -------
        np.ndarray: np.array
            cross covariance term
        """
        return -weight_img.T @ (cov_mat @ weight_beh)

    def _regularization_term(self, weight: np.ndarray, cov_mat: np.ndarray, lambda_i: float) -> float:
        """
        Method to calculate the regularization term
        in the objective function

        Parameters
        ----------
        weight: np.ndarray
            set of weights
        cov_mat: np.ndarray
            auto covariance matrix
        lambda_i: float
            regularization parameter

        Returns
        -------
        float: float
            regularization term of the objective function
        
        """
        return 0.5 * lambda_i * (weight.T @ (cov_mat @ weight) - 1)

    def _dissimilarity_penality(self, theta_r: float, img_weight1: np.ndarray, img_weight2: np.ndarray) -> float:
        """
        Method to return dissimilarity penality

        Parameters
        -----------
        theta_r: float
           theta penality. 
        img_weight1: np.ndarray
            weights of imaging data 
        img_weight2: np.ndarray
            weights of second imaging
            data
        
        Returns
        -------
        float: float
            dissimilarity penality
        """
        return theta_r * 0.5 * np.sum((img_weight1 - img_weight2) ** 2)
    
    def _score(self, *data_sets: tuple) -> float:
        """
        Method used to evaluate model performance. 

        Parameters
        -----------
        data_sets: tuple
            a tuple of X, Y data 
            from an arbituray number of 
            datasets

        Returns
        -------
        float: float
            mean of correlation 
            values across datasets

        """
        if self.weights_ is None:
            raise ValueError("Model must be fitted before scoring.")

        result = self.transform(*data_sets)
        correlations = self.calculate_canonical_correlations()
        all_corrs = [corr[0] for corr in correlations.values()]
        return np.mean(all_corrs)
    

In [None]:
from sklearn.model_selection import GridSearchCV

# Assuming `data` is a list of tuples: [(img1, beh1), (img2, beh2), ...]
# Define a custom scorer if needed, but the `score` method should suffice

search = GridSearchCA3(l2_values=[0.01, 0.1, 1.0, 10.0], theta=0.5, verbose=True)
search.fit(study1)

best_model = search.get_best_model()
print("Best L2:", search.get_best_l2())
print("Best score:", search.best_score_)

l2: 0.0100, score: 0.4592
l2: 0.1000, score: 0.4614
l2: 1.0000, score: 0.1377
l2: 10.0000, score: 0.0029
Best l2: 0.1000, Best score: 0.4614
Best L2: 0.1
Best score: 0.4614104420916759


In [5]:
ca3 = CA3(l2=0.0)
#ca3.fit(study1, study2)
transfomed = ca3.fit_transform(study1)

In [199]:
transfomed

[array([[-2.01541487,  1.54195171, -0.82720464,  0.44531447, -1.10737123,
          1.89149745,  1.42566874, -2.75281769,  0.14666389, -0.83988258,
         -2.65004166, -2.08228398, -0.63950613,  0.30895288,  0.11925364,
          0.82569118, -0.71717044, -0.16163601,  0.5003831 , -1.11803469,
          1.20368527,  0.66843183, -0.35736554,  1.58343172,  0.46523375,
          1.67110053,  0.30132398,  1.01197457, -1.33225903,  0.06807949,
          1.78555422, -2.02771024, -0.67956878,  0.43363717,  0.66626076,
         -0.77252174, -0.06647668, -0.60448251, -0.70095669,  0.39902183,
         -0.84551603,  0.16606879,  1.60429135, -1.10802307, -0.58143167,
         -0.43654064,  1.46537596, -0.69549266,  1.09734877, -1.17342806,
         -0.8643437 , -1.59824084,  0.07726928,  1.56111942,  0.00839318,
         -2.54446766,  1.18810269, -0.32386299,  0.28368249,  1.07915144,
         -0.41293849,  0.39427985, -0.80754778,  0.14618212, -0.25924027,
          0.35763027, -0.79107642,  0.

In [6]:
correl = ca3.calculate_canonical_correlations()
correl

[np.float64(0.29946620246301203)]

In [7]:
from sklearn.cross_decomposition import CCA

test_sck = CCA(n_components=1)
test_sck.fit(study1[0], study1[1])
X1_proj_cca, Y1_proj_cca = test_sck.transform(study1[0], study1[1])
sklearn_corr = np.corrcoef(X1_proj_cca.ravel(), Y1_proj_cca.ravel())[0,1]
print(f"Correlation from sklearn CCA: {sklearn_corr}")

Correlation from sklearn CCA: 0.29946620246301203


In [162]:
import matplotlib.pyplot as plt
import scipy.stats 
X1_proj_ca3, Y1_proj_ca3 = transfomed['projections']['study0']
display(scipy.stats.ttest_ind(X1_proj_ca3.flatten(), X1_proj_cca.flatten() ))
plt.figure(figsize=(12, 5))


plt.subplot(1, 2, 1)
plt.scatter(X1_proj_ca3, Y1_proj_ca3, c='teal')
m, b = np.polyfit(X1_proj_ca3.flatten(), Y1_proj_ca3.flatten(), 1)
plt.plot(X1_proj_ca3, m*X1_proj_ca3 + b, color='black', linestyle='--')
plt.text(0.05, 0.95, f"r = {transfomed['correlations']['study0'][0]:.2f}", 
         transform=plt.gca().transAxes, va='top', ha='left')
plt.title("CA3 projections")
plt.xlabel("Imaging")
plt.ylabel("Behavior")

plt.subplot(1, 2, 2)
plt.scatter(X1_proj_cca, Y1_proj_cca, c='orange', label=f"r = {sklearn_corr:.2f}")
m, b = np.polyfit(X1_proj_cca.flatten(), Y1_proj_cca.flatten(), 1)
plt.plot(X1_proj_cca, m*X1_proj_cca + b, color='black', linestyle='--')
plt.text(0.05, 0.95, f"r = {sklearn_corr:.2f}", 
         transform=plt.gca().transAxes, va='top', ha='left')
plt.title("sklearn CCA projections")
plt.xlabel("Imaging")
plt.ylabel("Behavior")
plt.legend()
plt.tight_layout()
plt.show()

TypeError: list indices must be integers or slices, not str