<a href="https://colab.research.google.com/github/jameschapman19/cca_zoo/blob/master/interactive_cca.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Visualizing the effect of regularisation on CCA using IPython Widgets!
I learnt how to use widgets in IPython and thought it would be nice to use them to demonstrate the effect of L2 regularisation on CCA.

## Install cca-zoo and import packages

In [17]:
!pip install cca-zoo



In [18]:
import ipywidgets as widgets
import seaborn as sns
from cca_zoo.models import rCCA
from cca_zoo.data import generate_covariance_data
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

## Plotting Helpers

In [19]:
# Plotting Helpers
def plot_latent_train_test(train_scores, test_scores, title=None):
    """Pair-plot the projections of both views for train and test data.

    Every view-1 projection is plotted against every view-2 projection in a
    lower-triangle ("corner") layout, coloured by phase ('train' / 'test').
    The panel pairing projection k of view 1 with projection k of view 2 is
    annotated with the correlation of those two projections on the test data.

    Parameters
    ----------
    train_scores, test_scores : sequence of two 2-D arrays
        ``scores[0]`` holds the view-1 projections and ``scores[1]`` the
        view-2 projections; rows are samples, columns are latent dimensions.
        The column names are derived from ``train_scores``.
    title : str, optional
        Figure title. Nothing is added when it is falsy.

    Returns
    -------
    The grid object returned by ``sns.pairplot``.
    """
    view1_cols = [f'view 1 projection {k}' for k in range(1, train_scores[0].shape[1] + 1)]
    view2_cols = [f'view 2 projection {k}' for k in range(1, train_scores[1].shape[1] + 1)]

    def _labelled_frame(scores, phase):
        # One row per sample: the phase label followed by both views' projections.
        frame = pd.DataFrame(
            {'phase': np.asarray([phase] * scores[0].shape[0]).astype(str)})
        frame[view1_cols] = scores[0]
        frame[view2_cols] = scores[1]
        return frame

    combined = pd.concat(
        [_labelled_frame(train_scores, 'train'), _labelled_frame(test_scores, 'test')],
        axis=0)
    grid = sns.pairplot(combined, hue='phase', x_vars=view1_cols, y_vars=view2_cols, corner=True)
    if title:
        grid.fig.suptitle(title)
        grid.fig.tight_layout()

    n_dims = len(view1_cols)
    # corrcoef stacks the columns of both views; the upper-right block holds the
    # view-1 vs view-2 correlations and its diagonal the matched pairs.
    cross_block = np.corrcoef(test_scores[0], test_scores[1], rowvar=False)[:n_dims, n_dims:]
    matched_corrs = np.diag(cross_block)
    for k in range(n_dims):
        # The text is anchored at the point (0, 0) of each matched panel.
        grid.axes[k, k].annotate(f'test correlation: {matched_corrs[k]:.2f}', (0, 0))
    grid.fig.tight_layout()
    return grid

## Make Data

### Parameters

In [30]:
# Settings for the simulated two-view dataset and the fitted model.
N = 200             # first positional argument to generate_covariance_data
X_features = 200    # first entry of view_features (view X)
Y_features = 200    # second entry of view_features (view Y)
latent_dims = 2     # latent dimensions, used for both data generation and rCCA

### Generation

In [31]:
# Fix the random state so that "Restart & Run All" reproduces the same data
# and the same train/test split.
RANDOM_SEED = 0
np.random.seed(RANDOM_SEED)

# Simulate the two views X and Y; the second return value is not needed here.
# NOTE(review): assumes generate_covariance_data draws from NumPy's global RNG
# when no random_state is passed — confirm for the installed cca-zoo version.
(X, Y), _ = generate_covariance_data(
    N,
    view_features=[X_features, Y_features],
    latent_dims=latent_dims,
    correlation=1,
    structure=['toeplitz', 'toeplitz'],
    sigma=0.999,
    decay=0.9,
)

# Hold out part of the samples (scikit-learn's default split size) so the
# fitted model can be evaluated on data it has not seen.
X_tr, X_te, Y_tr, Y_te = train_test_split(X, Y, random_state=RANDOM_SEED)

## Change the amount of regularisation from 0 (CCA) to 1 (PLS)
To get finer control close to 1, the slider is logarithmic and its value is subtracted from 1: the model is fitted with a regularisation of 1 minus the slider value. The title of the figure gives the amount of regularisation used by the model.

There's a bit of a lag as the model needs to fit in the background

In [32]:
from ipywidgets import HBox, Label

style = {'description_width': 'initial'}


@widgets.interact(
    c=widgets.FloatLogSlider(
        value=1,
        base=10,
        min=-5,
        max=0,
        description='1 minus c',
        readout=True,
        readout_format='.5f',
        style=style,
    )
)
def interactive_cca(c):
    """Fit rCCA for the current slider value and plot train/test projections.

    Parameters
    ----------
    c : float
        Slider value. The model is fitted with regularisation ``1 - c``, and
        that same value is shown in the figure title.
    """
    regularisation = 1 - c
    model = rCCA(latent_dims=latent_dims, c=regularisation).fit(X_tr, Y_tr)
    held_out_scores = model.transform(X_te, Y_te)
    plot_latent_train_test(
        model.scores,
        held_out_scores,
        f'Pair plot of latent dimensions for train and test data c={regularisation:.5f}',
    )

interactive(children=(FloatLogSlider(value=1.0, description='1 minus c', max=0.0, min=-5.0, readout_format='.5…