<a href="https://colab.research.google.com/github/jameschapman19/cca_zoo/blob/master/interactive_cca.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Visualizing the effect of regularisation on CCA using IPython Widgets!
I learnt how to use widgets in IPython and thought it would be nice to use them to demonstrate the effect of L2 regularisation on CCA.

## Install cca-zoo and import packages

In [59]:
!pip install cca-zoo



In [102]:
import ipywidgets as widgets
import seaborn as sns
from cca_zoo.models import rCCA
from cca_zoo.data import generate_covariance_data
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# Configure seaborn's global plot theme (font_scale=1) before any figure is drawn.
sns.set(font_scale=1)

## Plotting Helpers

In [109]:
# Plotting Helpers
def plot_latent_train_test(train_scores, test_scores, title=None):
    """Plot train/test latent scores and their per-dimension correlations.

    Two figures are drawn:

    1. a seaborn pair plot of the X-view score columns against the Y-view
       score columns, coloured by phase ('train' / 'test');
    2. a bar plot of the correlation between matching X and Y dimensions,
       one bar per phase and dimension.

    Parameters
    ----------
    train_scores : sequence of two 2-D arrays
        ``train_scores[0]`` holds the X-view scores and ``train_scores[1]``
        the Y-view scores. Rows are samples, columns are latent dimensions.
    test_scores : sequence of two 2-D arrays
        Same layout as ``train_scores``, for the test samples.
    title : str, optional
        Title placed on the pair-plot figure; skipped when falsy.

    Returns
    -------
    None
    """
    x_train, y_train = train_scores[0], train_scores[1]
    x_test, y_test = test_scores[0], test_scores[1]

    # Column names are 1-based so they read naturally on the axes.
    x_vars = [f'X dimension {dim}' for dim in range(1, x_train.shape[1] + 1)]
    y_vars = [f'Y dimension {dim}' for dim in range(1, y_train.shape[1] + 1)]
    n_dims = len(x_vars)

    def _scores_frame(label, x_scores, y_scores):
        # One row per sample: the phase label followed by every X and Y score column.
        frame = pd.DataFrame(
            {'phase': np.asarray([label] * x_scores.shape[0]).astype(str)})
        frame[x_vars] = x_scores
        frame[y_vars] = y_scores
        return frame

    def _correlation_frame(label, x_scores, y_scores):
        # corrcoef treats the columns of both inputs as variables of one joint
        # matrix; the top-right block holds the X-vs-Y correlations and its
        # diagonal pairs X dimension i with Y dimension i.
        joint = np.corrcoef(x_scores, y_scores, rowvar=False)
        paired = np.diag(joint[:n_dims, n_dims:])
        return pd.DataFrame({
            'correlation': paired,
            'dimension': np.arange(n_dims) + 1,
            'phase': np.asarray([label] * n_dims).astype(str),
        })

    # --- Figure 1: pair plot of the latent scores -------------------------
    score_data = pd.concat(
        [
            _scores_frame('train', x_train, y_train),
            _scores_frame('test', x_test, y_test),
        ],
        axis=0,
    )
    cca_pp = sns.pairplot(
        score_data, hue='phase', x_vars=x_vars, y_vars=y_vars, corner=True)
    cca_pp.fig.set_size_inches(10, 5)
    if title:
        cca_pp.fig.suptitle(title)

    # --- Figure 2: bar plot of the per-dimension correlations --------------
    corr_data = pd.concat(
        [
            _correlation_frame('train', x_train, y_train),
            _correlation_frame('test', x_test, y_test),
        ],
        axis=0,
    )
    # Give the bar plot the same size as the pair plot.
    width, height = cca_pp.fig.get_size_inches()
    _, ax = plt.subplots(figsize=(width, height))
    sns.barplot(x="dimension", y="correlation", hue="phase", data=corr_data, ax=ax)

## Make Data

### Choose the parameters of the data

In [110]:
# @markdown Execute this cell to choose parameters!

# Size each description to its text instead of the default fixed label width.
style = {'description_width': 'initial'}

# Sliders controlling the simulated data. continuous_update=False defers the
# callback until the slider is released rather than firing while it is dragged.
N_train = widgets.IntSlider(
    value=100,
    min=20,
    max=500,
    description='Train Samples',
    style=style,
    continuous_update=False,
)
N_test = widgets.IntSlider(
    value=100,
    min=20,
    max=500,
    description='Test Samples',
    style=style,
    continuous_update=False,
)
X_features = widgets.IntSlider(
    value=100,
    min=20,
    max=500,
    description='X_features',
    style=style,
    continuous_update=False,
)
Y_features = widgets.IntSlider(
    value=100,
    min=20,
    max=500,
    description='Y_features',
    style=style,
    continuous_update=False,
)
latent_dims = widgets.IntSlider(
    value=1,
    min=1,
    max=5,
    description='Latent Dimensions',
    style=style,
    continuous_update=False,
)

def generate_data(N_train,N_test,X_features,Y_features,latent_dims):
    """Simulate two views of data and split them into train and test sets.

    ``N_train + N_test`` samples are drawn with ``generate_covariance_data``
    (``correlation=1``, ``decay=0.9``) and then divided by
    ``train_test_split`` so that ``N_train`` samples go to the training split.

    Parameters
    ----------
    N_train : int
        Number of samples placed in the training split.
    N_test : int
        Number of additional samples generated; they form the test split.
    X_features, Y_features : int
        Number of features requested for the X and Y views.
    latent_dims : int
        Number of latent dimensions passed to the data generator.

    Returns
    -------
    tuple
        ``(X_tr, X_te, Y_tr, Y_te)``.
    """
    total_samples = N_train + N_test
    # The generator returns the pair of views plus a second value unused here.
    views, _ = generate_covariance_data(
        total_samples,
        view_features=[X_features, Y_features],
        latent_dims=latent_dims,
        correlation=1,
        decay=0.9,
    )
    X_view, Y_view = views
    X_train, X_test, Y_train, Y_test = train_test_split(
        X_view, Y_view, train_size=N_train)
    return (X_train, X_test, Y_train, Y_test)

# Bind the sliders to generate_data; the most recent return value is read
# later from `out.result`.
out = widgets.interactive(
    generate_data,
    N_train=N_train,
    N_test=N_test,
    X_features=X_features,
    Y_features=Y_features,
    latent_dims=latent_dims,
)
display(out)

interactive(children=(IntSlider(value=100, continuous_update=False, description='Train Samples', max=500, min=…

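## Change the amount of regularisation from 0 (CCA) to 1 (PLS)
To give finer control as the regularisation approaches 1, the slider holds "1 minus c" on a log scale, and we subtract the widget value from 1 to obtain the regularisation `c` passed to the model. The title of the figure gives the amount of regularisation actually used by the model.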

The model and plot will update when the mouse is released. There's a bit of a lag as the model needs to fit in the background!

In [111]:
# @markdown Execute this cell to change model regularisation

# Snapshot of the data produced by the sliders above, taken when this cell
# runs; re-run this cell after moving those sliders to pick up new data.
X_tr, X_te, Y_tr, Y_te = out.result

style = {'description_width': 'initial'}
# Log-scale slider (exponents -5 to 0, base 10) holding "1 minus c".
c = widgets.FloatLogSlider(
    value=1 - 1e-3,
    base=10,
    min=-5,
    max=0,
    description='1 minus c',
    readout=True,
    readout_format='.5f',
    style=style,
    continuous_update=False,
)

def interactive_cca(c):
    """Fit rCCA for the current slider value and plot train/test scores.

    Parameters
    ----------
    c : float
        Value of the "1 minus c" slider; the model is fitted with
        regularisation ``1 - c``.

    Notes
    -----
    Reads the module-level ``X_tr``, ``Y_tr``, ``X_te``, ``Y_te`` arrays and
    the current value of the ``latent_dims`` slider.
    """
    n_components = latent_dims.value
    fitted = rCCA(latent_dims=n_components, c=1 - c).fit(X_tr, Y_tr)
    held_out_scores = fitted.transform(X_te, Y_te)
    # The fitted model's `scores` attribute is passed as the training scores.
    plot_latent_train_test(
        fitted.scores,
        held_out_scores,
        f'Pair plot of latent dimensions for train and test data c={1-c:.5f}',
    )

# Re-fit and re-plot whenever the "1 minus c" slider value changes.
plot_widget = widgets.interactive(
    interactive_cca,
    c=c,
)
display(plot_widget)

interactive(children=(FloatLogSlider(value=0.999, continuous_update=False, description='1 minus c', max=0.0, m…