In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the project package imported below) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import tensorflow as tf
import pickle

from datetime import datetime

from complexity_regularized_dcca.data.xrmb import XRMBData
from complexity_regularized_dcca.algorithms.correlation import CCA

from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC as SVM
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from complexity_regularized_dcca.algorithms.losses_metrics import MetricDict, MovingMetric
from tqdm.auto import tqdm

# Data provider

In [4]:
# Build the XRMB data provider shared by the Raw, PCA and CCA experiments below.
# NOTE(review): 10000 is presumably the batch size -- confirm against the
# XRMBData constructor signature.
dataprovider = XRMBData(10000)

2023-09-05 10:57:44.886184: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-05 10:57:44.892036: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-05 10:57:44.892172: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-09-05 10:57:44.892690: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

# Raw input features

In [None]:
def _raw_view0_and_labels(batches):
    """Accumulate the unprocessed view-0 inputs and the labels of one split.

    Every batch contributes its ``nn_input_0`` entry (stored under ``view_0``)
    and its ``labels`` entry (converted with ``.numpy()``) to two separate
    ``MetricDict`` accumulators.

    Returns:
        tuple: ``(features, labels)`` taken from the accumulators' ``output()``.
    """
    # NOTE(review): MetricDict presumably concatenates the per-batch updates
    # along the sample axis -- confirm in losses_metrics.
    feature_acc, label_acc = MetricDict(), MetricDict()
    for batch in batches:
        feature_acc.update({'view_0': batch['nn_input_0']})
        label_acc.update({'labels': batch['labels'].numpy()})
    features = feature_acc.output()['view_0']
    labels = label_acc.output()['labels']
    return features, labels


data_splits_for_acc = dataprovider.test_data

# One linear-SVM accuracy per train/val split, computed on the raw view-0 input.
accs = []
for split in data_splits_for_acc:
    X_train, labels_train = _raw_view0_and_labels(split['train'])

    # The scaler is fitted on the training part only and reused for validation.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    X_train = scaler.fit_transform(X_train)

    svm_model = SVM(random_state=333).fit(X_train, labels_train)

    X_val, labels_val = _raw_view0_and_labels(split['val'])
    X_val = scaler.transform(X_val)

    accs.append(accuracy_score(labels_val, svm_model.predict(X_val)))

# Per-split accuracies followed by their mean, one per line.
print(accs, np.mean(accs), sep="\n")

# PCA

In [6]:
def _pca_view0_and_labels(batches, fitted_pca):
    """Project view 0 of every batch with ``fitted_pca`` and collect the labels.

    The projected batches are stored under ``latent_view_0`` and the labels
    (converted with ``.numpy()``) under ``labels`` in two ``MetricDict``
    accumulators.

    Returns:
        tuple: ``(features, labels)`` taken from the accumulators' ``output()``.
    """
    # NOTE(review): MetricDict presumably concatenates the per-batch updates
    # along the sample axis -- confirm in losses_metrics.
    feature_acc, label_acc = MetricDict(), MetricDict()
    for batch in batches:
        projected = fitted_pca.transform(batch['nn_input_0'])
        feature_acc.update({'latent_view_0': projected})
        label_acc.update({'labels': batch['labels'].numpy()})
    features = feature_acc.output()['latent_view_0']
    labels = label_acc.output()['labels']
    return features, labels


# Accumulate the full training set; PCA is fitted on its view-0 entry.
pca_train_met = MetricDict()
for batch in dataprovider.training_data:
    pca_train_met.update(batch)
pca_train = pca_train_met.output()

data_splits_for_acc = dataprovider.test_data

# results_dict[pca_dim] -> {'accuracies': [one per split], 'mean': mean accuracy}
results_dict = {}

for pca_dim in tqdm([120, 110, 100, 90, 80, 70, 60, 50, 40, 30, 20, 10]):
    pca = PCA(n_components=pca_dim, random_state=333)
    pca.fit(pca_train['nn_input_0'])

    accs = []
    for split in data_splits_for_acc:
        X_train, labels_train = _pca_view0_and_labels(split['train'], pca)

        # The scaler is fitted on the training part only and reused for validation.
        scaler = MinMaxScaler(feature_range=(-1, 1))
        X_train = scaler.fit_transform(X_train)

        svm_model = SVM(random_state=333).fit(X_train, labels_train)

        X_val, labels_val = _pca_view0_and_labels(split['val'], pca)
        X_val = scaler.transform(X_val)

        accs.append(accuracy_score(labels_val, svm_model.predict(X_val)))

    mean_acc = np.mean(accs)
    results_dict[pca_dim] = {'accuracies': accs, 'mean': mean_acc}

    # Dimension, per-split accuracies, mean and a separator, one per line.
    print(pca_dim, accs, mean_acc, "---", sep="\n")

  0%|          | 0/1 [00:00<?, ?it/s]

110
[0.4377038135022234, 0.41549656380541705, 0.43364596808969386]
0.4289487817991114
---


# CCA

In [5]:
def _cca_latents_and_labels(batches, B1, B2, mean_v0, mean_v1):
    """Project both views of every batch with a fitted linear CCA.

    Each view is centred by subtracting the mean returned by ``CCA`` and then
    multiplied by its projection matrix (``B1`` for view 0, ``B2`` for view 1).

    Args:
        batches: iterable of batches providing ``nn_input_0``, ``nn_input_1``
            and ``labels`` entries.
        B1, B2: projection matrices returned by ``CCA``.
        mean_v0, mean_v1: per-view means returned by ``CCA``.

    Returns:
        tuple: ``(latents, labels)`` where ``latents`` is the ``MetricDict``
        output holding ``latent_view_0`` and ``latent_view_1`` and ``labels``
        is the accumulated ``labels`` entry.
    """
    # NOTE(review): MetricDict presumably concatenates the per-batch updates
    # along the sample axis -- confirm in losses_metrics.
    latents_met, labels_met = MetricDict(), MetricDict()
    for data in batches:
        v0_bar = tf.subtract(data['nn_input_0'], mean_v0)
        v1_bar = tf.subtract(data['nn_input_1'], mean_v1)
        # The projection is applied to the transposed batch, so the result is
        # transposed back before it is accumulated.
        latents_met.update(dict(
            latent_view_0=(B1 @ tf.transpose(v0_bar)).numpy().T,
            latent_view_1=(B2 @ tf.transpose(v1_bar)).numpy().T,
        ))
        labels_met.update(dict(labels=data['labels'].numpy()))
    return latents_met.output(), labels_met.output()['labels']


# Accumulate the full training set; CCA is fitted on both of its views.
cca_train_met = MetricDict()
for batch in dataprovider.training_data:
    cca_train_met.update(batch)

cca_train = cca_train_met.output()

data_splits_for_acc = dataprovider.test_data

# results_dict[cca_dim][reg] -> dict(accuracies=[one per split], mean=mean accuracy)
# Configurations whose evaluation failed have no entry.
results_dict = dict()

for cca_dim in tqdm([120, 110, 100, 90, 80, 70, 60], desc="cca_dim"):
    results_dict[cca_dim] = dict()
    for reg in tqdm([1e-1, 1e-2, 1e-3, 1e-4, 1e-5], desc="reg"):
        try:
            # epsilon, omega and ccor are kept as returned by CCA; the
            # per-batch projections no longer overwrite them.
            B1, B2, epsilon, omega, ccor, mean_v0, mean_v1 = CCA(
                cca_train['nn_input_0'],
                cca_train['nn_input_1'],
                num_shared_dim=cca_dim,
                r1=reg,
                r2=reg,
            )

            accs = []
            for split in data_splits_for_acc:
                netw_output_train, labels_train = _cca_latents_and_labels(
                    split['train'], B1, B2, mean_v0, mean_v1
                )
                # Only the projected view 0 is used for classification.
                X_train = netw_output_train['latent_view_0']

                # The scaler is fitted on the training part only and reused
                # for validation.
                scaler = MinMaxScaler(feature_range=(-1, 1)).fit(X_train)
                X_train = scaler.transform(X_train)

                svm_model = SVM(random_state=333)
                svm_model.fit(X_train, labels_train)

                netw_output_val, labels_val = _cca_latents_and_labels(
                    split['val'], B1, B2, mean_v0, mean_v1
                )
                X_val = scaler.transform(netw_output_val['latent_view_0'])

                predictions = svm_model.predict(X_val)
                svm_acc = accuracy_score(labels_val, predictions)
                accs.append(svm_acc)

            results_dict[cca_dim][reg] = dict(accuracies=accs, mean=np.mean(accs))

            print(cca_dim)
            print(reg)
            print(accs)
            print(np.mean(accs))
            print("---")
        except Exception as exc:
            # Best effort: a failing configuration is skipped so the sweep can
            # continue, but the failure is reported instead of being silently
            # dropped. Catching Exception (not a bare except) keeps
            # KeyboardInterrupt working for this long-running loop.
            print(f"CCA evaluation failed for cca_dim={cca_dim}, reg={reg}: {exc!r}")
            print("---")

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

2023-09-05 10:57:52.076066: I tensorflow/stream_executor/cuda/cuda_blas.cc:1760] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-09-05 10:57:52.101850: I tensorflow/core/util/cuda_solvers.cc:180] Creating CudaSolver handles for stream 0x55a186b682c0


110
0.1
[0.43905134078965097, 0.41487670125320036, 0.4335651142733937]
0.42916438543874835
---
