# Preliminaries

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import sys
from sklearn.utils import resample

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import r2_score

# sys.path.append("../LinCFA")
# from LinCFA import LinCFA
# from LinCFA_fast import LinCFA_fast
# 
# sys.path.append("../NonLinCFA")
# from NonLinCFA import NonLinCFA
# 
# sys.path.append("../GenLinCFA")
# from GenLinCFA import GenLinCFA
# from GenLinCFA_fast import GenLinCFA_fast


In [3]:
def compute_CI(list,n):
    print(f'{np.mean(list)} +- {1.96*np.std(list)/np.sqrt(n)}')

In [14]:
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn import utils
from sklearn.preprocessing import KernelCenterer, scale
from sklearn.metrics.pairwise import pairwise_kernels
from scipy import linalg
from scipy.sparse.linalg import eigsh as ssl_eigsh
from scipy.io import arff
from sklearn.impute import SimpleImputer

class spca(BaseEstimator, TransformerMixin):
    
    def __init__(self, num_components, kernel="linear", eigen_solver='auto', 
                 max_iterations=None, gamma=0, degree=3, coef0=1, alpha=1.0, 
                 tolerance=0, fit_inverse_transform=False):
        
        self._num_components = num_components
        self._gamma = gamma
        self._tolerance = tolerance
        self._fit_inverse_transform = fit_inverse_transform
        self._max_iterations = max_iterations
        self._degree = degree
        self._kernel = kernel
        self._eigen_solver = eigen_solver
        self._coef0 = coef0
        self._centerer = KernelCenterer()
        self._alpha = alpha
        self._alphas = []
        self._lambdas = []
        
        
    def _get_kernel(self, X, Y=None):
        # Returns a kernel matrix K such that K_{i, j} is the kernel between the ith and jth vectors 
        # of the given matrix X, if Y is None. 
        
        # If Y is not None, then K_{i, j} is the kernel between the ith array from X and the jth array from Y.
        
        # valid kernels are 'linear, rbf, poly, sigmoid, precomputed'
        
        args = {"gamma": self._gamma, "degree": self._degree, "coef0": self._coef0}
        
        return pairwise_kernels(X, Y, metric=self._kernel, n_jobs=-1, filter_params=True, **args)
    
    
    
    def _fit(self, X, Y):
        
        # calculate kernel matrix of the labels Y and centre it and call it K (=H.L.H)
        K = self._centerer.fit_transform(self._get_kernel(Y))
        
        # deciding on the number of components to use
        if self._num_components is not None:
            num_components = min(K.shape[0], self._num_components)
        else:
            num_components = self.K.shape[0]
        
        # Scale X
        # scaled_X = scale(X)
        
        # calculate the eigen values and eigen vectors for X^T.K.X
        Q = (X.T).dot(K).dot(X)
        
        # If n_components is much less than the number of training samples, 
        # arpack may be more efficient than the dense eigensolver.
        if (self._eigen_solver=='auto'):
            if (Q.shape[0]/num_components) > 20:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self._eigen_solver
        
        if eigen_solver == 'dense':
            # Return the eigenvalues (in ascending order) and eigenvectors of a Hermitian or symmetric matrix.
            self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))
            # argument eigvals = Indexes of the smallest and largest (in ascending order) eigenvalues
        
        elif eigen_solver == 'arpack':
            # deprecated :: self._lambdas, self._alphas = utils.arpack.eigsh(A=Q, num_components, which="LA", tol=self._tolerance)
            self._lambdas, self._alphas = ssl_eigsh(A=Q, k=num_components, which="LA", tol=self._tolerance)
            
        indices = self._lambdas.argsort()[::-1]
        
        self._lambdas = self._lambdas[indices]
        self._lambdas = self._lambdas[self._lambdas > 0]  # selecting values only for non zero eigen values
        
        self._alphas = self._alphas[:, indices]
        #return self._alphas
        self._alphas = self._alphas[:, self._lambdas > 0]  # selecting values only for non zero eigen values
        
        self.X_fit = X

        
    def _transform(self):
        return self.X_fit.dot(self._alphas)
        
        
    def transform(self, X):
        return X.dot(self._alphas)
        
        
    def fit(self, X, Y):
        self._fit(X,Y)
        return
        
        
    def fit_and_transform(self, X, Y):
        self.fit(X, Y)
        return self._transform()
    

KeyboardInterrupt: 

# Data

In [4]:
from keras.datasets import mnist

(train_X, train_y), (test_X, test_y) = mnist.load_data()

df_train = pd.DataFrame(train_X.reshape(train_X.shape[0],-1))
df_train["mean_std"] = train_y.reshape(train_y.shape[0],-1)
df_train = df_train[(df_train.mean_std==7) | (df_train.mean_std==1)].reset_index(drop=True)

df_test = pd.DataFrame(test_X.reshape(test_X.shape[0],-1))
df_test["mean_std"] = test_y.reshape(test_y.shape[0],-1)
df_test = df_test[(df_test.mean_std==7) | (df_test.mean_std==1)].reset_index(drop=True)

print(df_train.shape, df_test.shape)


(13007, 785) (2163, 785)


In [6]:
2163+13007

15170

In [5]:
df_trainVal_withTar = df_train
df_test_withTar = df_test
df_trainVal = df_train.iloc[:,:-1]
df_test = df_test.iloc[:,:-1]
target_df_trainVal = df_train.iloc[:,-1]
target_df_test = df_test.iloc[:,-1]


In [115]:
pd.read_pickle("/Users/Paolo/Downloads/full_statistics.pkl")

[vo      -5.213126e-07
 r        4.562266e+01
 u_200    1.191977e+01
 u_850   -1.760111e+00
 v_200   -1.733425e-01
 v_850    1.947758e-01
 sst      2.991964e+02
 tcwv     3.505484e+01
 tclw     5.413118e-02
 tciw     2.203104e-02
 shear    2.212577e+01
 dtype: float64,
 vo        0.000016
 r        24.907686
 u_200    20.080151
 u_850     6.071662
 v_200     9.645549
 v_850     3.694273
 sst       3.267417
 tcwv     14.296923
 tclw      0.056405
 tciw      0.043020
 shear    14.392576
 dtype: float64]

In [117]:
d1 = pd.read_csv("/Users/paolo/Documents/TC/Paolo/data/mutiple_regions/train_dailymeans_Sindian.csv")
d2 = pd.read_csv("/Users/paolo/Documents/TC/Paolo/data/mutiple_regions/train_dailymeans_Nindian.csv")
d3 = pd.read_csv("/Users/paolo/Documents/TC/Paolo/data/mutiple_regions/train_dailymeans_Australia.csv")
d4 = pd.read_csv("/Users/paolo/Documents/TC/Paolo/data/mutiple_regions/train_dailymeans_Natlantic.csv")
d5 = pd.read_csv("/Users/paolo/Documents/TC/Paolo/data/mutiple_regions/train_dailymeans_Spacific.csv")
d6 = pd.read_csv("/Users/paolo/Documents/TC/Paolo/data/mutiple_regions/train_dailymeans_Epacific.csv")
d7 = pd.read_csv("/Users/paolo/Documents/TC/Paolo/data/mutiple_regions/train_dailymeans_NWpacific.csv")


In [None]:
d = pd.concat((d1,d2,d3,d4,d5,d6,d7),axis=0).reset_index(drop=True)
d.head()

# Classification

## GenLinCFA

In [45]:
from sklearn.svm import SVC
for eps in [0.6,0.7,0.8,0.9,1]:
    res = []
    res_SVR = []
    nums = []
    for curr_seed in [0,1,2,3,4]:
        curr_df_trainVal = df_trainVal[np.random.default_rng(seed=curr_seed).permutation(df_trainVal.columns.values)]
        curr_df_test = df_test[np.random.default_rng(seed=curr_seed).permutation(df_test.columns.values)]
        curr_df_trainVal_withTar = pd.concat((curr_df_trainVal,target_df_trainVal), axis=1)
        
        output = GenLinCFA_fast(curr_df_trainVal_withTar,'mean_std', eps, -5 , 0, 1).compute_clusters()
        
        aggregate_trainVal = pd.DataFrame()
        aggregate_test = pd.DataFrame()
        for i in range(len(output)):
            aggregate_trainVal[str(i)] = curr_df_trainVal_withTar[output[i]].mean(axis=1)
            aggregate_trainVal = aggregate_trainVal.copy()
            aggregate_test[str(i)] = curr_df_test[output[i]].mean(axis=1)
            aggregate_test = aggregate_test.copy()
        print(f'Number of aggregated features: {len(output)}\n')
        mod = LogisticRegression().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
        mod_svr = SVC().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
        score = mod.score(aggregate_test, df_test_withTar.mean_std)
        score_SVR = mod_svr.score(aggregate_test, df_test_withTar.mean_std)

        print(score)
        print(score_SVR)
        res.append(score)
        res_SVR.append(score_SVR)
        nums.append(len(output))
    compute_CI(res,5)
    compute_CI(nums,5)
    compute_CI(res_SVR,5)

479
116
1
11
44
1
1
9
1
1
56
1
1
1
4
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
4
1
1
3
1
1
1
1
1
1
1
1
1
3
3
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
Number of aggregated features: 61



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9870550161812298
0.9921405455386038
518
30
16
4
120
1
6
1
1
4
1
1
1
2
1
1
2
1
1
1
7
1
1
1
1
8
2
1
1
1
1
1
4
1
1
1
4
1
2
1
1
1
1
1
1
3
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 68



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9879796578825705
0.9930651872399445
449
36
1
2
1
18
149
1
5
1
1
1
7
26
7
1
1
1
6
4
1
2
1
22
1
1
2
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 61



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.989828941285252
0.9926028663892742
455
1
225
4
2
5
1
18
3
1
1
5
7
1
1
1
3
1
2
1
1
2
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 63



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9865926953305594
0.9935275080906149
456
57
38
5
46
6
1
1
5
1
1
1
1
1
1
1
1
1
7
8
1
1
1
1
1
94
1
2
1
1
1
1
1
2
3
1
1
1
1
1
1
1
1
1
2
1
1
2
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 65



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.989828941285252
0.9926028663892742
0.9882570503929727 +- 0.0011911636202054277
63.6 +- 2.3257198455531993
0.9927877947295423 +- 0.00041326746799709197
489
118
1
10
2
3
45
10
1
1
1
1
1
1
9
1
2
1
1
1
2
1
1
1
4
53
1
2
1
1
1
1
2
1
2
1
1
1
1
1
1
3
1
1
Number of aggregated features: 44



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9907535829865927
0.9926028663892742
539
49
12
1
1
7
14
26
1
1
2
3
1
4
1
2
1
3
1
1
1
1
91
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 43



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9865926953305594
0.9916782246879334
480
155
1
69
1
1
5
4
1
7
1
1
17
4
3
2
1
7
2
2
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
2
1
Number of aggregated features: 38



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9870550161812298
0.9912159038372631
479
69
189
4
1
1
7
1
1
1
1
1
4
1
1
1
2
1
2
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 35



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9824318076745261
0.9921405455386038
484
50
56
57
1
2
1
4
1
1
2
1
1
1
8
1
1
1
1
1
1
1
1
1
1
80
2
1
1
1
1
1
3
1
1
2
1
2
1
1
1
1
1
1
1
Number of aggregated features: 45



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9865926953305594
0.9902912621359223
0.9866851595006935 +- 0.0023123669199566306
41.0 +- 3.3721115046807095
0.9915857605177993 +- 0.000697204859193266
514
119
1
19
13
1
1
1
3
2
94
4
2
2
1
1
1
1
1
1
1
1
Number of aggregated features: 22



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9833564493758669
0.9902912621359223
527
70
43
1
2
88
4
2
6
1
1
1
1
1
2
1
1
1
29
1
1
Number of aggregated features: 21



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9833564493758669
0.9842810910772076
525
17
109
40
1
4
3
4
5
4
60
4
1
1
2
1
1
1
1
Number of aggregated features: 19



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.984743411927878
0.9902912621359223
508
82
41
65
3
1
7
1
3
1
1
1
2
2
59
1
1
1
1
1
1
1
Number of aggregated features: 22



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9819694868238558
0.9889042995839112
505
81
70
1
3
1
1
4
4
2
25
1
1
2
2
73
3
1
1
1
1
1
Number of aggregated features: 22



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.9875173370319001
0.9879796578825705
0.9841886269070734 +- 0.0016490913587063557
21.2 +- 1.0222109371357755
0.9883495145631068 +- 0.0019417820924655335
546
108
3
59
1
66
1
Number of aggregated features: 7

0.9824318076745261
0.9838187702265372
556
91
120
2
5
1
2
Number of aggregated features: 8

0.984743411927878
0.9754969949144706
620
33
12
37
1
5
73
1
1
1
Number of aggregated features: 10



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.981044845122515
0.9865926953305594
556
113
10
90
2
1
6
Number of aggregated features: 8

0.9819694868238558
0.9787332408691632
557
92
5
99
1
4
1
24
1
Number of aggregated features: 9

0.9782709200184928
0.9805825242718447
0.9816920943134535 +- 0.0018410661632497609
8.4 +- 0.8938975332777239
0.9810448451225151 +- 0.0033904986029019432
578
94
1
Number of aggregated features: 4

0.9764216366158114
0.9773462783171522
583
77
Number of aggregated features: 3

0.9722607489597781
0.9717984281091078
602
11
73
Number of aggregated features: 4

0.9704114655570967
0.9736477115117892
579
79
Number of aggregated features: 3

0.9713361072584373
0.9648636153490523
594
72
1
Number of aggregated features: 4

0.970873786407767
0.970873786407767
0.9722607489597781 +- 0.0019007538995963872
3.6 +- 0.42941448508405017
0.9717059639389737 +- 0.0035734908606162253


# Regression

In [31]:
from keras.datasets import mnist
import keras
from keras import layers
from keras.callbacks import EarlyStopping
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn import utils
from sklearn.preprocessing import KernelCenterer, scale
from sklearn.metrics.pairwise import pairwise_kernels
from scipy import linalg
from scipy.sparse.linalg import eigsh as ssl_eigsh
from scipy.io import arff
from sklearn.impute import SimpleImputer

(train_X, train_y), (test_X, test_y) = mnist.load_data()

df_train = pd.DataFrame(train_X.reshape(train_X.shape[0],-1))
df_train["mean_std"] = train_y.reshape(train_y.shape[0],-1)
df_train = df_train[(df_train.mean_std==7) | (df_train.mean_std==1)].reset_index(drop=True)

df_test = pd.DataFrame(test_X.reshape(test_X.shape[0],-1))
df_test["mean_std"] = test_y.reshape(test_y.shape[0],-1)
df_test = df_test[(df_test.mean_std==7) | (df_test.mean_std==1)].reset_index(drop=True)

print(df_train.shape, df_test.shape)

df_trainVal_withTar = df_train.iloc[:,:-1]
df_trainVal_withTar["mean_std"] = df_train.iloc[:,267]
df_trainVal_withTar = df_trainVal_withTar.iloc[:,list(range(267))+list(range(268,785))]

df_test_withTar = df_test.iloc[:,:-1]
df_test_withTar["mean_std"] = df_test.iloc[:,267]
df_test_withTar = df_test_withTar.iloc[:,list(range(267))+list(range(268,785))]

df_trainVal = df_trainVal_withTar.iloc[:,:-1]
df_test = df_test_withTar.iloc[:,:-1]
target_df_trainVal = df_trainVal_withTar.iloc[:,-1]
target_df_test = df_test_withTar.iloc[:,-1]

df_test = df_test_withTar.iloc[:,:-1]
curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=0)
curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]


(13007, 785) (2163, 785)


In [32]:
i = 5
input_tabular = keras.Input(shape=(curr_df_trainVal.shape[1]))
encoded = layers.Dense(i*4, activation='relu')(input_tabular)
encoded = layers.Dense(i*2, activation='relu')(encoded)
encoded = layers.Dense(i, activation='relu')(encoded)

decoded = layers.Dense(i*2, activation='relu')(encoded)
decoded = layers.Dense(i*4, activation='relu')(decoded)
decoded = layers.Dense(curr_df_trainVal.shape[1], activation='linear')(decoded)

autoencoder = keras.Model(input_tabular, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

monitor = EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=20, 
                verbose=1, mode='auto', restore_best_weights=True)

autoencoder.fit(curr_df_trainVal, curr_df_trainVal,
                epochs=1000,
                shuffle=True,
                verbose=0,
                callbacks=[monitor],
                validation_data=(df_test, df_test))
        
encoder = keras.Model(inputs=input_tabular, outputs=encoded)
        
trainVal_reduced = encoder.predict(curr_df_trainVal)
test_reduced = encoder.predict(df_test)

ValueError: Shapes used to initialize variables must be fully-defined (no `None` dimensions). Received: shape=(None, 20) for variable path='dense_30/kernel'

In [102]:
# maxlen = [0,0]
# for col in df_train.iloc[:,:-1].columns:
#     col_len = df_train[df_train[col]>0].shape[0]
#     print(col_len)
#     if col_len > maxlen[1]: maxlen = [col,col_len]
# maxlen


In [111]:
from sklearn.svm import SVR
for eps in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8]:
    res = []
    res_SVR = []
    nums = []
    for curr_seed in [0,1,2,3,4]:
        curr_df_trainVal = df_trainVal[np.random.default_rng(seed=curr_seed).permutation(df_trainVal.columns.values)]
        curr_df_test = df_test[np.random.default_rng(seed=curr_seed).permutation(df_test.columns.values)]
        curr_df_trainVal_withTar = pd.concat((curr_df_trainVal,target_df_trainVal), axis=1)
        
        output = GenLinCFA_fast(curr_df_trainVal_withTar,'mean_std', eps, -5 , 0, 1).compute_clusters()
        
        aggregate_trainVal = pd.DataFrame()
        aggregate_test = pd.DataFrame()
        for i in range(len(output)):
            aggregate_trainVal[str(i)] = curr_df_trainVal_withTar[output[i]].mean(axis=1)
            aggregate_trainVal = aggregate_trainVal.copy()
            aggregate_test[str(i)] = curr_df_test[output[i]].mean(axis=1)
            aggregate_test = aggregate_test.copy()
        print(f'Number of aggregated features: {len(output)}\n')
        mod = LinearRegression().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
        mod_svr = SVR().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
        score = mod.score(aggregate_test, df_test_withTar.mean_std)
        score_SVR = mod_svr.score(aggregate_test, df_test_withTar.mean_std)

        print(score)
        print(score_SVR)
        res.append(score)
        res_SVR.append(score_SVR)
        nums.append(len(output))
    compute_CI(res,5)
    compute_CI(nums,5)
    compute_CI(res_SVR,5)

440
1
171
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
3
2
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 168

0.9021305620166494
0.759123444351197
411
1
159
1
1
1
1
1
1
2
1
1
31
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
Number of aggregated features: 178

0.9021335176326556
0.7530087195927888
413
1
1
1
1
109
1
3
75
2
5
1
1
8
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
3
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1

In [None]:
0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8

# 0.902399783990707 +- 0.00026129172487060666
# 175.2 +- 4.551253365832317
# 0.7588400106943157 +- 0.00280125596003041

0.9015304967695931 +- 0.00016911121510500277
59.0 +- 5.787810639611492
0.8669230556416142 +- 0.002800677394456654

0.8947228729341816 +- 0.005723824292634713
22.2 +- 2.8484134531349197
0.9160531216217673 +- 0.0025808879181222497

0.8604212095072687 +- 0.0015158906230385997
10.8 +- 1.2882434552521507
0.9344645531633251 +- 0.0030438408214308005

0.8104829957128089 +- 0.08290247821462289
6.8 +- 0.8588289701681003
0.900391788124181 +- 0.06735449737845821

0.7986658784345281 +- 0.04236079044629013
6.0 +- 1.2396128427860047
0.8721561370383373 +- 0.054038517514309756

# 0.5486966156734959 +- 0.08212096870608812
# 3.2 +- 0.350615458871967
# 0.5682436022602569 +- 0.12512656130296435
# 
# 0.4447092999440092 +- 0.015067795188246724
# 2.2 +- 0.350615458871967
# 0.41163181311176256 +- 0.013623155248732071


In [113]:
%%time
from NonLinCFA_fast import NonLinCFA_fast

for eps in [0.1,0.01,0.005,0.001,0.0001]:
    res = []
    res_SVR = []
    nums = []
    for curr_seed in [0,1,2,3,4]:
        curr_df_trainVal = df_trainVal[np.random.default_rng(seed=curr_seed).permutation(df_trainVal.columns.values)]
        curr_df_test = df_test[np.random.default_rng(seed=curr_seed).permutation(df_test.columns.values)]
        curr_df_trainVal_withTar = pd.concat((curr_df_trainVal,target_df_trainVal), axis=1)
        
        output = NonLinCFA_fast(curr_df_trainVal_withTar,'mean_std', eps, -5 , 0).compute_clusters()
        
        aggregate_trainVal = pd.DataFrame()
        aggregate_test = pd.DataFrame()
        for i in range(len(output)):
            aggregate_trainVal[str(i)] = curr_df_trainVal_withTar[output[i]].mean(axis=1)
            aggregate_trainVal = aggregate_trainVal.copy()
            aggregate_test[str(i)] = curr_df_test[output[i]].mean(axis=1)
            aggregate_test = aggregate_test.copy()
        print(f'Number of aggregated features: {len(output)}\n')
        mod = LinearRegression().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
        mod_svr = SVR().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
        score = mod.score(aggregate_test, df_test_withTar.mean_std)
        score_SVR = mod_svr.score(aggregate_test, df_test_withTar.mean_std)

        print(score)
        print(score_SVR)
        res.append(score)
        res_SVR.append(score_SVR)
        nums.append(len(output))
    compute_CI(res,5)
    compute_CI(nums,5)
    compute_CI(res_SVR,5)

772
10
1
Number of aggregated features: 3

0.6354193357206055
0.7147030297530053
773
9
1
Number of aggregated features: 3

0.6898261098407782
0.8339661283357862
774
Number of aggregated features: 2

0.6782428535111931
0.8376762277265162
775
Number of aggregated features: 2

0.7234870728227653
0.8708485739412084
772
9
Number of aggregated features: 3

0.7133674615798213
0.8642304962101238
0.6880685666950326 +- 0.027062425668565235
2.6 +- 0.42941448508405017
0.8242848911933279 +- 0.04965429589584592
650
27
52
23
10
5
7
4
2
1
Number of aggregated features: 11

0.864310617124382
0.901672883915581
647
61
32
8
10
8
4
4
2
5
1
1
Number of aggregated features: 12

0.7995517145923081
0.8877515738606045
651
32
11
53
8
2
7
4
12
1
1
1
Number of aggregated features: 12

0.7329664882073447
0.783918583585497
652
26
31
43
10
8
7
3
Number of aggregated features: 9

0.778635008400387
0.8451285798781061
656
24
28
43
8
16
3
3
Number of aggregated features: 9

0.6816301872932258
0.7225311433932233
0.7714188

In [None]:
[0.1,0.01,0.005,0.001,0.0001]

0.6880685666950326 +- 0.027062425668565235
2.6 +- 0.42941448508405017
0.8242848911933279 +- 0.04965429589584592

0.7714188031235295 +- 0.05405195235699166
10.6 +- 1.188994869627283
0.8282005529266024 +- 0.05861413675667858

0.8056329361025257 +- 0.0407864081979483
14.4 +- 0.8938975332777239
0.8598201796514633 +- 0.034731215601400314

0.8361514418564278 +- 0.020101263543721878
28.2 +- 1.0222109371357755
0.8676118707312087 +- 0.013462057893111723

0.8591241402558382 +- 0.026475728213295496
44.4 +- 1.3118829216054302
0.8786930481063807 +- 0.01315880996027271

In [18]:
from sklearn.svm import SVR
import warnings
warnings.simplefilter('ignore')

res = []
res_svr = []

nums = []
for curr_seed in [0,1,2,3,4]:
    print(f'seed={curr_seed}')
    curr_df_trainVal = df_trainVal[np.random.default_rng(seed=curr_seed).permutation(df_trainVal.columns.values)]
    curr_df_test = df_test[np.random.default_rng(seed=curr_seed).permutation(df_test.columns.values)]
    curr_df_trainVal_withTar = pd.concat((curr_df_trainVal,target_df_trainVal), axis=1)
    
    output = LinCFA(curr_df_trainVal_withTar,'mean_std', 0, 0).compute_clusters()
    
    aggregate_trainVal = pd.DataFrame()
    aggregate_test = pd.DataFrame()
    for i in range(len(output)):
        aggregate_trainVal[str(i)] = curr_df_trainVal_withTar[output[i]].mean(axis=1)
        aggregate_trainVal = aggregate_trainVal.copy()
        aggregate_test[str(i)] = curr_df_test[output[i]].mean(axis=1)
        aggregate_test = aggregate_test.copy()
    print(f'Number of aggregated features: {len(output)}\n')
    mod = LinearRegression().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
    mod_svr = SVR().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)

    score = mod.score(aggregate_test, df_test_withTar.mean_std)
    score_svr = mod_svr.score(aggregate_test, df_test_withTar.mean_std)

    print(score)
    res.append(score)
    res_svr.append(score_svr)
    nums.append(len(output))
compute_CI(res,5)
compute_CI(nums,5)
compute_CI(res_svr,5)


seed=0
Number of aggregated features: 192

0.8744562946210707
seed=1
Number of aggregated features: 198

0.8850246356861624
seed=2
Number of aggregated features: 201

0.8757932005383724
seed=3
Number of aggregated features: 198

0.8893330232260995
seed=4
Number of aggregated features: 197

0.8776490472657461
0.8804512402674902 +- 0.0050397695714634
197.2 +- 2.5645309902592324
0.8997613927705655 +- 0.0013114313219560524


In [24]:
from sklearn.decomposition import PCA
from sklearn.utils import resample

best_scores = []
best_scores_svr = []
best_nums = []
for curr_seed in [0,1,2,3,4]: 
    best_score = 0
    best_score_svr = 0
    best_num = 0
    for dim in range(50):
        curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
        curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
        dimRedMethod = PCA(n_components=dim+1)
        trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
        test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
        print(test_reduced.shape)
        mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
        mod_svr = SVR().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)

        actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
        actual_score_svr = mod_svr.score(test_reduced, df_test_withTar.mean_std)
        print(actual_score)
        if actual_score> best_score:
            best_score=actual_score
            best_score_svr=actual_score_svr
            best_num = test_reduced.shape[1]
    print(best_score)
    print(best_score_svr)
    print(best_num)
    best_scores.append(best_score)
    best_scores_svr.append(best_score_svr)
    best_nums.append(best_num)
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


(2163, 1)
0.0036208801167254556
(2163, 2)
0.02280251577339032
(2163, 3)
0.1061450605052906
(2163, 4)
0.15486722644626505
(2163, 5)
0.3184544205214055
(2163, 6)
0.407525991701133
(2163, 7)
0.40854577203418696
(2163, 8)
0.41235083262471395
(2163, 9)
0.4353132834222285
(2163, 10)
0.48437694540931253
(2163, 11)
0.4878386785392529
(2163, 12)
0.4913803855444322
(2163, 13)
0.5462946693613457
(2163, 14)
0.5623757855954482
(2163, 15)
0.5733076004905181
(2163, 16)
0.5812824339886478
(2163, 17)
0.6024241338739609
(2163, 18)
0.6294917200363526
(2163, 19)
0.6462986238877895
(2163, 20)
0.6462097583855737
(2163, 21)
0.6704020557822601
(2163, 22)
0.6788908911915653
(2163, 23)
0.6793931200506145
(2163, 24)
0.7038118834206997
(2163, 25)
0.707319948675639
(2163, 26)
0.7274506210640749
(2163, 27)
0.7275337569497554
(2163, 28)
0.72720122543671
(2163, 29)
0.7467470432259058
(2163, 30)
0.7489209207939793
(2163, 31)
0.7562344006117747
(2163, 32)
0.7741966666793012
(2163, 33)
0.7765257870775882
(2163, 34)
0.77

In [25]:
from sklearn.decomposition import PCA
from sklearn.utils import resample

best_scores = []
best_scores_svr = []
best_nums = []
for curr_seed in [0,1,2,3,4]: 
    best_score = 0
    best_score_svr = 0
    best_num = 0
    for dim in [0.95]:
        curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
        curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
        dimRedMethod = PCA(n_components=dim)
        trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
        test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
        print(test_reduced.shape)
        mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
        mod_svr = SVR().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)

        actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
        actual_score_svr = mod_svr.score(test_reduced, df_test_withTar.mean_std)
        print(actual_score)
        if actual_score> best_score:
            best_score=actual_score
            best_score_svr=actual_score_svr
            best_num = test_reduced.shape[1]
    print(best_score)
    print(best_score_svr)
    print(best_num)
    best_scores.append(best_score)
    best_scores_svr.append(best_score_svr)
    best_nums.append(best_num)
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


(2163, 112)
0.8628373094350781
0.8628373094350781
0.6693054995598589
112
(2163, 112)
0.8644690390420424
0.8644690390420424
0.6756108098085217
112
(2163, 112)
0.8605744440572382
0.8605744440572382
0.6709490128597425
112
(2163, 113)
0.8624585450836103
0.8624585450836103
0.6679478409542273
113
(2163, 113)
0.8660331252197431
0.8660331252197431
0.6672425975489029
113
0.8632744925675425 +- 0.0016249880801359777
0.6702111521462507 +- 0.0026136683958371664
112.4 +- 0.42941448508405017


In [8]:
from sklearn.svm import SVR
from sklearn.decomposition import KernelPCA
for curr_kernel in ['linear', 'poly', 'sigmoid']:
    best_scores = []
    best_nums = []
    best_scores_SVR = []
    for curr_seed in [0,1,2,3,4]:
        best_score = 0
        best_num = 0
        best_score_SVR = 0
        print(f'#########################{curr_kernel}, {curr_seed}#############################')
        for i in [5,10,15,20,25,30,40,50,100,200]:
            curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
            curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
            dimRedMethod = KernelPCA(n_components=i+1, kernel=curr_kernel)
            trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal))
            test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
            print(test_reduced.shape)
            mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
            mod_svr = SVR().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
            actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
            actual_score_SVR = mod_svr.score(test_reduced, df_test_withTar.mean_std)
            print(actual_score)
            if actual_score> best_score:
                best_score=actual_score
                best_score_SVR=actual_score_SVR
                best_num = test_reduced.shape
        print(best_score)
        print(best_score_SVR)
        print(best_num)
        best_scores.append(best_score)
        best_scores_SVR.append(best_score_SVR)
        best_nums.append(best_num[1])
    compute_CI(best_scores,5)
    compute_CI(best_scores_SVR,5)
    compute_CI(best_nums,5)

# 0.7983047713912139 +- 0.0017884197023088112
# 0.6796233011602648 +- 0.003042124179769615
# 50.0 +- 0.0
# 
# 0.6451073667487277 +- 0.007425733714743601
# 0.5785702579501644 +- 0.004021208906683036
# 49.8 +- 0.35061545887196705


#########################linear, 0#############################
(2163, 6)
0.40752681250065237
(2163, 11)
0.487842121549825
(2163, 16)
0.5812947599069731
(2163, 21)
0.6703595353636131
(2163, 26)
0.7273513690125243
(2163, 31)
0.7570704495085824
(2163, 41)
0.7891115672829419
(2163, 51)
0.8077530164839344
(2163, 101)
0.8549934647671065
(2163, 201)
0.8908982910550626
0.8908982910550626
0.6585121072447734
(2163, 201)
#########################linear, 1#############################
(2163, 6)
0.40975242286000535


KeyboardInterrupt: 

In [9]:
for a,b in zip([1,2],['a','b']):
    print(a,b)

1 a
2 b


In [9]:
for kernel in ['linear','poly','sigmoid']:#['poly']:#['linear']:
    best_scores = []
    best_scores_svr = []
    best_nums = []
    for curr_seed in [0,1,2,3,4]:
        print(f'#########################{kernel}, {curr_seed}#############################')
        best_score = 0
        best_score_svr = 0
        best_num = 0
        for i in [1,2,3,4,5,7,10,12,15,20,25,30,35,40,45,50]:
            try:
                curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
                curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
                dimRedMethod = spca(num_components=i+1, kernel=kernel, degree=3, gamma=None, coef0=1)
                trainVal_reduced = pd.DataFrame(dimRedMethod.fit_and_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
                test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
                print(test_reduced.shape)
                mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
                mod_svr = SVR().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
                actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
                actual_score_svr = mod_svr.score(test_reduced, df_test_withTar.mean_std)
                print(actual_score)
                if actual_score> best_score:
                    best_score=actual_score
                    best_num = test_reduced.shape
                if actual_score_svr> best_score_svr:
                    best_score_svr=actual_score_svr
            except:
                print("error")
                pass
        print(best_score)
        print(best_num)
        best_scores.append(best_score)
        best_scores_svr.append(best_score_svr)
        best_nums.append(best_num[1])
    compute_CI(best_scores,5)
    compute_CI(best_nums,5)
    compute_CI(best_scores_svr,5)

# 0.7965248843675893 +- 0.014695825798026913
# 34.0 +- 2.1470724254202507
# 0.7965248843675893 +- 0.014695825798026913

# 0.7864495387741657 +- 0.016138750991291233
# 31.0 +- 5.5437171645025325
# 0.7864495387741657 +- 0.016138750991291233

#########################linear, 0#############################
(2163, 2)
0.3705845067713577
(2163, 3)
0.4474347781174288
(2163, 4)
0.5718016787139237
(2163, 5)
0.5960036240915259
(2163, 6)
0.5751579168859613
(2163, 8)
0.6682035869834628
(2163, 11)
0.7607416153054092
(2163, 13)
0.6664250649511699
(2163, 16)
0.7724616832224566
(2163, 21)
0.7239086973990513
(2163, 26)
0.7708049314042456
(2163, 31)
0.8146033388390266
(2163, 36)
0.7654222298678706


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.8146033388390266
(2163, 31)
#########################linear, 1#############################
(2163, 2)
0.39811470380592273
(2163, 3)
0.5424215854091359
(2163, 4)
0.46494420267099745
(2163, 5)
0.5898516690801923
(2163, 6)
0.6525097088939982
(2163, 8)
0.6390520902852468
(2163, 11)
0.6749289712988433
(2163, 13)
0.6364254902876121
(2163, 16)
0.7542332775407706
(2163, 21)
0.7376672383420301
(2163, 26)
0.7698052186472488
(2163, 31)
0.7380800858416174
(2163, 36)
0.8174117580447876


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.8174117580447876
(2163, 36)
#########################linear, 2#############################
(2163, 2)
0.4707359834145125
(2163, 3)
0.38298946157354696
(2163, 4)
0.4305346677736329
(2163, 5)
0.6572199468765112
(2163, 6)
0.6435932781426993
(2163, 8)
0.6242769208984222
(2163, 11)
0.6557843559324434
(2163, 13)
0.6637698338088551
(2163, 16)
0.705452354504953
(2163, 21)
0.7222972799897927
(2163, 26)
0.7213976477615918
(2163, 31)
0.788702010695618
(2163, 36)
0.7651208767058875


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 41)
0.7719067492583264


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 46)
0.7825406482453222


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.788702010695618
(2163, 31)
#########################linear, 3#############################
(2163, 2)
0.4276968276157386
(2163, 3)
0.45837159814560513
(2163, 4)
0.44811286431517305
(2163, 5)
0.45840515746607724
(2163, 6)
0.5325050475570654
(2163, 8)
0.5360996589185911
(2163, 11)
0.6910930080653472
(2163, 13)
0.6214144842330611
(2163, 16)
0.6627520967026108
(2163, 21)
0.6506512171434038
(2163, 26)
0.7593259130017782
(2163, 31)
0.7614553808157307
(2163, 36)
0.7740169474689724


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.7740169474689724
(2163, 36)
#########################linear, 4#############################
(2163, 2)
0.36327358776003815
(2163, 3)
0.3748628803180436
(2163, 4)
0.3726754650108668
(2163, 5)
0.5700578412382133
(2163, 6)
0.6260702097664961
(2163, 8)
0.5752147284954761
(2163, 11)
0.7830723294041358
(2163, 13)
0.6236051169323368
(2163, 16)
0.7637024092103177
(2163, 21)
0.7734055236508877
(2163, 26)
0.767356561455613
(2163, 31)
0.6818083957178573
(2163, 36)
0.7878903667895418


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.7878903667895418
(2163, 36)
0.7965248843675893 +- 0.014695825798026913
34.0 +- 2.1470724254202507
0.7965248843675893 +- 0.014695825798026913
#########################poly, 0#############################
(2163, 2)
0.3584207109318507
(2163, 3)
0.44149544563837595
(2163, 4)
0.46975741911023927
(2163, 5)
0.4970865002008866
(2163, 6)
0.5350660758032089
(2163, 8)
0.5963501154539624
(2163, 11)
0.6112133556883681
(2163, 13)
0.54041674995155
(2163, 16)
0.728444502362327
(2163, 21)
0.7315663917577568
(2163, 26)
0.7159323090844041
(2163, 31)
0.7604023253645339
(2163, 36)
0.783668677275969


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 41)
0.6810881093596483


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 46)
0.6843581470791225


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 51)
0.7100713332323999
0.783668677275969
(2163, 36)
#########################poly, 1#############################
(2163, 2)
0.4061310157169369
(2163, 3)
0.45760150307843717
(2163, 4)
0.49049504066526983
(2163, 5)
0.5168070624433572
(2163, 6)
0.5470063245368653
(2163, 8)
0.6121256002590763
(2163, 11)
0.6460476884998888
(2163, 13)
0.6848515420703141
(2163, 16)
0.6824032285137018
(2163, 21)
0.7561322083138086
(2163, 26)
0.7281758515156636
(2163, 31)
0.7346089737314057
(2163, 36)
0.7482479219785052


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 41)
0.6663308818381325


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 46)
0.6796158467430269


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 51)
0.6942063116648154
0.7561322083138086
(2163, 21)
#########################poly, 2#############################
(2163, 2)
0.37447232594995183
(2163, 3)
0.45720878628676564
(2163, 4)
0.46054129347204
(2163, 5)
0.4940099485742614
(2163, 6)
0.4623766993899874
(2163, 8)
0.5335725431129883
(2163, 11)
0.7100721287259295
(2163, 13)
0.6850234411059604
(2163, 16)
0.6963995331988795
(2163, 21)
0.7666961702219622
(2163, 26)
0.6953095538198417
(2163, 31)
0.7632433842519823
(2163, 36)
0.784927437932779


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 41)
0.6864568476024786


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 46)
0.6930773609859328


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 51)
0.6997271985987209
0.784927437932779
(2163, 36)
#########################poly, 3#############################
(2163, 2)
0.4370820526264082
(2163, 3)
0.4651276597159766
(2163, 4)
0.4899546405611426
(2163, 5)
0.6005482013323594
(2163, 6)
0.5357913277343157
(2163, 8)
0.5405383534807393
(2163, 11)
0.6376103635956343
(2163, 13)
0.720171659969814
(2163, 16)
0.7806405967321235
(2163, 21)
0.6480591078815645
(2163, 26)
0.7946350106799045
(2163, 31)
0.7468863128902798
(2163, 36)
0.7921548596208552


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 41)
0.6836671776502701


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 46)
0.6930525186426164


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 51)
0.6957388144237141
0.7946350106799045
(2163, 26)
#########################poly, 4#############################
(2163, 2)
0.3758998013690785
(2163, 3)
0.445710678123416
(2163, 4)
0.4504188353178924
(2163, 5)
0.7116840857883174
(2163, 6)
0.6557374717210982
(2163, 8)
0.5627848474764996
(2163, 11)
0.7089540033982933
(2163, 13)
0.7026033231974478
(2163, 16)
0.7217669709240382
(2163, 21)
0.7886561410467289
(2163, 26)
0.795906569876131
(2163, 31)
0.7836050797824888
(2163, 36)
0.8128843596683671


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 41)
0.6924943752143626


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 46)
0.690237251781432


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 51)
0.6942439470909041
0.8128843596683671
(2163, 36)
0.7864495387741657 +- 0.016138750991291233
31.0 +- 5.5437171645025325
0.7864495387741657 +- 0.016138750991291233
#########################linear, 0#############################
(2163, 2)
0.370309047670211
(2163, 3)
0.5661978950268454
(2163, 4)
0.6240766328990188
(2163, 5)
0.6956181220663735
(2163, 6)
0.4867217939764442
(2163, 8)
0.6815364008575532
(2163, 11)
0.7069622126365365
(2163, 13)
0.7005509105311918
(2163, 16)
0.6804882406945032
(2163, 21)
0.7188930827964908
(2163, 26)
0.7284681352966773
(2163, 31)
0.8054755238226072
(2163, 36)
0.8240553860254131


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.8240553860254131
(2163, 36)
#########################linear, 1#############################
(2163, 2)
0.39739797937021437
(2163, 3)
0.42600583543031134
(2163, 4)
0.5610989132168596
(2163, 5)
0.5953893199977291
(2163, 6)
0.6825058985054279
(2163, 8)
0.5975573877184831
(2163, 11)
0.7176300053897
(2163, 13)
0.7055934906568747
(2163, 16)
0.6929388103159408
(2163, 21)
0.7790464979225902
(2163, 26)
0.736707094227687
(2163, 31)
0.7853264622478368
(2163, 36)
0.8203345460811312


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.8203345460811312
(2163, 36)
#########################linear, 2#############################
(2163, 2)
0.3802477474819542
(2163, 3)
0.5487541091984802
(2163, 4)
0.46223556043545977
(2163, 5)
0.4494680642644745
(2163, 6)
0.5665550181270271
(2163, 8)
0.6131487712307151
(2163, 11)
0.6795040798437972
(2163, 13)
0.6449091009224792
(2163, 16)
0.6195597468923832
(2163, 21)
0.7299001241796245
(2163, 26)
0.788377805360681
(2163, 31)
0.7259610789821773
(2163, 36)
0.7709568724864444


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 41)
0.7719067492583264


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


(2163, 46)
0.7825406482453222


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.788377805360681
(2163, 26)
#########################linear, 3#############################
(2163, 2)
0.4850060295910199
(2163, 3)
0.4121040613448238
(2163, 4)
0.502777852065327
(2163, 5)
0.6143760957133301
(2163, 6)
0.47957265549467043
(2163, 8)
0.458570518774444
(2163, 11)
0.5500973413770969
(2163, 13)
0.6033215679967368
(2163, 16)
0.6590472052371803
(2163, 21)
0.7413909397498333
(2163, 26)
0.7239716069373086
(2163, 31)
0.7431983206205035
(2163, 36)
0.7376030587263216


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
0.7431983206205035
(2163, 31)
#########################linear, 4#############################
(2163, 2)
0.3633206460287123
(2163, 3)
0.4190861108396752
(2163, 4)
0.3648371358243433
(2163, 5)
0.5194558429868501
(2163, 6)
0.521966357159633
(2163, 8)
0.5487194946932458
(2163, 11)
0.6970076460713985
(2163, 13)
0.6617930476313527
(2163, 16)
0.7194867869316921
(2163, 21)
0.7691854813098862
(2163, 26)
0.7707853175826235
(2163, 31)
0.8042771070803185
(2163, 36)
0.797710256615263


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


error
error
0.8042771070803185
(2163, 31)
0.7960486330336094 +- 0.02568688598659617
32.0 +- 3.279707304013576
0.7960486330336094 +- 0.02568688598659617


  self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))


In [10]:
from sklearn.manifold import Isomap
from sklearn.svm import SVR

best_scores = []
best_scores_svr = []
best_nums = []
for curr_seed in [0,1,2,3,4]: 
    best_score = 0
    best_score_svr = 0
    best_num = 0
    for i in [5,10,15,20,25,30,40,50,100,200]:
        curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
        curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
        dimRedMethod = Isomap(n_components=i,n_neighbors=10)
        trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
        test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
        print(test_reduced.shape)
        mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
        mod_svr = SVR().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)

        actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
        actual_score_svr = mod_svr.score(test_reduced, df_test_withTar.mean_std)
        print(actual_score)
        if actual_score> best_score:
            best_score=actual_score
            best_score_svr=actual_score_svr
            best_num = test_reduced.shape[1]
    print(best_score)
    print(best_score_svr)
    print(best_num)
    best_scores.append(best_score)
    best_scores_svr.append(best_score_svr)
    best_nums.append(best_num)
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


(2163, 5)
0.19217288606719773
(2163, 10)
0.2891673692108768
(2163, 15)
0.3459730100496071
(2163, 20)
0.36060663834147566
(2163, 25)
0.37603741724814943
(2163, 30)
0.3826333177630319
(2163, 40)
0.39551925781812425
(2163, 50)
0.4023863934709956
(2163, 100)
0.4268685502879105
(2163, 200)
0.43889880249395585
0.43889880249395585
0.43513847846119114
200
(2163, 5)
0.17552334111257117
(2163, 10)
0.3068049819472157
(2163, 15)
0.3932049815892966
(2163, 20)
0.4268997081516277
(2163, 25)
0.42749804140903613
(2163, 30)
0.43248263060453485
(2163, 40)
0.44949651035346494
(2163, 50)
0.46491143706299454
(2163, 100)
0.47963488811661503
(2163, 200)
0.5023922835875414
0.5023922835875414
0.44624544716758385
200
(2163, 5)
0.18061687588605035
(2163, 10)
0.3208550500451657
(2163, 15)
0.34995091835772807
(2163, 20)
0.35678663948051903
(2163, 25)
0.3714089567527028
(2163, 30)
0.37766140820564975
(2163, 40)
0.4011659854689139
(2163, 50)
0.4128418202894537
(2163, 100)
0.4320122714281912
(2163, 200)
0.453539991291

In [33]:
with open('results.pkl', 'rb') as f:
    x = pickle.load(f)
x

[[0, 'linear', 0.7876753182696432, 0.6582644603717525, (2163, 35)],
 [0, 'poly', 0.7856958192057962, 0.6750296676426459, (2163, 35)],
 [0, 'sigmoid', 0.7326642158815335, 0.7324959297818436, (2163, 35)],
 [1, 'linear', 0.7780191543720085, 0.6811204721733286, (2163, 35)],
 [1, 'poly', 0.7992620180820665, 0.7095636843753161, (2163, 35)],
 [1, 'sigmoid', 0.7577677625721481, 0.7388529078197974, (2163, 35)],
 [2, 'linear', 0.7961200729297456, 0.7598549243690862, (2163, 30)],
 [2, 'poly', 0.7584825062938393, 0.6637545567408245, (2163, 30)],
 [2, 'sigmoid', 0.7110860305371753, 0.7451331199163801, (2163, 150)],
 [3, 'linear', 0.7835874561587552, 0.7551402720391625, (2163, 35)],
 [3, 'poly', 0.7440595009275666, 0.6945546636292005, (2163, 35)],
 [3, 'sigmoid', 0.718104738699973, 0.7060594305407312, (2163, 150)],
 [4, 'linear', 0.8051674319579987, 0.6834705232149076, (2163, 35)],
 [4, 'poly', 0.8185951990209985, 0.7288546040313353, (2163, 35)],
 [4, 'sigmoid', 0.7338096957091025, 0.705496185521510

In [34]:
### LINEAR SPCA ###
best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[0:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)

### poly SPCA ###
best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[1:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)

### sigmoid SPCA ###
best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[2:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


0.7901138867376303 +- 0.008384649630823852
0.7075701304336475 +- 0.03658022230910346
34.0 +- 1.753077294359835
0.7812190087060534 +- 0.023651440056663534
0.6943514352838644 +- 0.020487099713344965
34.0 +- 1.753077294359835
0.7306864886799864 +- 0.014076782847690294
0.7256075147160527 +- 0.014618673453012808
104.0 +- 49.382665784665775


In [41]:
### kernel PCA
[0, 'linear', 0.8904786801105362, 0.6884135214176206, (2163, 200)]
[1, 'linear', 0.8910671720372463, 0.6941205171311187, (2163, 200)]
[2, 'linear', 0.8890243094419056, 0.6860954046405752, (2163, 200)]
[3, 'linear', 0.8921921210387593, 0.6860674973445605, (2163, 200)]
[4, 'linear', 0.8910347995949441, 0.686303523462513, (2163, 200)]

compute_CI([0.8904786801105362,0.8910671720372463,0.8890243094419056,0.8921921210387593,0.8910347995949441],5)
compute_CI([0.6884135214176206,0.6941205171311187,0.6860954046405752,0.6860674973445605,0.686303523462513],5)
compute_CI([200,200,200,200,150],5)

0.8907594164446783 +- 0.0009035239487573092
0.6882000927992775 +- 0.0027065499954437417
190.0 +- 17.53077294359835


In [37]:
### LLE ###
with open('results_LLE.pkl', 'rb') as f:
    x = pickle.load(f)
x

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)

0.5176878641179228 +- 0.008992920651721433
0.1414385513119329 +- 0.023577748198900503
200.0 +- 0.0


In [39]:
### isomap ###
with open('results_isomap.pkl', 'rb') as f:
    x = pickle.load(f)
x

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


0.4240874618527585 +- 0.018643105595670385
0.4266296864729952 +- 0.011488339861755048
190.0 +- 17.53077294359835


In [45]:
### UMAP ###
with open('results_umap.pkl', 'rb') as f:
    x = pickle.load(f)
x

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI([200,200,200,200,175],5)


0.4688071608543396 +- 0.010873620492664061
0.2715232221462759 +- 0.01026398727813031
195.0 +- 8.765386471799175


In [7]:
### LDA ###
import pickle
with open('results_LDA.pkl', 'rb') as f:
    x = pickle.load(f)
x

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


0.9883495145631068 +- 0.0005376143887085477
0.9882570503929727 +- 0.0006065108282965393
1.0 +- 0.0


In [9]:
### UMAP ###
import pickle
with open('results_class_umap.pkl', 'rb') as f:
    x = pickle.load(f)
print(x)

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


[[0, None, 0.9842810910772076, 0.9842810910772076, (2163, 3)], [1, None, 0.986130374479889, 0.986130374479889, (2163, 30)], [2, None, 0.986130374479889, 0.986130374479889, (2163, 7)], [3, None, 0.9865926953305594, 0.9865926953305594, (2163, 5)], [4, None, 0.9865926953305594, 0.9865926953305594, (2163, 50)]]
0.985945446139621 +- 0.0007516121137775516
0.985945446139621 +- 0.0007516121137775516
19.0 +- 16.05765213223901


In [10]:
### NCA ###
import pickle
with open('results_class_NCA.pkl', 'rb') as f:
    x = pickle.load(f)
print(x)

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


[[0, None, 0.9935275080906149, 0.996301433194637, (2163, 30)], [1, None, 0.9921405455386038, 0.9958391123439667, (2163, 30)], [2, None, 0.9944521497919556, 0.9953767914932964, (2163, 25)], [3, None, 0.9939898289412853, 0.996301433194637, (2163, 25)], [4, None, 0.9930651872399445, 0.9976883957466481, (2163, 30)]]
0.993435043920481 +- 0.0006972048591932797
0.996301433194637 +- 0.0006780997205803709
28.0 +- 2.1470724254202507


In [11]:
### LLE ###
import pickle
with open('results_class_LLE.pkl', 'rb') as f:
    x = pickle.load(f)
print(x)

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


[[0, None, 0.9833564493758669, 0.9902912621359223, (2163, 100)], [1, None, 0.984743411927878, 0.9902912621359223, (2163, 200)], [2, None, 0.9865926953305594, 0.9902912621359223, (2163, 150)], [3, None, 0.986130374479889, 0.9889042995839112, (2163, 150)], [4, None, 0.9879796578825705, 0.9907535829865927, (2163, 150)]]
0.9857605177993527 +- 0.0013896908902396316
0.9901063337956542 +- 0.0005496971195687785
150.0 +- 27.718585822512658


In [17]:
### isomap ###
import pickle
with open('results_class_isomap.pkl', 'rb') as f:
    x = pickle.load(f)
print(x)

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


[[0, None, 0.9912159038372631, 0.9912159038372631, (2163, 7)], [1, None, 0.9893666204345816, 0.9912159038372631, (2163, 4)], [2, None, 0.9893666204345816, 0.9907535829865927, (2163, 10)], [3, None, 0.9907535829865927, 0.9902912621359223, (2163, 10)], [4, None, 0.9912159038372631, 0.9916782246879334, (2163, 5)]]
0.9903837263060563 +- 0.0007428210263996852
0.9910309754969949 +- 0.00041326746799710725
7.2 +- 2.17551170992022


In [15]:
### LINEAR SPCA ###

with open('results_class_supPCA.pkl', 'rb') as f:
    x = pickle.load(f)
print(x)

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[0:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)

### poly SPCA ###
best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[1:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)

### sigmoid SPCA ###
best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[2:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


[[0, 'linear', 0.9889042995839112, 0.9884419787332409, (2163, 30)], [0, 'poly', 0.9879796578825705, 0.9884419787332409, (2163, 25)], [0, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)], [1, 'linear', 0.9884419787332409, 0.9875173370319001, (2163, 35)], [1, 'poly', 0.989828941285252, 0.9893666204345816, (2163, 30)], [1, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)], [2, 'linear', 0.9893666204345816, 0.9907535829865927, (2163, 35)], [2, 'poly', 0.9889042995839112, 0.9893666204345816, (2163, 30)], [2, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)], [3, 'linear', 0.9884419787332409, 0.9884419787332409, (2163, 35)], [3, 'poly', 0.9893666204345816, 0.9889042995839112, (2163, 30)], [3, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)], [4, 'linear', 0.9902912621359223, 0.9893666204345816, (2163, 35)], [4, 'poly', 0.989828941285252, 0.9902912621359223, (2163, 35)], [4, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)]]
0.9890

In [18]:
### LINEAR SPCA ###

with open('results_class_kernelPCA.pkl', 'rb') as f:
    x = pickle.load(f)
print(x)

best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[0:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)

### poly SPCA ###
best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[1:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)

### sigmoid SPCA ###
best_scores = []
best_scores_svr = []
best_nums = []
for elem in x[2:15:3]:
    best_scores.append(elem[2])
    best_scores_svr.append(elem[3])
    best_nums.append(elem[4][1])
compute_CI(best_scores,5)
compute_CI(best_scores_svr,5)
compute_CI(best_nums,5)


[[0, 'linear', 0.9935275080906149, 0.9930651872399445, (2163, 30)], [0, 'poly', 0.9930651872399445, 0.9921405455386038, (2163, 40)], [0, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)], [1, 'linear', 0.9921405455386038, 0.9921405455386038, (2163, 30)], [1, 'poly', 0.9921405455386038, 0.9921405455386038, (2163, 75)], [1, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)], [2, 'linear', 0.9939898289412853, 0.9921405455386038, (2163, 25)], [2, 'poly', 0.9939898289412853, 0.9935275080906149, (2163, 150)], [2, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)], [3, 'linear', 0.9935275080906149, 0.9926028663892742, (2163, 30)], [3, 'poly', 0.9926028663892742, 0.9930651872399445, (2163, 200)], [3, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)], [4, 'linear', 0.9926028663892742, 0.9926028663892742, (2163, 30)], [4, 'poly', 0.9935275080906149, 0.9935275080906149, (2163, 150)], [4, 'sigmoid', 0.5247341655108645, 0.5247341655108645, (2163, 1)]]
0