# Preliminaries

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
import sys
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn import preprocessing
from sklearn.utils import resample

#sys.path.append("/Users/paolo/Documents/methods/CMI_FS")
#from feature_selection import forwardFeatureSelection

sys.path.append("../LinCFA")
from LinCFA import LinCFA

sys.path.append("../NonLinCFA")
from NonLinCFA import NonLinCFA

sys.path.append("../GenLinCFA")
from GenLinCFA import GenLinCFA

sys.path.append("../droughts")
from aux import prepare_target,prepare_features,compare_methods

#from aux import standardize,unfold_dataset,compute_r2,prepare_target,prepare_features,aggregate_unfolded_data,aggregate_unfolded_data_onlyTrain,FS_with_linearWrapper,compare_methods, compute_r2


In [8]:
def compute_CI(list,n):
    print(f'{np.mean(list)} +- {1.96*np.std(list)/np.sqrt(n)}')

In [9]:
from sklearn.base import TransformerMixin, BaseEstimator
from sklearn import utils
from sklearn.preprocessing import KernelCenterer, scale
from sklearn.metrics.pairwise import pairwise_kernels
from scipy import linalg
from scipy.sparse.linalg import eigsh as ssl_eigsh

class spca(BaseEstimator, TransformerMixin):
    
    def __init__(self, num_components, kernel="linear", eigen_solver='auto', 
                 max_iterations=None, gamma=0, degree=3, coef0=1, alpha=1.0, 
                 tolerance=0, fit_inverse_transform=False):
        
        self._num_components = num_components
        self._gamma = gamma
        self._tolerance = tolerance
        self._fit_inverse_transform = fit_inverse_transform
        self._max_iterations = max_iterations
        self._degree = degree
        self._kernel = kernel
        self._eigen_solver = eigen_solver
        self._coef0 = coef0
        self._centerer = KernelCenterer()
        self._alpha = alpha
        self._alphas = []
        self._lambdas = []
        
        
    def _get_kernel(self, X, Y=None):
        # Returns a kernel matrix K such that K_{i, j} is the kernel between the ith and jth vectors 
        # of the given matrix X, if Y is None. 
        
        # If Y is not None, then K_{i, j} is the kernel between the ith array from X and the jth array from Y.
        
        # valid kernels are 'linear, rbf, poly, sigmoid, precomputed'
        
        args = {"gamma": self._gamma, "degree": self._degree, "coef0": self._coef0}
        
        return pairwise_kernels(X, Y, metric=self._kernel, n_jobs=-1, filter_params=True, **args)
    
    
    
    def _fit(self, X, Y):
        
        # calculate kernel matrix of the labels Y and centre it and call it K (=H.L.H)
        K = self._centerer.fit_transform(self._get_kernel(Y))
        
        # deciding on the number of components to use
        if self._num_components is not None:
            num_components = min(K.shape[0], self._num_components)
        else:
            num_components = self.K.shape[0]
        
        # Scale X
        # scaled_X = scale(X)
        
        # calculate the eigen values and eigen vectors for X^T.K.X
        Q = (X.T).dot(K).dot(X)
        
        # If n_components is much less than the number of training samples, 
        # arpack may be more efficient than the dense eigensolver.
        if (self._eigen_solver=='auto'):
            if (Q.shape[0]/num_components) > 20:
                eigen_solver = 'arpack'
            else:
                eigen_solver = 'dense'
        else:
            eigen_solver = self._eigen_solver
        
        if eigen_solver == 'dense':
            # Return the eigenvalues (in ascending order) and eigenvectors of a Hermitian or symmetric matrix.
            self._lambdas, self._alphas = linalg.eigh(Q, eigvals=(Q.shape[0] - num_components, Q.shape[0] - 1))
            # argument eigvals = Indexes of the smallest and largest (in ascending order) eigenvalues
        
        elif eigen_solver == 'arpack':
            # deprecated :: self._lambdas, self._alphas = utils.arpack.eigsh(A=Q, num_components, which="LA", tol=self._tolerance)
            self._lambdas, self._alphas = ssl_eigsh(A=Q, k=num_components, which="LA", tol=self._tolerance)
            
        indices = self._lambdas.argsort()[::-1]
        
        self._lambdas = self._lambdas[indices]
        self._lambdas = self._lambdas[self._lambdas > 0]  # selecting values only for non zero eigen values
        
        self._alphas = self._alphas[:, indices]
        #return self._alphas
        self._alphas = self._alphas[:, self._lambdas > 0]  # selecting values only for non zero eigen values
        
        self.X_fit = X

        
    def _transform(self):
        return self.X_fit.dot(self._alphas)
        
        
    def transform(self, X):
        return X.dot(self._alphas)
        
        
    def fit(self, X, Y):
        self._fit(X,Y)
        return
        
        
    def fit_and_transform(self, X, Y):
        self.fit(X, Y)
        return self._transform()
    

In [10]:
df = pd.read_csv('fundamentals.csv')
cols_to_delete = df.columns[df.isnull().sum()/len(df) > .50]
df.drop(cols_to_delete, axis = 1, inplace = True)
df = df.dropna()
df = df.drop(['Unnamed: 0','Ticker Symbol','Period Ending'],axis=1)
normalized_df=(df-df.mean())/df.std()
normalized_df['mean_std'] = normalized_df['Cash Ratio']
normalized_df = normalized_df.drop(['Cash Ratio'],axis=1)
normalized_df

df_trainVal_withTar = normalized_df.iloc[:-442,:]
df_test_withTar = normalized_df.iloc[-442:,:]
df_trainVal = normalized_df.iloc[:-442,:-1]
df_test = normalized_df.iloc[-442:,:-1]
target_df_trainVal = normalized_df.iloc[:-442,-1]
target_df_test = normalized_df.iloc[-442:,-1]

# Regression

# NonLinCFA

In [17]:
#for variable in ['cyclostationary_mean_tg']:#variables_list:
#actual_df_trainVal = df_trainVal_unfolded_std[df_trainVal_unfolded_std.columns[pd.Series(df_trainVal_unfolded_std.columns).str.startswith(variable)]]
for eps in [0.01,0.001,0.0001,0.00001,0.000001]:
    res = []
    nums = []
    for curr_seed in [0,1,2,3,4]:
        curr_df_trainVal = df_trainVal[np.random.default_rng(seed=curr_seed).permutation(df_trainVal.columns.values)]
        curr_df_test = df_test[np.random.default_rng(seed=curr_seed).permutation(df_test.columns.values)]
        curr_df_trainVal_withTar = pd.concat((curr_df_trainVal,target_df_trainVal), axis=1)
        
        output = NonLinCFA(curr_df_trainVal_withTar,'mean_std', eps, -5 , 0).compute_clusters()
        
        aggregate_trainVal = pd.DataFrame()
        aggregate_test = pd.DataFrame()
        for i in range(len(output)):
            aggregate_trainVal[str(i)] = curr_df_trainVal_withTar[output[i]].mean(axis=1)
            aggregate_trainVal = aggregate_trainVal.copy()
            aggregate_test[str(i)] = curr_df_test[output[i]].mean(axis=1)
            aggregate_test = aggregate_test.copy()
        print(f'Number of aggregated features: {len(output)}\n')
        mod = LinearRegression().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
        score = mod.score(aggregate_test, df_test_withTar.mean_std)
        print(score)
        res.append(score)
        nums.append(len(output))
    compute_CI(res,5)
    compute_CI(nums,5)


62
6
1
5
1
Number of aggregated features: 5

0.8136440738302579
58
4
1
10
1
1
Number of aggregated features: 6

0.8157373735623255
62
6
4
1
1
1
Number of aggregated features: 6

0.8136952358922954
62
6
4
1
1
1
Number of aggregated features: 6

0.8059786176772916
63
7
3
1
1
Number of aggregated features: 5

0.8162261979772543
0.813056299787885 +- 0.003234503926486594
5.6 +- 0.42941448508405017
54
6
8
1
4
1
1
Number of aggregated features: 7

0.7935785039702893
50
11
1
5
6
1
1
Number of aggregated features: 7

0.8168243507889493
52
7
8
2
1
3
1
1
Number of aggregated features: 8

0.8075341115270085
48
13
3
5
3
1
1
1
Number of aggregated features: 8

0.7994296314845528
56
8
6
3
1
1
Number of aggregated features: 6

0.813490072905416
0.8061713341352432 +- 0.007570146132932547
7.2 +- 0.6559414608027151
52
8
8
1
3
2
1
Number of aggregated features: 7

0.8180362597089809
50
11
1
5
6
1
1
Number of aggregated features: 7

0.8168243507889492
50
7
6
6
1
3
1
1
Number of aggregated features: 8

0.80

# GenLinCFA

In [29]:
for eps in [0.1,0.15,0.2,0.25,0.3]:
    res = []
    nums = []
    for curr_seed in [0,1,2,3,4]:
        curr_df_trainVal = df_trainVal[np.random.default_rng(seed=curr_seed).permutation(df_trainVal.columns.values)]
        curr_df_test = df_test[np.random.default_rng(seed=curr_seed).permutation(df_test.columns.values)]
        curr_df_trainVal_withTar = pd.concat((curr_df_trainVal,target_df_trainVal), axis=1)
        
        output = GenLinCFA(curr_df_trainVal_withTar,'mean_std', eps, -5 , 0, 1).compute_clusters()
        
        aggregate_trainVal = pd.DataFrame()
        aggregate_test = pd.DataFrame()
        for i in range(len(output)):
            aggregate_trainVal[str(i)] = curr_df_trainVal_withTar[output[i]].mean(axis=1)
            aggregate_trainVal = aggregate_trainVal.copy()
            aggregate_test[str(i)] = curr_df_test[output[i]].mean(axis=1)
            aggregate_test = aggregate_test.copy()
        print(f'Number of aggregated features: {len(output)}\n')
        mod = LinearRegression().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
        score = mod.score(aggregate_test, df_test_withTar.mean_std)
        print(score)
        res.append(score)
        nums.append(len(output))
    compute_CI(res,5)
    compute_CI(nums,5)


2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
59
1
2
3
3
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 15

0.8120418172593733
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
61
1
2
3
3
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 13

0.8123928255793366
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
56
1
1
1
1
1
1
2
3
4
5
5
1
1
1
1
1
1
1
1
Number of aggregated features: 16

0.8104174722825184
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
54
1
2
3
4
5
6
6
1
1
1
1
1
1
1
1
1
1
1
1
1
1
Number of aggregated features: 17

0.809

# LinCFA

In [31]:
res = []
nums = []
for curr_seed in [0,1,2,3,4]:
    curr_df_trainVal = df_trainVal[np.random.default_rng(seed=curr_seed).permutation(df_trainVal.columns.values)]
    curr_df_test = df_test[np.random.default_rng(seed=curr_seed).permutation(df_test.columns.values)]
    curr_df_trainVal_withTar = pd.concat((curr_df_trainVal,target_df_trainVal), axis=1)
    
    output = LinCFA(curr_df_trainVal_withTar,'mean_std', 0, 0).compute_clusters()
    
    aggregate_trainVal = pd.DataFrame()
    aggregate_test = pd.DataFrame()
    for i in range(len(output)):
        aggregate_trainVal[str(i)] = curr_df_trainVal_withTar[output[i]].mean(axis=1)
        aggregate_trainVal = aggregate_trainVal.copy()
        aggregate_test[str(i)] = curr_df_test[output[i]].mean(axis=1)
        aggregate_test = aggregate_test.copy()
    print(f'Number of aggregated features: {len(output)}\n')
    mod = LinearRegression().fit(aggregate_trainVal, df_trainVal_withTar.mean_std)
    score = mod.score(aggregate_test, df_test_withTar.mean_std)
    print(score)
    res.append(score)
    nums.append(len(output))
compute_CI(res,5)
compute_CI(nums,5)


Number of aggregated features: 11

0.7723522982689838
Number of aggregated features: 10

0.8099957384013146
Number of aggregated features: 12

0.812051816452628
Number of aggregated features: 12

0.8064436887485126
Number of aggregated features: 12

0.8042768415150631
0.8010240766773004 +- 0.012787405976575728
11.4 +- 0.701230917743934


# Baselines

## pca

In [38]:
# explain 95% of variance
from sklearn.decomposition import PCA
from sklearn.utils import resample

r2 = []
n = []
for curr_seed in [0,1,2,3,4]: 
    curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
    curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
    pca = PCA(n_components=0.95)
    trainVal_pca = pd.DataFrame(pca.fit_transform(curr_df_trainVal))
    test_pca = pd.DataFrame(pca.transform(df_test))
    actual_r2 = compare_methods(trainVal_pca, test_pca, curr_df_trainVal_withTar, df_test_withTar, list(trainVal_pca.columns))
    r2.append(actual_r2)
    print(test_pca.shape[1])
    n.append(test_pca.shape[1])
compute_CI(r2,5)
compute_CI(n,5)
    

Full aggregate regression train score: 0.922011320032613, test score: 0.7593726250943146
Aggregate regression train score with FS: 0.922011320032613, test score: 0.7593726250943146
27
Full aggregate regression train score: 0.9058861660878511, test score: 0.7525871350536726
Aggregate regression train score with FS: 0.9058861660878511, test score: 0.7525871350536726
26
Full aggregate regression train score: 0.907905279422283, test score: 0.7546866705898678
Aggregate regression train score with FS: 0.907905279422283, test score: 0.7546866705898678
27
Full aggregate regression train score: 0.9150762209326956, test score: 0.75309811827884
Aggregate regression train score with FS: 0.9150762209326956, test score: 0.75309811827884
26
Full aggregate regression train score: 0.871621141464457, test score: 0.7599120858347211
Aggregate regression train score with FS: 0.871621141464457, test score: 0.7599120858347211
27
0.7559313269702832 +- 0.002728523722143103
26.6 +- 0.42941448508405017


## kernel pca

In [41]:
from sklearn.decomposition import KernelPCA
for curr_kernel in ['linear', 'poly', 'sigmoid']:
    best_scores = []
    best_nums = []
    for curr_seed in [0,1,2,3,4]: 
        best_score = 0
        best_num = 0
        print(f'#########################{curr_kernel}, {curr_seed}#############################')
        for i in range(50):
            curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
            curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
            dimRedMethod = KernelPCA(n_components=i+1, kernel=curr_kernel)
            trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal))
            test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
            print(test_reduced.shape)
            mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
            actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
            print(actual_score)
            if actual_score> best_score:
                best_score=actual_score
                best_num = test_reduced.shape
        print(best_score)
        print(best_num)
        best_scores.append(best_score)
        best_nums.append(best_num[1])
    compute_CI(best_scores,5)
    compute_CI(best_nums,5)

#########################linear, 0#############################
(442, 1)
-0.0018544102584430178
(442, 2)
-0.003280726489439667
(442, 3)
0.09567341254614958
(442, 4)
0.09403905939583246
(442, 5)
0.36430035597335564
(442, 6)
0.3620050578937106
(442, 7)
0.32649790067559636
(442, 8)
0.46733819536579924
(442, 9)
0.4833238430001322
(442, 10)
0.502637863616437
(442, 11)
0.530014093557158
(442, 12)
0.7106851966991402
(442, 13)
0.6963421061143209
(442, 14)
0.6973603195858842
(442, 15)
0.7012568986441079
(442, 16)
0.7048578221301454
(442, 17)
0.7146307600642655
(442, 18)
0.7166811477109108
(442, 19)
0.7169235074194249
(442, 20)
0.7495553815757968
(442, 21)
0.7494723855356651
(442, 22)
0.7537138463111344
(442, 23)
0.7648184532458546
(442, 24)
0.7648008042456759
(442, 25)
0.7636537759973359
(442, 26)
0.760333447297009
(442, 27)
0.7593726250943142
(442, 28)
0.7635576567400576
(442, 29)
0.7653090569666516
(442, 30)
0.7616369883929337
(442, 31)
0.736858498004758
(442, 32)
0.7382039366858885
(442, 33)

(442, 15)
-0.004036303966620025
(442, 16)
-0.00400907005153428
(442, 17)
-0.020178199852025402
(442, 18)
-0.017469478795663074
(442, 19)
-0.0034352462211462154
(442, 20)
-0.17163691410732151
(442, 21)
-0.12020191487664067
(442, 22)
-0.44708191055586566
(442, 23)
-0.36543314409796324
(442, 24)
0.505889663551473
(442, 25)
0.4987617263343981
(442, 26)
0.5917763703773973
(442, 27)
0.6203517886149581
(442, 28)
0.6244514570849583
(442, 29)
0.6425644938259301
(442, 30)
0.7217306717187605
(442, 31)
0.7202095107498279
(442, 32)
0.7119475665252637
(442, 33)
0.711597269209181
(442, 34)
0.7019280462245097
(442, 35)
0.7012065382100884
(442, 36)
0.558008124197727
(442, 37)
0.5846600364686916
(442, 38)
0.6656481360377622
(442, 39)
0.6413478390934688
(442, 40)
0.707113332535654
(442, 41)
0.6449054704942341
(442, 42)
0.6437006287188634
(442, 43)
0.673576142816842
(442, 44)
0.5967023210256899
(442, 45)
0.5346782496464071
(442, 46)
0.543406890276595
(442, 47)
0.41379097631448714
(442, 48)
0.3947604238614

(442, 20)
0.03698481425835498
(442, 21)
0.03598975698246787
(442, 22)
0.11406353714135331
(442, 23)
0.09044374903278507
(442, 24)
0.12034396480256537
(442, 25)
0.10446599660344791
(442, 26)
0.06898925019687419
(442, 27)
0.02757056442105743
(442, 28)
-0.004994856303079276
(442, 29)
0.02133051315016865
(442, 30)
-0.007394918311959042
(442, 31)
0.024052962664159794
(442, 32)
0.04204759670158287
(442, 33)
0.043277362081020065
(442, 34)
0.01909520286374644
(442, 35)
0.0953163485215095
(442, 36)
0.1391027986393344
(442, 37)
0.046131822094679586
(442, 38)
0.0674049486959879
(442, 39)
0.08673758803220655
(442, 40)
0.004845138164390694
(442, 41)
-0.026462813573504995
(442, 42)
-0.3044604910766424
(442, 43)
-0.7879783158013232
(442, 44)
-0.7996868120059353
(442, 45)
-0.8898548946433198
(442, 46)
-0.8428276671617858
(442, 47)
-1.0294719155140881
(442, 48)
-1.050754089048909
(442, 49)
-1.3981128413081678
(442, 50)
-1.3759221358033291
0.415131366533506
(442, 2)
#########################sigmoid, 1##

In [None]:
0.7763803385484433 +- 0.01176161585485915
36.0 +- 10.806694221638733

0.7105455478106365 +- 0.03536120629257586
41.0 +- 5.1110546856788766

0.4349818403914211 +- 0.02364046317279162
3.4 +- 1.188994869627283

## isomap

In [48]:
from sklearn.manifold import Isomap
best_scores = []
best_nums = []
for curr_seed in [0,1,2,3,4]: 
    best_score = 0
    best_num = 0
    for i in range(50):
        curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
        curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
        dimRedMethod = Isomap(n_components=i+1,n_neighbors=10)
        trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
        test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
        print(test_reduced.shape)
        mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
        actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
        print(actual_score)
        if actual_score> best_score:
            best_score=actual_score
            best_num = test_reduced.shape[1]
    print(best_score)
    print(best_num)
    best_scores.append(best_score)
    best_nums.append(best_num)
compute_CI(best_scores,5)
compute_CI(best_nums,5)



(442, 1)
-0.005760793109692575




(442, 2)
0.0020468006205934808




(442, 3)
-0.02938231078578446




(442, 4)
-0.05630519283312996




(442, 5)
-0.07329764049744703




(442, 6)
-0.04327378506765056




(442, 7)
0.07923392630001747




(442, 8)
0.11460125893399664




(442, 9)
-0.0682440219705116




(442, 10)
-0.11493078058736983




(442, 11)
0.03802248718892487




(442, 12)
-0.23785811873451324




(442, 13)
-0.3285395724844997




(442, 14)
-0.32906956710273016




(442, 15)
-0.08746374914071287




(442, 16)
0.0294820880950053




(442, 17)
0.06033014801024239




(442, 18)
0.07634271134701409




(442, 19)
0.08459479765759803




(442, 20)
0.13099053506045577




(442, 21)
0.16565483657277347




(442, 22)
0.17536330173780046




(442, 23)
0.10565529994930734




(442, 24)
0.06815560744777094




(442, 25)
0.05944437739391972




(442, 26)
0.03943240269529402




(442, 27)
0.07065894111524473




(442, 28)
-0.036109116223618676




(442, 29)
-0.010741483240934713




(442, 30)
-0.07848894763202785




(442, 31)
-0.23360512605546613




(442, 32)
-0.4018996129901029




(442, 33)
-0.4033222464642525




(442, 34)
-0.35083258617603397




(442, 35)
-0.3532184150056632




(442, 36)
-0.3106689923415571




(442, 37)
-0.21892477099095764




(442, 38)
-0.20731806455457025




(442, 39)
-0.24324519561243063




(442, 40)
-0.1583046389244216




(442, 41)
-0.22956117793607245




(442, 42)
-0.2554624057334569




(442, 43)
-0.3079368194615788




(442, 44)
-0.32393547203795636




(442, 45)
-0.32433664243935034




(442, 46)
-0.32669425484965897




(442, 47)
-0.33457242061439896




(442, 48)
-0.33218190985861384




(442, 49)
-0.3249281558493633




(442, 50)
-0.32037675111970065
0.17536330173780046
22




(442, 1)
0.0011768162472177712




(442, 2)
0.039211255628372244




(442, 3)
0.043838248833172666




(442, 4)
-0.173849569550669




(442, 5)
-0.052443354020921484




(442, 6)
-0.1379515861017675




(442, 7)
-0.3189285941557727




(442, 8)
0.1666487240356127




(442, 9)
0.08111487604546253




(442, 10)
0.11694750537197374




(442, 11)
0.17518653819750107




(442, 12)
0.05441165873483933




(442, 13)
-0.02187529859081927




(442, 14)
-0.028387393578194198




(442, 15)
-0.0019204494263111993




(442, 16)
-0.12654365663600786




(442, 17)
-0.1165635192646508




(442, 18)
-0.05029493377052274




(442, 19)
-0.0490108390178392




(442, 20)
0.02907099024469373




(442, 21)
0.24627805295567673




(442, 22)
0.16283692019881457




(442, 23)
0.06151211983371474




(442, 24)
0.05211945003469498




(442, 25)
0.009068484589126014




(442, 26)
-0.17467647537792352




(442, 27)
-0.1933960131279724




(442, 28)
-0.15117435635258358




(442, 29)
-0.19933871085803645




(442, 30)
-0.21645234494851695




(442, 31)
-0.09675147346822821




(442, 32)
-0.10904868399834289




(442, 33)
-0.15974074707499342




(442, 34)
-0.17445304074524604




(442, 35)
-0.24279361884602157




(442, 36)
-0.25724395320153914




(442, 37)
-0.1444899487101805




(442, 38)
-0.07546981664443919




(442, 39)
-0.025281289852592703




(442, 40)
-0.004364731379421505




(442, 41)
-0.1398248052564195




(442, 42)
-0.015851453796286163




(442, 43)
0.008556597665931864




(442, 44)
0.05052266473403999




(442, 45)
0.05054678101234833




(442, 46)
0.06064060295972329




(442, 47)
0.07464536866430549




(442, 48)
0.030709008516662806




(442, 49)
0.04669128314938109




(442, 50)
0.04091550770910457
0.24627805295567673
21




(442, 1)
-0.014465427971726452




(442, 2)
0.13805604225287038




(442, 3)
0.12187161710457006




(442, 4)
-0.039996468128491225




(442, 5)
-0.2058771502192729




(442, 6)
-0.2062144921326119




(442, 7)
-0.07548772472400223




(442, 8)
-0.17045698169119738




(442, 9)
-0.1770559208220963




(442, 10)
-0.10786439330130615




(442, 11)
-0.10282757847610968




(442, 12)
0.000924565415980072




(442, 13)
-0.009991779638968579




(442, 14)
0.09075168043279935




(442, 15)
0.14362201496001425




(442, 16)
0.14648552508861734




(442, 17)
0.2224787781928369




(442, 18)
0.2979386949285441




(442, 19)
0.3308699297216827




(442, 20)
0.3367830138113236




(442, 21)
0.2477016016694591




(442, 22)
0.2377393760139197




(442, 23)
0.14673543722101978




(442, 24)
0.15339864894823185




(442, 25)
0.18685424890193947




(442, 26)
0.1403183021319414




(442, 27)
0.14875903379986832




(442, 28)
0.1778837918033218




(442, 29)
0.17863570865197587




(442, 30)
0.22634005748266073




(442, 31)
0.2910956064563558




(442, 32)
0.2674613044558456




(442, 33)
0.2501533587581801




(442, 34)
0.22994558728549608




(442, 35)
0.2276790698912956




(442, 36)
0.2528055130787651




(442, 37)
0.2701790659325518




(442, 38)
0.26348056075506565




(442, 39)
0.24970769239313428




(442, 40)
0.2784938365325239




(442, 41)
0.31329479369387814




(442, 42)
0.3327050736004019




(442, 43)
0.33003920348140015




(442, 44)
0.2813987452196236




(442, 45)
0.2539665564703708




(442, 46)
0.272359380829426




(442, 47)
0.20999457234551777




(442, 48)
0.10533736363640778




(442, 49)
0.11168374921941138




(442, 50)
0.06584757380392314
0.3367830138113236
20




(442, 1)
-0.0070394740320633975




(442, 2)
0.08536813562178347




(442, 3)
0.0574544869711352




(442, 4)
-0.008371890798804182




(442, 5)
-0.11284718920579206




(442, 6)
-0.16861674268355697




(442, 7)
-0.15107579542906335




(442, 8)
-0.19239108395517857




(442, 9)
-0.1032317788490249




(442, 10)
-0.10473672862752648




(442, 11)
-0.17605636556511062




(442, 12)
-0.06689767256265733




(442, 13)
0.0821941335475711




(442, 14)
-0.17941394989531




(442, 15)
-0.008369923389158318




(442, 16)
0.0851186049967011




(442, 17)
0.15016331892177504




(442, 18)
0.22876314541287146




(442, 19)
0.19825927362546714




(442, 20)
0.18352698716856142




(442, 21)
0.17317727821459727




(442, 22)
0.1796952258343376




(442, 23)
0.16474690097958455




(442, 24)
0.13086100193802697




(442, 25)
0.13140668045548487




(442, 26)
0.15813254870895477




(442, 27)
0.17802807386748354




(442, 28)
0.21605434846289429




(442, 29)
0.22395641696635782




(442, 30)
0.24824089974855956




(442, 31)
0.23240996332677222




(442, 32)
0.23458885330572044




(442, 33)
0.20781283592024014




(442, 34)
0.20899644872568612




(442, 35)
0.21933825000799978




(442, 36)
0.2218526466655325




(442, 37)
0.24229643575800996




(442, 38)
0.27434217629242386




(442, 39)
0.2787980159240181




(442, 40)
0.2698039375724377




(442, 41)
0.2611480657360774




(442, 42)
0.20276806175125406




(442, 43)
0.21711288460134892




(442, 44)
0.2069035421123977




(442, 45)
0.18464221767690858




(442, 46)
0.22169202344397332




(442, 47)
0.21821952455592686




(442, 48)
0.22557466840760065




(442, 49)
0.22807102793704948




(442, 50)
0.19008818799314053
0.2787980159240181
39




(442, 1)
-0.011304793462685092




(442, 2)
0.1054678880086598




(442, 3)
0.10216083317792413




(442, 4)
0.11511725849279164




(442, 5)
0.1258365196949195




(442, 6)
0.030672270764392273




(442, 7)
0.030809151419729175




(442, 8)
0.04473634100424351




(442, 9)
0.08820467092749451




(442, 10)
0.1867428313054087




(442, 11)
0.054489352637751676




(442, 12)
0.06319737033468653




(442, 13)
-0.0342345536639439




(442, 14)
-0.15928813236172967




(442, 15)
-0.3794711329110252




(442, 16)
-0.689010594534404




(442, 17)
-0.6487742162161649




(442, 18)
-0.6012292820023362




(442, 19)
-0.25877069681962306




(442, 20)
-0.263545178914693




(442, 21)
-0.09360126982328132




(442, 22)
0.10230410750190855




(442, 23)
0.22510004499775726




(442, 24)
0.2237970845444669




(442, 25)
0.22138701069273115




(442, 26)
0.20249288635691076




(442, 27)
0.173391528842896




(442, 28)
0.1721477193273776




(442, 29)
0.21950923558030078




(442, 30)
0.2002162385222025




(442, 31)
0.2270842751782316




(442, 32)
0.2263797259233239




(442, 33)
0.20014801357649592




(442, 34)
0.26785424543213054




(442, 35)
0.26662626344288476




(442, 36)
0.2541910224543965




(442, 37)
0.24640650195653302




(442, 38)
0.1834418813746731




(442, 39)
0.17690888091127543




(442, 40)
0.00569926021398337




(442, 41)
0.005519325821152665




(442, 42)
-0.004734914118090572




(442, 43)
-0.02385726643325148




(442, 44)
-0.03377947253368463




(442, 45)
-0.033764231960261126




(442, 46)
-0.21905905051332808




(442, 47)
-0.21065102179880046




(442, 48)
-0.17775801319203532




(442, 49)
-0.1639089977803465
(442, 50)
-0.22099693539536647
0.26785424543213054
34
0.26101532597218985 +- 0.04581068695546693
27.2 +- 6.821250559831386




In [None]:
0.26101532597218985 +- 0.04581068695546693
27.2 +- 6.8212505598313860

## LLE

In [47]:
from sklearn.manifold import LocallyLinearEmbedding as LLE

best_scores = []
best_nums = []
for curr_seed in [0,1,2,3,4]: 
    best_score = 0
    best_num = 0
    for i in range(50):
        curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
        curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
        dimRedMethod = LLE(n_components=i+1,n_neighbors=10)
        trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
        test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
        print(test_reduced.shape)
        mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
        actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
        print(actual_score)
        if actual_score> best_score:
            best_score=actual_score
            best_num = test_reduced.shape[1]
    print(best_score)
    print(best_num)
    best_scores.append(best_score)
    best_nums.append(best_num)
compute_CI(best_scores,5)
compute_CI(best_nums,5)



(442, 1)
0.5445025801660901
(442, 2)
0.5462416922307325
(442, 3)
0.5464474327223566
(442, 4)
0.5432544351958116




(442, 5)
0.6069821525130226
(442, 6)
0.6712136599154954
(442, 7)
0.6712486553707209
(442, 8)
0.6691208816047118




(442, 9)
0.6846023433173085
(442, 10)
0.685561593453349
(442, 11)
0.6858129050318836




(442, 12)
0.6798544036773022
(442, 13)
0.6797243341360804
(442, 14)
0.6801096719302624




(442, 15)
0.6855113645894213
(442, 16)
0.682589313311522
(442, 17)
0.6840474741330915




(442, 18)
0.6843190528431036
(442, 19)
0.689116130469767




(442, 20)
0.6913945806641693
(442, 21)
0.6900608573396345




(442, 22)
0.6914208889031129
(442, 23)
0.6912789280354286
(442, 24)
0.6928394482933415




(442, 25)
0.6909092230078173
(442, 26)
0.6903352411619752
(442, 27)
0.6888746246373848




(442, 28)
0.6907334802383296
(442, 29)
0.689547137080907
(442, 30)
0.6911095731169961
(442, 31)
0.6864884146410444




(442, 32)
0.6833817980050892
(442, 33)
0.6806609974164453
(442, 34)




0.6800045421502146
(442, 35)
0.6814623636464117
(442, 36)
0.6805534478773303




(442, 37)
0.6820137290749807
(442, 38)
0.6934357539412495




(442, 39)
0.6931821420103232
(442, 40)
0.6930648802146251




(442, 41)
0.6926263249961583
(442, 42)
0.6905270069312359




(442, 43)
0.6903613307588541
(442, 44)
0.6919021999702324




(442, 45)
0.6863959219187079
(442, 46)
0.6884375849937905




(442, 47)
0.6884226207026896
(442, 48)
0.6925024464936549




(442, 49)
0.7009096731319058
(442, 50)
0.7016723411678509
0.7016723411678509
50
(442, 1)
0.08389143468468796




(442, 2)
0.08521580272924478
(442, 3)
0.08574540868562164
(442, 4)
0.4881693109908053




(442, 5)
0.7051754584685253
(442, 6)
0.7047122022594556
(442, 7)
0.7163838456617813




(442, 8)
0.7156438987788738
(442, 9)
0.7212198293555876
(442, 10)
0.7218839591699806




(442, 11)
0.7220825809976165
(442, 12)
0.7185921820574286




(442, 13)
0.7187848061347518
(442, 14)
0.7248427812874217
(442, 15)
0.7269130414888865




(442, 16)
0.7322760205188047
(442, 17)
0.7322550932279825




(442, 18)
0.7353211435660558
(442, 19)
0.7509295460708572
(442, 20)
0.7536680950110076




(442, 21)
0.7532042234238225
(442, 22)
0.7629989069496959




(442, 23)
0.7769401049736986
(442, 24)
0.7735972545868883




(442, 25)
0.7735033193708803
(442, 26)
0.773553339772149




(442, 27)
0.7763520443617468
(442, 28)
0.7761101270516569




(442, 29)
0.7766693287779899
(442, 30)
0.7770860203296064




(442, 31)
0.7771563768054562
(442, 32)
0.7764447444338023




(442, 33)
0.776661653183714
(442, 34)
0.7765576217184985




(442, 35)
0.7767774166603839
(442, 36)
0.7767322786808042




(442, 37)
0.7765679170854044
(442, 38)
0.776587717180723




(442, 39)
0.7757304585112282
(442, 40)
0.7769338497497491
(442, 41)
0.7785613968006846
(442, 42)
0.7778833260625171




(442, 43)
0.7817275896231908
(442, 44)
0.7819641939982254




(442, 45)
0.7816012647015007
(442, 46)
0.7813891850446678




(442, 47)
0.7812329811118944
(442, 48)
0.7807536756397845




(442, 49)
0.7698116914296486
(442, 50)
0.7698329962886049
0.7819641939982254
44
(442, 1)
0.002084524200922644




(442, 2)
-0.0015581985081127847
(442, 3)
0.008882209931194818
(442, 4)
0.3553519649772634
(442, 5)
0.22068733026116472




(442, 6)
0.22720170896332603
(442, 7)
0.24805149263946935
(442, 8)
0.6060065709571618




(442, 9)
0.6293172871199202
(442, 10)
0.68836722739616
(442, 11)
0.6932331986147223




(442, 12)
0.6804069031396214
(442, 13)
0.687482470181485
(442, 14)
0.6815426636955295




(442, 15)
0.6893865819141909
(442, 16)
0.6866514790963661
(442, 17)
0.7038860623482743




(442, 18)
0.7036618355727613
(442, 19)
0.6886915681151511
(442, 20)
0.6943544300702472




(442, 21)
0.6941937733327139
(442, 22)
0.6940461767603576




(442, 23)
0.7006459679397143
(442, 24)
0.7064293900872977
(442, 25)
0.7067109407860255
(442, 26)
0.7070961046341182




(442, 27)
0.7071508680264456
(442, 28)
0.703943902080606
(442, 29)
0.7048447079224294




(442, 30)
0.7034952592240856
(442, 31)
0.7038560889495857




(442, 32)
0.7025545154130617
(442, 33)
0.7042597108594089




(442, 34)
0.7046422341934537
(442, 35)
0.705086361843726




(442, 36)
0.7054210641186651
(442, 37)
0.7035743991891285
(442, 38)




0.7049175278320192
(442, 39)
0.7097432260939851




(442, 40)
0.7089441261932274
(442, 41)
0.7088781893822046




(442, 42)
0.7151674936398786
(442, 43)
0.7151803770063793




(442, 44)
0.714163641784368
(442, 45)
0.7173450871746411




(442, 46)
0.7239753941631217
(442, 47)
0.7227610205963098




(442, 48)
0.7294390833348685
(442, 49)
0.7334249120841752




(442, 50)
0.7351131355301586
0.7351131355301586
50
(442, 1)
0.05204365130948585
(442, 2)
0.08825661686454855




(442, 3)
0.12944703469330066
(442, 4)
0.2251878385855126
(442, 5)
0.22759205093728552




(442, 6)
0.2826324001458198
(442, 7)
0.2832334110937571
(442, 8)
0.43118536030417776




(442, 9)
0.48218225261364756
(442, 10)
0.7160729463139053
(442, 11)
0.7175760892453488




(442, 12)
0.7192439978809534
(442, 13)
0.7100845100552325
(442, 14)
0.7143116922558636
(442, 15)
0.7152686201675487




(442, 16)
0.7157849659850858
(442, 17)
0.7158687342498236




(442, 18)
0.7114097975094521
(442, 19)
0.7121314169936958
(442, 20)
0.7096040914253701




(442, 21)
0.7092125797777094
(442, 22)
0.722124480730016




(442, 23)
0.7222338695373187
(442, 24)
0.7221858951715409




(442, 25)
0.7223455587618924
(442, 26)
0.720946731265416




(442, 27)
0.7200225064785866
(442, 28)
0.724594587642876




(442, 29)
0.7171935208999634
(442, 30)
0.7144150449815383
(442, 31)
0.7140908569795219




(442, 32)
0.7114517815393746
(442, 33)
0.7120091295482798
(442, 34)
0.7089263967314848




(442, 35)
0.7081862426530654
(442, 36)
0.7086415480558215
(442, 37)
0.7090030816057107
(442, 38)
0.7090982163593207




(442, 39)
0.7090927192905068
(442, 40)
0.7065191272948923




(442, 41)
0.7047068567752428
(442, 42)
0.7079435409585384




(442, 43)
0.7080288172438658
(442, 44)
0.7098612698097562




(442, 45)
0.7101992360360634
(442, 46)
0.709150590663082




(442, 47)
0.7072243471012383
(442, 48)
0.7069416297769422




(442, 49)
0.7087827326313265
(442, 50)
0.7152151664752802
0.724594587642876
28
(442, 1)
0.40370291866405505




(442, 2)
0.5103207800180691
(442, 3)
0.5202268038276643
(442, 4)
0.5142286820264867




(442, 5)
0.5296205629516237
(442, 6)
0.5926376399658742
(442, 7)
0.5908249915069106




(442, 8)
0.6044716940483617
(442, 9)
0.6188715944919895




(442, 10)
0.6191394437822549
(442, 11)
0.6184418612737481




(442, 12)
0.6179560972025553
(442, 13)
0.6169617729279184
(442, 14)
0.6177096838578746




(442, 15)
0.6159914317018744
(442, 16)
0.6165311601357737
(442, 17)
0.6086639200272153




(442, 18)
0.6112034947088626
(442, 19)
0.6157062979664449




(442, 20)
0.616553792455395
(442, 21)
0.6166106033709114




(442, 22)
0.6271280137828125
(442, 23)
0.6304660014807878




(442, 24)
0.6305454907757728
(442, 25)
0.6308345414387713




(442, 26)
0.6331070933768446
(442, 27)
0.6526438055919508
(442, 28)
0.6543052622313621




(442, 29)
0.6567264508496555
(442, 30)
0.6683861643961826
(442, 31)
0.6691704570048711




(442, 32)
0.6705573037303669
(442, 33)
0.6691480219194985




(442, 34)
0.6708306096642292
(442, 35)
0.6715695701591271




(442, 36)
0.6796696490807275
(442, 37)
0.6777805959152697




(442, 38)
0.6799022542593756
(442, 39)
0.6810693419479923




(442, 40)
0.6827440823726504
(442, 41)
0.6816805111062297




(442, 42)
0.6816818572134267
(442, 43)
0.6876153031610753




(442, 44)
0.686566194993486
(442, 45)
0.6920772909659221




(442, 46)
0.6930429760064416
(442, 47)
0.6967905479086233




(442, 48)
0.6968845729396104
(442, 49)
0.6955017653511779
(442, 50)
0.6917718195718925
0.6968845729396104
48
0.7280457662557442 +- 0.02668950852105602
44.0 +- 7.270516900468631




In [None]:
0.7280457662557442 +- 0.02668950852105602
44.0 +- 7.270516900468631

## Sup PCA

In [75]:
for kernel in ['sigmoid']:#['poly']:#['linear']:
    best_scores = []
    best_nums = []
    for curr_seed in [0,3,8,12,5]: 
        print(f'#########################{kernel}, {curr_seed}#############################')
        best_score = 0
        best_num = 0
        for i in range(50):
            curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
            curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
            dimRedMethod = spca(num_components=i+1, kernel=kernel, degree=3, gamma=None, coef0=1)
            trainVal_reduced = pd.DataFrame(dimRedMethod.fit_and_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
            test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
            print(test_reduced.shape)
            mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
            actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
            print(actual_score)
            if actual_score> best_score:
                best_score=actual_score
                best_num = test_reduced.shape
        print(best_score)
        print(best_num)
        best_scores.append(best_score)
        best_nums.append(best_num[1])
    compute_CI(best_scores,5)
    compute_CI(best_nums,5)

#########################sigmoid, 0#############################
(442, 1)
0.694209268415197
(442, 2)
0.5954490761978187
(442, 3)
0.5638963926824548
(442, 4)
0.5130217415656153
(442, 5)
0.5130207860929714
(442, 6)
0.5497002112132319
(442, 7)
0.606245994116924
(442, 8)
0.5956382683118397
(442, 9)
0.51155709223451
(442, 10)
0.501054509768553
(442, 11)
0.5222448764327459
(442, 12)
0.5211831490424863
(442, 13)
0.5644962755944005
(442, 14)
0.543720460633436
(442, 15)
0.5667410624457011
(442, 16)
0.48867076418902855
(442, 17)
0.5247202609558146
(442, 18)
0.5333718944831355
(442, 19)
0.5111876649065379
(442, 20)
0.3520527060274632
(442, 21)
0.3708635697127567
(442, 22)
0.33269473424686835
(442, 23)
0.13720356057849104
(442, 24)
0.11192837806518308
(442, 25)
0.09387775592724734
(442, 26)
0.05498273807552112
(442, 27)
0.21185183380016925
(442, 28)
0.18317335908685917
(442, 29)
0.20944716924831142
(442, 30)
0.09429999865202587
(442, 31)
0.32741193642730304
(442, 32)
0.31963052114482926
(442, 33)


In [None]:
0.773055927916064 +- 0.012552027829434004
31.0 +- 13.77030776707623

0.7626988470258912 +- 0.01156512478454842
26.0 +- 12.246471491821634

0.6409896167884936 +- 0.03842632477566533
16.0 +- 13.247048274993189

## UMAP

In [12]:
import umap

best_scores = []
best_nums = []
for curr_seed in [0,1,2,3,4]: 
    best_score = 0
    best_num = 0
    for i in range(50):
        curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
        curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
        dimRedMethod = umap.UMAP(n_components=i+1)
        trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
        test_reduced = pd.DataFrame(dimRedMethod.transform(df_test))
        print(test_reduced.shape)
        mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
        actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
        print(actual_score)
        if actual_score> best_score:
            best_score=actual_score
            best_num = test_reduced.shape[1]
    print(best_score)
    print(best_num)
    best_scores.append(best_score)
    best_nums.append(best_num)
compute_CI(best_scores,5)
compute_CI(best_nums,5)

OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


(442, 1)
0.08810835121279548
(442, 2)
0.330757000367491
(442, 3)
0.26659863625343816
(442, 4)
0.3180765813605223
(442, 5)
0.2816786939926871
(442, 6)
0.47255594405946355
(442, 7)
0.3591710544765149
(442, 8)
0.35350826937058133
(442, 9)
0.450469023343672
(442, 10)
0.321923343605095
(442, 11)
0.40699801515079437
(442, 12)
0.39455519397994054
(442, 13)
0.47543490678157996
(442, 14)
0.4484876742950098
(442, 15)
0.43696096077107105
(442, 16)
0.2730611326616482
(442, 17)
0.4819371337921463
(442, 18)
0.5251711860067776
(442, 19)
0.468022789732929
(442, 20)
0.4736760996373073
(442, 21)
0.5260779490427746
(442, 22)
0.49488076613877763
(442, 23)
0.5542597447269181
(442, 24)
0.5263901967791711
(442, 25)
0.5100943023749833
(442, 26)
0.5424001784675228
(442, 27)
0.5265398602615985
(442, 28)
0.5238314714369225
(442, 29)
0.44531783651313084
(442, 30)
0.5354238080030431
(442, 31)
0.49611702026765314
(442, 32)
0.508730434297642
(442, 33)
0.5357498187650893
(442, 34)
0.45819036684543646
(442, 35)
0.5300

In [None]:
0.626848326328787 +- 0.02482459870273499
44.8 +- 4.236502708602934

## t-sne

In [18]:
from sklearn.manifold import TSNE

best_scores = []
best_nums = []
for curr_seed in [0,1,2,3,4]: 
    best_score = 0
    best_num = 0
    for i in range(50):
        curr_df_trainVal_withTar = resample(df_trainVal_withTar, random_state=curr_seed)
        curr_df_trainVal = curr_df_trainVal_withTar.iloc[:,:-1]
        dimRedMethod = TSNE(n_components=i+1)
        trainVal_reduced = pd.DataFrame(dimRedMethod.fit_transform(curr_df_trainVal.values,curr_df_trainVal_withTar.mean_std.values.reshape(-1, 1)))
        
        mod = LinearRegression().fit(trainVal_reduced, curr_df_trainVal_withTar.mean_std)
        
        trainValTest = pd.concat((curr_df_trainVal,df_test),axis=0)
        test_reduced = pd.DataFrame(dimRedMethod.fit_transform(trainValTest.values)).iloc[-df_test.shape[0]:,:]
        print(test_reduced.shape)
        actual_score = mod.score(test_reduced, df_test_withTar.mean_std)
        print(actual_score)
        if actual_score> best_score:
            best_score=actual_score
            best_num = test_reduced.shape[1]
    print(best_score)
    print(best_num)
    best_scores.append(best_score)
    best_nums.append(best_num)
compute_CI(best_scores,5)
compute_CI(best_nums,5)




(442, 1)
-0.9849195121116525




(442, 2)
0.1850353876146149




(442, 3)
-0.5316912175724298




ValueError: 'n_components' should be inferior to 4 for the barnes_hut algorithm as it relies on quad-tree or oct-tree.