In [None]:
%pip install pinard

In [None]:
## Generate preprocessing test data - Snapshot (XX-XX-XX - commit:)
import numpy as np
import pandas as pd

from sklearn.pipeline import Pipeline, FeatureUnion
from pinard import preprocessor as pp
from pinard.nirs_pipelines import FeatureAugmentation

# Load the source fixture: a ';'-separated CSV without a header row, cast to float32.
x_fname = 'test_preprocessing_src.csv'
x_df = pd.read_csv(x_fname, sep=';', header=None)
x = x_df.astype(np.float32).values

# Named transformers handed to FeatureAugmentation. All use their default
# arguments except Gaussian, which is configured explicitly.
preprocessing = [
    ('Id', pp.IdentityTransformer()),
    ('Baseline', pp.Baseline()),
    ('StandardNormalVariate', pp.StandardNormalVariate()),
    ('RobustNormalVariate', pp.RobustNormalVariate()),
    ('SavitzkyGolay', pp.SavitzkyGolay()),
    ('Normalize', pp.Normalize()),
    ('Detrend', pp.Detrend()),
    ('MultiplicativeScatterCorrection', pp.MultiplicativeScatterCorrection()),
    ('Derivate', pp.Derivate()),
    ('Gaussian', pp.Gaussian(order=2, sigma=1)),
    ('Wavelet', pp.Wavelet()),
    ('SimpleScale', pp.SimpleScale()),
]

pipeline = FeatureAugmentation(preprocessing)
xt = pipeline.fit_transform(x)

# Swap axes 1 and 2, then join the sub-arrays along the first axis so the
# result is one axis shorter than xt and can be written by np.savetxt.
xtt = np.concatenate(np.swapaxes(xt, 1, 2))

# Keep the first len(preprocessing) rows (12 for the list above).
# NOTE(review): presumably this is one row per transformer for the first
# sample; confirm against the FeatureAugmentation output layout.
b = xtt[:len(preprocessing), :]

# Persist the expected output and the float32 input it was computed from.
np.savetxt("test_preprocessing_validation.csv", b, delimiter=";")
np.savetxt("test_data.csv", x, delimiter=";")

In [73]:
## Generate model selection test data - Snapshot (XX-XX-XX - commit:)
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit
from pinard import model_selection

# Load the split fixture: a ';'-separated CSV without a header row, cast to float32.
x_fname = 'test_split.csv'
x_df = pd.read_csv(x_fname, sep=';', header=None)
x = x_df.astype(np.float32).values
# Column 0 becomes y (as a column vector); the remaining columns become x.
y, x = x[:, 0].reshape(-1, 1), x[:, 1:]

# Seed both global RNGs before any split is computed.
np.random.seed(42)
random.seed(42)

# One entry per split to snapshot, in the order the columns are written.
# Every call additionally receives test_size=0.25 and random_state=42.
split_configs = [
    {"method": "random"},
    {"method": "k_mean", "metric": "canberra"},
    {"method": "k_mean", "metric": "jensenshannon"},
    {"method": "k_mean", "pca_components": 4, "metric": "correlation"},
    {"method": "kennard_stone"},
    {"method": "kennard_stone", "metric": 'correlation', "pca_components": 8},
    {"method": "kennard_stone", "metric": 'correlation'},
    {"y": y, "method": "spxy"},
    {"y": y, "method": "spxy", "pca_components": 2},
    {"y": y, "method": "spxy", "metric": 'canberra'},
    {"y": y, "method": "stratified"},
    {"y": y, "method": "stratified", "n_bins": 4},
    {"y": y, "method": "circular"},
    # {"method": "SPlit"},  # disabled
]

# Despite its name, test_data accumulates the *train* indices of each split,
# one column per configuration.
test_data = None
for split_kwargs in split_configs:
    train_index, test_index = model_selection.train_test_split_idx(
        x, test_size=0.25, random_state=42, **split_kwargs
    )
    if test_data is None:
        test_data = train_index
    else:
        test_data = np.column_stack((test_data, train_index))

np.savetxt("test_split_validation.csv", test_data.astype(np.int32), delimiter=";", fmt='%i')


> 61 21
> 61 21
> 61 21
(61,)
> 61 21
> 61 21
> 61 21


ValueError: The number of observations (164) is too small; the covariance matrix is singular. For observations with 312 dimensions, at least 313 observations are required.

In [26]:
# Scratch check of how np.column_stack grows an array one column at a time:
# two 1-D arrays give a 2-column array, and stacking a further 1-D array onto
# that 2-D result appends a third column.
a = np.arange(10)
b = np.arange(10)
c = np.column_stack((a, b))  # shape (10, 2)
print(c.shape)
d = np.column_stack((c, a))  # shape (10, 3)
d

(10, 2)


array([[0, 0, 0],
       [1, 1, 1],
       [2, 2, 2],
       [3, 3, 3],
       [4, 4, 4],
       [5, 5, 5],
       [6, 6, 6],
       [7, 7, 7],
       [8, 8, 8],
       [9, 9, 9]])