In [None]:
%pip install pinard

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
## Generate preprocessing test data - Snapshot (22-10-20 - commit:5544c8f119baea5f8a3d99f7ba67c1e914b98b9c)
import numpy as np
import pandas as pd
from sklearn.pipeline import FeatureUnion, Pipeline

from pinard import preprocessor as pp
from pinard.nirs_pipelines import FeatureAugmentation

# Load the reference input: a ';'-separated CSV without a header row,
# converted to a float32 matrix.
x_fname = 'test_preprocessing_src.csv'
x_df = pd.read_csv(x_fname, sep=';', header=None)
x = x_df.astype(np.float32).values

# (name, transformer) pairs handed to FeatureAugmentation. The number of
# entries drives how many rows are written to the validation file below.
preprocessing = [   ('Id', pp.IdentityTransformer()),
                    ('Baseline', pp.Baseline()),
                    ('StandardNormalVariate', pp.StandardNormalVariate()),
                    ('RobustNormalVariate', pp.RobustNormalVariate()),
                    ('SavitzkyGolay', pp.SavitzkyGolay()),
                    ('Normalize', pp.Normalize()),
                    ('Detrend', pp.Detrend()),
                    ('MultiplicativeScatterCorrection', pp.MultiplicativeScatterCorrection()),
                    ('Derivate', pp.Derivate()),
                    ('Gaussian', pp.Gaussian(order = 2, sigma = 1)),
                    ('Wavelet', pp.Wavelet()),
                    ('SimpleScale', pp.SimpleScale()),
                ]

pipeline = FeatureAugmentation(preprocessing)
xt = pipeline.fit_transform(x)

# Swap the last two axes, then join the slices along the first axis so the
# result is a 2-D table that np.savetxt can write.
# NOTE(review): presumably xt is (samples, features, transformers), which
# makes each row of xtt one transformer's output for one sample -- confirm.
xtt = np.swapaxes(xt, 1, 2)
xtt = np.concatenate(xtt)

# Keep as many leading rows as there are configured transformers; the count is
# derived from the list so adding or removing an entry keeps the slice in sync.
b = xtt[0:len(preprocessing), :]

# Expected values for the preprocessing tests, plus the input that produced them.
np.savetxt("test_preprocessing_validation.csv", b, delimiter=";")
np.savetxt("test_data.csv", x, delimiter=";")

In [1]:
## Generate model selection test data - Snapshot (22-10-20 - commit:5544c8f119baea5f8a3d99f7ba67c1e914b98b9c)
import random

import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit

from pinard import model_selection

# Load the split fixture: ';'-separated CSV without a header row. The first
# column is used as the target y (as a column vector), the remaining columns as x.
x_fname = 'test_split.csv'
x_df = pd.read_csv(x_fname, sep=';', header=None)
x = x_df.astype(np.float32).values
y = np.reshape(x[:,0], (-1,1))
x = x[:,1:]

# Seed both global generators once, before the first split is computed.
np.random.seed(42)
random.seed(42)

# One entry per column of the validation file, in column order. Every split
# additionally receives test_size=0.25 and random_state=42 (see the loop).
# The order is part of the snapshot: do not reorder without regenerating
# the tests that read the file.
split_configs = [
    {"method": "random"},
    {"method": "k_mean", "metric": "canberra"},
    {"method": "k_mean", "pca_components": 4, "metric": "canberra"},
    {"method": "kennard_stone"},
    {"method": "kennard_stone", "metric": "correlation", "pca_components": 8},
    {"method": "kennard_stone", "metric": "correlation"},
    {"method": "spxy", "y": y},
    {"method": "spxy", "y": y, "pca_components": 2},
    {"method": "spxy", "y": y, "metric": "canberra"},
    {"method": "stratified", "y": y},
    {"method": "stratified", "y": y, "n_bins": 4},
    {"method": "circular", "y": y},
    # {"method": "SPlit"},  # disabled in this snapshot
]

# Only the train indices are recorded; test_index is returned but not saved.
train_columns = []
for split_kwargs in split_configs:
    train_index, test_index = model_selection.train_test_split_idx(
        x, test_size=0.25, random_state=42, **split_kwargs
    )
    train_columns.append(train_index)

# Each split's train indices become one column of the output table.
test_data = np.column_stack(train_columns)

np.savetxt("test_split_validation.csv", test_data.astype(np.int32), delimiter=";", fmt='%i')

ImportError: attempted relative import with no known parent package

In [86]:
%load_ext autoreload
%autoreload 2

## Generate augmentation test data - Snapshot (22-10-20 - commit:-----)
import random

import numpy as np
import pandas as pd
from sklearn.pipeline import FeatureUnion

from pinard import augmentation as aug
from pinard.sklearn import SampleAugmentation

# Load the augmentation fixture: ';'-separated CSV without a header row. The
# first column is used as the target y (as a column vector), the rest as x.
x_fname = "test_augmentation.csv"
x_df = pd.read_csv(x_fname, sep=";", header=None)
x = x_df.astype(np.float32).values
y = np.reshape(x[:, 0], (-1, 1))
x = x[:, 1:]

# (name, augmenter) pairs passed to SampleAugmentation. Only the first two are
# active; the commented entries are the remaining augmenters, currently disabled.
augmentations = [
    ("Id", aug.IdentityAugmenter()),
    ("Rotate_Translate", aug.Rotate_Translate(random_state=42, per_sample=False)),
    # ("Random_Y_Shift", aug.Random_Y_Shift(random_state=42, per_sample=False)),
    # ("Random_Multiplicative_Shift", aug.Random_Multiplicative_Shift(random_state=42, per_sample=False)),
    # ("Random_Spline_Addition", aug.Random_Spline_Addition(random_state=42)),
    # ("Random_X_Spline_Deformation", aug.Random_X_Spline_Deformation(random_state=42)),
    # ("Random_X_Spline_Shift", aug.Random_X_Spline_Shift(random_state=42)),
    # ("Monotonous_Spline_Simplification", aug.Monotonous_Spline_Simplification(random_state=42)),
    # ("Dependent_Spline_Simplification", aug.Dependent_Spline_Simplification(random_state=42)),
]


# Debug helper (disabled): run every augmenter on its own.
# for augment in augmentations:
#     print(augment[0])
#     augment[1].fit_transform(x,y)

# Use a dedicated name for the pipeline object: binding it to `aug` would
# replace the imported `pinard.augmentation` module alias and break any later
# `aug.<Augmenter>` reference in this kernel.
sample_augmenter = SampleAugmentation(augmentations)
X_train, y_train = sample_augmenter.transform(x, y)

# Show only the first 10 columns of the augmented samples.
print(X_train[:,:10])

# Debug helpers (disabled):
# print(X_train.shape, y_train.shape)
# print(np.concatenate(y_train))
# print(X_train[2], x[1])

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[[0.72180432 0.72554117 0.72992826 0.7402032  0.75501108 0.76992512
  0.77215958 0.76993704 0.79868561 0.80254292]
 [0.81684974 0.81926377 0.82232804 0.83128017 0.84476523 0.85835645
  0.85926809 0.85572273 0.88314848 0.88568298]
 [0.86521149 0.86832756 0.87789011 0.89377493 0.90516847 0.90261436
  0.90387791 0.9171114  0.93558824 0.93859202]
 [0.76445829 0.7680819  0.778152   0.79454435 0.80644544 0.80439886
  0.80616996 0.81991098 0.83889536 0.84240668]
 [0.87479496 0.87715763 0.87726188 0.88246244 0.89566934 0.90164775
  0.91074002 0.92526674 0.93393207 0.93441856]
 [0.97002107 0.9710499  0.96982031 0.97368704 0.9855601  0.99020467
  0.99796311 1.011156   1.01848748 1.01764014]
 [0.89307523 0.89902884 0.90493369 0.91273469 0.9155581  0.91560477
  0.92193139 0.93618143 0.94984376 0.95042825]
 [1.04906841 1.05418637 1.05925558 1.06622093 1.06820869 1.06741972
  1.07291069 1.08632508 1.09915177 1.09