In [1]:
import importlib
import pickle

import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.metrics
import sklearn.model_selection
import sklearn.multiclass

import lapprox.misc.utils as utils
import lapprox.models.ssa as ssa
import lapprox.models.ar as ar
import lapprox.models.arma as arma
import lapprox.models.fft as fft
import lapprox.models.semor as semor
import lapprox.joint.joint_mapping as joint_mapping
import lapprox.segments.normalize as normalize
import lapprox.misc.namespace_cutter as namespace_cutter

importlib.reload(joint_mapping)

%matplotlib inline

In [2]:
# Load the reference SEMOR profiles; downstream cells index this mapping by
# action name ('run', 'walk', 'up', 'down').
# NOTE: pickle.load can execute arbitrary code embedded in the file, so only
# point this at data files you trust.
with open("data/semor_profiles.pickle", "rb") as profiles_file:
    semor_profiles = pickle.load(profiles_file)

# Replace every profile with its shrunk version, in place.
# The second argument (20) is presumably the target profile length --
# TODO confirm against normalize.shrink_segment.
for action_name, raw_profile in list(semor_profiles.items()):
    semor_profiles[action_name] = normalize.shrink_segment(raw_profile, 20)

# Load the segments; the next cell iterates this object with .items().
with open("data/segments.pickle", "rb") as segments_file:
    segments_dict = pickle.load(segments_file)

## Подготавливаем данные

In [3]:
# Flatten segments_dict into (key, segment) pairs, keeping only the segments
# whose .size is strictly greater than 30.
action_and_segment = []
for key, segments_array in segments_dict.items():
    for candidate in segments_array:
        if candidate.size > 30:
            action_and_segment.append((key, candidate))

# Split the pairs into two parallel lists that share the same ordering.
actions = [action for action, _ in action_and_segment]
segments = [segment for _, segment in action_and_segment]

In [4]:
# Normalize all kept segments in one call; the result is zipped below with
# action_and_segment, so it is expected to keep the input order.
normalized_segments = normalize.normalize_segments(segments)

# Pair each normalized segment with element [0] of its action key.
# NOTE(review): if the keys are strings this keeps only their first
# character -- confirm that this is intended.
action_and_norm_segment = [
    (labelled[0][0], norm_segment)
    for labelled, norm_segment in zip(action_and_segment, normalized_segments)
]

# Feature matrix built from the normalized segments.
X = np.array(normalized_segments)

# Integer-encode the action labels for the classifier.
le = sklearn.preprocessing.LabelEncoder()
Y = le.fit_transform(actions)

In [5]:
# Candidate local-approximation models. The keys double as namespace names:
# the grid search below chooses among combinations of exactly these names.
models = {}
models["ssa_5"] = ssa.Ssa(5)
models["ssa_10"] = ssa.Ssa(10)
models["ar_2"] = ar.Ar(2)
models["ar_4"] = ar.Ar(4)
# Left disabled, as in the original cell (reason not recorded here):
# models["arma_2_2"] = arma.Arma(2, 2)
models["fft_2"] = fft.Fft(2)
models["fft_5"] = fft.Fft(5)
# One SEMOR model per reference profile loaded earlier in the notebook.
models["semor_run"] = semor.Semor(semor_profiles['run'])
models["semor_walk"] = semor.Semor(semor_profiles['walk'])
models["semor_up"] = semor.Semor(semor_profiles['up'])
models["semor_down"] = semor.Semor(semor_profiles['down'])

# Compute the joint feature representation once, up front, on the full data.
# NOTE(review): the mapper is fitted on all of X before cross-validation; if
# JointMapping.fit learns anything across samples, that leaks into the CV
# folds -- confirm.
mapper = joint_mapping.JointMapping(models)
X_intermediate = mapper.fit_transform(X)

# The initial namespace selection is only a placeholder: the grid search
# overrides `namespace_cutter__namespaces` for every candidate it evaluates.
cutter = namespace_cutter.CutNamespacesTransformer(
    mapping=mapper,
    namespaces=["ar_2"],
)
clf = sklearn.linear_model.LogisticRegressionCV(
    scoring='neg_log_loss',
    multi_class='multinomial',
)

pipeline = sklearn.pipeline.Pipeline(
    steps=[
        ('namespace_cutter', cutter),
        ('clf', clf),
    ]
)

# Search space: the namespace combinations produced by utils.all_combinations
# from the model names.
grid = {
    'namespace_cutter__namespaces': utils.all_combinations(list(models.keys())),
}

# 5-fold CV scored by negative log-loss; n_jobs=-2 uses all CPUs but one.
gs = sklearn.model_selection.GridSearchCV(
    estimator=pipeline,
    param_grid=grid,
    scoring='neg_log_loss',
    cv=5,
    n_jobs=-2,
    verbose=1,
)

  linalg.lstsq(X, y)


In [None]:
# Run the search over namespace combinations on the precomputed features.
# The recorded run reports 1022 candidates x 5 folds = 5110 fits, so expect
# this cell to take several minutes.
gs.fit(X=X_intermediate, y=Y)

Fitting 5 folds for each of 1022 candidates, totalling 5110 fits
('ssa_5',)
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5',)
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5',)
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17

[Parallel(n_jobs=-2)]: Done  36 tasks      | elapsed:    3.5s


('semor_walk',)
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('semor_walk',)
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('semor_up',)
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': ar

  np.exp(prob, prob)


('ar_2', 'semor_walk')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ar_2', 'semor_walk')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ar_2', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 

[Parallel(n_jobs=-2)]: Done 186 tasks      | elapsed:   23.5s


('ar_4', 'semor_walk')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ar_4', 'semor_walk')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ar_4', 'semor_down')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20

  np.exp(prob, prob)


('fft_5', 'semor_down')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('fft_5', 'semor_down')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('semor_run', 'semor_walk')
('semor_run', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16

[Parallel(n_jobs=-2)]: Done 436 tasks      | elapsed:   56.0s


('ssa_5', 'semor_run', 'semor_down')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5', 'semor_run', 'semor_down')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5', 'semor_walk', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array

[Parallel(n_jobs=-2)]: Done 786 tasks      | elapsed:  1.7min


('fft_2', 'fft_5', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('fft_2', 'fft_5', 'semor_up')
('fft_2', 'fft_5', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('fft_2', 'fft_5', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]),

[Parallel(n_jobs=-2)]: Done 1236 tasks      | elapsed:  2.8min


('ssa_5', 'fft_2', 'semor_walk', 'semor_down')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5', 'fft_2', 'semor_walk', 'semor_down')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5', 'fft_5', 'semor_run', 'semor_walk')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 1

[Parallel(n_jobs=-2)]: Done 1786 tasks      | elapsed:  4.2min


('ar_4', 'fft_2', 'semor_walk', 'semor_up')
('ar_4', 'fft_2', 'semor_walk', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ar_4', 'fft_2', 'semor_walk', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 1

[Parallel(n_jobs=-2)]: Done 2436 tasks      | elapsed:  5.7min


('ssa_5', 'ar_4', 'fft_5', 'semor_run', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5', 'ar_4', 'fft_5', 'semor_run', 'semor_up')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5', 'ar_4', 'fft_5', 'semor_run', 'semor_down')
('ssa_5', 'ar_4', 'fft_5', 'semor_run', 'semor_up')
{'ssa_

[Parallel(n_jobs=-2)]: Done 3186 tasks      | elapsed:  7.5min


('ssa_5', 'ssa_10', 'ar_2', 'ar_4', 'fft_2', 'fft_5')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5', 'ssa_10', 'ar_2', 'ar_4', 'fft_2', 'fft_5')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14]), 'ar_2': array([15, 16, 17]), 'ar_4': array([18, 19, 20, 21, 22]), 'fft_2': array([23, 24]), 'fft_5': array([25, 26, 27, 28, 29]), 'semor_run': array([30, 31, 32, 33, 34]), 'semor_walk': array([35, 36, 37, 38, 39]), 'semor_up': array([40, 41, 42, 43, 44]), 'semor_down': array([45, 46, 47, 48, 49])}
('ssa_5', 'ssa_10', 'ar_2', 'ar_4', 'fft_2', 'semor_walk')
{'ssa_5': array([0, 1, 2, 3, 4]), 'ssa_10': array