In [58]:
import functools
import importlib
import pickle

import keras
import numpy as np
import matplotlib.pyplot as plt
import sklearn.linear_model
import sklearn.pipeline
import sklearn.preprocessing
import sklearn.metrics
import sklearn.model_selection
import sklearn.multiclass
import tqdm

import lapprox.misc.utils as utils
import lapprox.models.ssa as ssa
import lapprox.models.ar as ar
import lapprox.models.arma as arma
import lapprox.models.fft as fft
import lapprox.models.semor as semor
import lapprox.joint.joint_mapping as joint_mapping
import lapprox.segments.normalize as normalize
import lapprox.misc.namespace_cutter as namespace_cutter

importlib.reload(joint_mapping)

%matplotlib inline

In [60]:
# Load the SEMOR reference profiles (a mapping: action name -> profile).
# NOTE: pickle.load can execute arbitrary code; only open trusted data files.
with open("data/semor_profiles.pickle", "rb") as profiles_file:
    semor_profiles = pickle.load(profiles_file)

# Replace every profile, in place, by normalize.shrink_segment(profile, 20).
# NOTE(review): 20 is presumably the target profile length - confirm.
for action in list(semor_profiles):
    semor_profiles[action] = normalize.shrink_segment(semor_profiles[action], 20)

# Load the segments (a mapping: key -> collection of segments).
with open("data/segments.pickle", "rb") as segments_file:
    segments_dict = pickle.load(segments_file)

## Подготавливаем данные

In [61]:
# Flatten segments_dict into (key, segment) pairs, keeping only segments
# whose .size is strictly greater than 30.
action_and_segment = [
    (action, segment)
    for action, segment_group in segments_dict.items()
    for segment in segment_group
    if segment.size > 30
]

# Parallel lists: segments[i] belongs to actions[i].
segments = [segment for _, segment in action_and_segment]
actions = [action for action, _ in action_and_segment]

In [62]:
# Normalize all kept segments in one call.
normalized_segments = normalize.normalize_segments(segments)

# Pair each normalized segment with the first element (the key) of the
# corresponding (key, segment) tuple.
action_and_norm_segment = [
    (labelled_segment[0], normalized)
    for labelled_segment, normalized in zip(action_and_segment, normalized_segments)
]

# Encode the action labels as integers and stack the segments into an array.
le = sklearn.preprocessing.LabelEncoder()
Y = le.fit_transform(actions)
X = np.array(normalized_segments)

In [73]:
# Local approximation models, keyed by "<family>_<parameter>".
# Keys are inserted in the same order as before: ssa, ar, fft, semor.
models = {}
for param in (5, 10):
    models["ssa_{}".format(param)] = ssa.Ssa(param)
for param in (2, 4):
    models["ar_{}".format(param)] = ar.Ar(param)
# Disabled model: "arma_2_2": arma.Arma(2, 2)
for param in (2, 5):
    models["fft_{}".format(param)] = fft.Fft(param)
for profile_name in ("run", "walk", "up", "down"):
    models["semor_{}".format(profile_name)] = semor.Semor(semor_profiles[profile_name])

# Fit the joint mapping on the normalized segments and keep its output.
mapper = joint_mapping.JointMapping(models)
X_intermediate = mapper.fit_transform(X)

In [99]:
def grid_search_using_mapping(X_intermediate, Y, mapper, clf, clf_grid, cv=5, n_jobs=-1):
    """Grid-search a ``namespace_cutter -> clf`` pipeline and return the fitted search.

    The pipeline first applies a ``CutNamespacesTransformer`` built around
    ``mapper`` and then ``clf``. The searched grid is ``clf_grid`` plus the
    ``namespace_cutter__namespaces`` parameter, whose candidate values come
    from ``utils.all_combinations(list(models))``.

    Note: ``models`` is read from the module namespace, not passed in.

    Parameters
    ----------
    X_intermediate : features passed to ``GridSearchCV.fit``.
    Y : targets passed to ``GridSearchCV.fit``.
    mapper : mapping object handed to ``CutNamespacesTransformer``.
    clf : final estimator of the pipeline.
    clf_grid : dict of extra grid parameters (keys are pipeline parameter
        names such as ``'clf__...'``); may be empty.
    cv : cross-validation setting forwarded to ``GridSearchCV``.
    n_jobs : parallelism setting forwarded to ``GridSearchCV``.

    Returns
    -------
    sklearn.model_selection.GridSearchCV
        The fitted search object. The function used to return ``None``,
        so callers storing the result kept nothing.
    """
    # The initial namespaces value is a placeholder; the grid overrides it.
    cutter = namespace_cutter.CutNamespacesTransformer(namespaces=[], mapping=mapper)

    pipeline = sklearn.pipeline.Pipeline([
        ('namespace_cutter', cutter),
        ('clf', clf)
    ])

    param_grid = {'namespace_cutter__namespaces': utils.all_combinations(list(models))}
    param_grid.update(clf_grid)

    gs = sklearn.model_selection.GridSearchCV(pipeline, param_grid, scoring='neg_log_loss',
                                              cv=cv, n_jobs=n_jobs, verbose=1,
                                              return_train_score=True)
    gs.fit(X_intermediate, Y)
    return gs

# Bind the data and the mapper once; callers then pass only the classifier,
# its grid and optional cv / n_jobs.
gss_clojure = functools.partial(grid_search_using_mapping, X_intermediate, Y, mapper)

grids = {}
clfs = {}
grid_search_results = {}

# Logistic regression, l2-regularization (the default penalty).
clfs['lr_l2'] = sklearn.linear_model.LogisticRegressionCV(multi_class='multinomial',
                                                          scoring='neg_log_loss')
grids['lr_l2'] = {}

# Logistic regression, l1-regularization.
# The penalty has to be requested explicitly, otherwise this estimator is
# identical to 'lr_l2'. 'saga' is used because it supports the l1 penalty
# together with the multinomial loss.
clfs['lr_l1'] = sklearn.linear_model.LogisticRegressionCV(penalty='l1',
                                                          solver='saga',
                                                          multi_class='multinomial',
                                                          scoring='neg_log_loss')
grids['lr_l1'] = {}

# Elastic-net
# NOTE(review): ElasticNetCV is a regressor and has no predict_proba, so it
# cannot be scored with 'neg_log_loss' by grid_search_using_mapping. The
# disabled classifier below looks like the intended replacement - confirm.
clfs['en'] = sklearn.linear_model.ElasticNetCV()
grids['en'] = {}

# Disabled alternative: one-vs-rest SGD classifier with an elastic-net penalty.
# clfs['en'] = sklearn.multiclass.OneVsRestClassifier(
#     sklearn.linear_model.SGDClassifier(loss='log', penalty='elasticnet')
# )
# grids['en'] = {
#     "clf__estimator__l1_ratio": [.1, .5, .7, .9, .95, .99, 1],
#     'clf__estimator__alpha': [10 ** x for x in range(-6, 1)]
# }

In [101]:
# gss_clojure(clfs['en'], grids['en'], n_jobs=1)

In [None]:
# Run the grid search for both logistic-regression variants in a single
# process and store each call's result under the classifier's name.
for clf_name in ('lr_l2', 'lr_l1'):
    grid_search_results[clf_name] = gss_clojure(clfs[clf_name], grids[clf_name], n_jobs=1)

# Нейронная сеть

In [51]:
def transform_to_inputs(Y, n_classes):
    """One-hot encode integer class labels.

    Parameters
    ----------
    Y : 1-D array-like of class labels (list, tuple or numpy array).
    n_classes : int
        Number of output columns; column ``i`` marks the samples whose
        label equals ``i``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(Y), n_classes)`` holding 1.0 where
        ``Y[row] == column`` and 0.0 elsewhere. A label outside
        ``0..n_classes-1`` produces an all-zero row.

    Raises
    ------
    ValueError
        If ``Y`` is not one-dimensional.
    """
    labels = np.asarray(Y)
    if labels.ndim != 1:
        raise ValueError("Y must be one-dimensional, got shape {}".format(labels.shape))

    # Broadcast (n_samples, 1) against (n_classes,) to compare every label
    # with every class id at once.
    return (labels[:, np.newaxis] == np.arange(n_classes)).astype(float)

def test_neural_network(X, Y, units, cv=15):
    """Cross-validate a one-hidden-layer dense network; return per-fold test losses.

    For each stratified fold a fresh network is built (``units`` ReLU hidden
    units, softmax output), trained for 20 epochs with RMSprop on the
    training part and evaluated on the held-out part.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
    Y : 1-D array-like of integer class labels. The one-hot encoding
        assumes the labels are ``0..n_classes-1`` (e.g. LabelEncoder output).
    units : int
        Width of the hidden layer.
    cv : int
        Number of stratified folds.

    Returns
    -------
    numpy.ndarray
        One ``model.evaluate`` result (categorical cross-entropy loss) per fold.
    """
    X = np.asarray(X)
    Y = np.asarray(Y)
    n_classes = np.unique(Y).size

    # random_state only has an effect when shuffling is enabled; newer
    # scikit-learn versions reject random_state with shuffle=False.
    kfold = sklearn.model_selection.StratifiedKFold(n_splits=cv, shuffle=True, random_state=1)

    scores = []
    for train_index, test_index in kfold.split(X, Y):
        X_train, X_test = X[train_index], X[test_index]
        y_train = transform_to_inputs(Y[train_index], n_classes)
        y_test = transform_to_inputs(Y[test_index], n_classes)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(units, activation='relu', input_dim=X_train.shape[1]))
        # Single-label multi-class targets with categorical cross-entropy
        # need a softmax output so the class probabilities sum to one.
        model.add(keras.layers.Dense(n_classes, activation='softmax'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop')
        model.fit(X_train, y_train, epochs=20, verbose=0)
        scores.append(model.evaluate(X_test, y_test, verbose=0))

    return np.array(scores)

In [102]:
# Evaluate the network for hidden-layer widths 5, 10, ..., 95 and keep the
# per-fold losses for each width.
scores = {}
width_progress = tqdm.trange(5, 100, 5)
for hidden_units in width_progress:
    scores[hidden_units] = test_neural_network(X, Y, units=hidden_units)


  0%|          | 0/19 [00:00<?, ?it/s][A
[A

KeyboardInterrupt: 