In [11]:
from keras.models import Sequential
from keras.layers import MaxPooling1D, Conv1D, Embedding, LSTM, Flatten, Dense, Dropout

import pandas as pd
import os
import numpy as np
import re

Using TensorFlow backend.


In [5]:
def normalize_signal(signal):
    """
    time_series
    
    Returns normalized TS: (TS - mean(TS))/std(TS)
    """
    std = np.std(signal)
    norm_signal = (signal - np.mean(signal)) / std if std else 1
    return norm_signal

In [6]:
def normalize_all_signals_in_ds(one_group_ds):
    for signal in one_group_ds.drop('t', axis=1):
        one_group_ds[signal] = normalize_signal(one_group_ds[signal])

In [7]:
def append_signals_dataset(data_directory, signals_dataset = None, labels = None):
    data_dir_list = os.listdir(data_directory)
    os.chdir(data_directory)
    signal_id = 0
    if not labels:
        labels = pd.DataFrame(columns=['id', 'class', 'label'])
    for signal_label, directory in enumerate(data_dir_list):
        if os.path.isfile(directory) or directory[0] == '.':
                continue
        os.chdir(directory)
        for filename in os.listdir(os.curdir):
            if re.match(r'(.*csv)', filename):
                if (signals_dataset) is not None:
                    tmp_df = pd.read_csv(filename)
                    normalize_all_signals_in_ds(tmp_df)
                    tmp_df['id'] = signal_id
                    signals_dataset = signals_dataset.append(tmp_df, ignore_index=True)
                else:
                    signals_dataset = pd.read_csv(filename)
                    normalize_all_signals_in_ds(signals_dataset)
                    signals_dataset['id'] = 0
#                 print(signals_dataset.head(5))
                tmp_labels = pd.Series([signal_id, directory, np.int(signal_label)],
                                         index=labels.columns)
                labels = labels.append(tmp_labels, ignore_index=True)
                signal_id += 1
        os.chdir('../')
    os.chdir('../..')
    return signals_dataset, labels

In [12]:
def normalize_labels(labels_df):
    """
    Разметим все классы числами от 0 до количества классов
    и преобразуем новую колонку в формат чисел (float)
    """
    unique_classes = np.unique(labels_df['class'])
    labels_df['label'] = labels['class'].map(dict(zip(unique_classes, range(len(unique_classes)))))
    labels_df.label = pd.to_numeric(labels.label)
    labels_df.id = pd.to_numeric(labels_df.id)

In [13]:
# os.chdir('..')
os.getcwd()

'/home/poligon/master_degree'

In [14]:
dataset, labels = append_signals_dataset('./data/gen_1_25')

In [15]:
dataset_t = dataset.T

In [16]:
dataset_t

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7427,7428,7429,7430,7431,7432,7433,7434,7435,7436
t,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1,...,1.91,1.92,1.93,1.94,1.95,1.96,1.97,1.98,1.99,2.0
U,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,...,-1.457484,-1.379632,-1.290658,-1.193343,-1.084907,-0.962568,-0.831888,-0.690086,-0.539943,-0.378678
U.1,1.632743,1.632743,1.632743,1.632743,1.632743,1.632743,1.632743,1.632743,1.632743,1.632743,...,-1.031605,-0.926908,-0.796037,-0.69134,-0.560468,-0.403423,-0.246377,-0.089331,0.093889,0.277109
Pij,-2.795848,-2.795848,-2.795848,-2.795848,-2.795848,-2.795848,-2.795848,-2.795848,-2.795848,-2.795848,...,-2.632471,-2.734597,-2.834281,-2.930412,-3.021881,-3.107356,-3.185394,-3.254551,-3.313162,-3.359452
Pg,-0.256433,-0.256433,-0.256433,-0.256433,-0.256433,-0.256433,-0.256433,-0.256433,-0.256433,-0.256433,...,-2.720235,-2.835952,-2.95017,-3.061966,-3.169954,-3.273096,-3.369662,-3.458268,-3.536951,-3.604097
Pgt,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
delta,-1.499264,-1.499264,-1.499264,-1.499264,-1.499264,-1.499264,-1.499264,-1.499264,-1.499264,-1.499264,...,2.243746,2.323213,2.405662,2.491094,2.579721,2.671331,2.766137,2.864352,2.965975,3.071007
id,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0,36.0


In [17]:
pd.to_numeric(np.unique(dataset.t))

array([0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1 , 0.11,
       0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19, 0.2 , 0.21, 0.22,
       0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29, 0.3 , 0.31, 0.32, 0.33,
       0.34, 0.35, 0.36, 0.37, 0.38, 0.39, 0.4 , 0.41, 0.42, 0.43, 0.44,
       0.45, 0.46, 0.47, 0.48, 0.49, 0.5 , 0.51, 0.52, 0.53, 0.54, 0.55,
       0.56, 0.57, 0.58, 0.59, 0.6 , 0.61, 0.62, 0.63, 0.64, 0.65, 0.66,
       0.67, 0.68, 0.69, 0.7 , 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77,
       0.78, 0.79, 0.8 , 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88,
       0.89, 0.9 , 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99,
       1.  , 1.01, 1.02, 1.03, 1.04, 1.05, 1.06, 1.07, 1.08, 1.09, 1.1 ,
       1.11, 1.12, 1.13, 1.14, 1.15, 1.16, 1.17, 1.18, 1.19, 1.2 , 1.21,
       1.22, 1.23, 1.24, 1.25, 1.26, 1.27, 1.28, 1.29, 1.3 , 1.31, 1.32,
       1.33, 1.34, 1.35, 1.36, 1.37, 1.38, 1.39, 1.4 , 1.41, 1.42, 1.43,
       1.44, 1.45, 1.46, 1.47, 1.48, 1.49, 1.5 , 1.

In [18]:
pd.to_numeric(np.unique(dataset.t) * 100, downcast='unsigned').reshape(-1)

array([  1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
        12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,
        23.,  24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,  33.,
        34.,  35.,  36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,
        45.,  46.,  47.,  48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,
        56.,  57.,  58.,  59.,  60.,  61.,  62.,  63.,  64.,  65.,  66.,
        67.,  68.,  69.,  70.,  71.,  72.,  73.,  74.,  75.,  76.,  77.,
        78.,  79.,  80.,  81.,  82.,  83.,  84.,  85.,  86.,  87.,  88.,
        89.,  90.,  91.,  92.,  93.,  94.,  95.,  96.,  97.,  98.,  99.,
       100., 101., 102., 103., 104., 105., 106., 107., 108., 109., 110.,
       111., 112., 113., 114., 115., 116., 117., 118., 119., 120., 121.,
       122., 123., 124., 125., 126., 127., 128., 129., 130., 131., 132.,
       133., 134., 135., 136., 137., 138., 139., 140., 141., 142., 143.,
       144., 145., 146., 147., 148., 149., 150., 15

In [25]:
cols = np.append(['id'], np.arange(0,201))
U_signals = pd.DataFrame(columns=cols)
U_signals.head()

Unnamed: 0,id,0,1,2,3,4,5,6,7,8,...,191,192,193,194,195,196,197,198,199,200


In [20]:
signal_len = 201
for i, id in enumerate(np.unique(dataset.id)):
    
    temp = np.array(dataset[dataset['id'] == id].loc[:,['U']].T)
    U_signals = U_signals.append(pd.Series(temp[0]), ignore_index=True)

In [21]:
U_signals.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,191,192,193,194,195,196,197,198,199,200
0,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,1.110467,...,-0.554819,-0.527064,-0.49931,-0.471555,-0.434548,-0.406794,-0.379039,-0.342033,-0.314278,-0.286523
1,1.112579,1.112579,1.112579,1.112579,1.112579,1.112579,1.112579,1.112579,1.112579,1.112579,...,-0.686156,-0.660337,-0.634518,-0.608699,-0.582879,-0.55706,-0.531241,-0.496816,-0.470996,-0.445177
2,1.066873,1.066873,1.066873,1.066873,1.066873,1.066873,1.066873,1.066873,1.066873,1.066873,...,-1.401751,-1.407141,-1.412531,-1.417921,-1.423311,-1.428701,-1.434091,-1.434091,-1.439481,-1.444871
3,1.11175,1.11175,1.11175,1.11175,1.11175,1.11175,1.11175,1.11175,1.11175,1.11175,...,-0.818908,-0.794974,-0.771041,-0.755085,-0.731151,-0.707217,-0.683283,-0.65935,-0.635416,-0.611482
4,1.107607,1.107607,1.107607,1.107607,1.107607,1.107607,1.107607,1.107607,1.107607,1.107607,...,-0.49218,-0.463441,-0.434702,-0.396384,-0.367645,-0.338907,-0.300588,-0.27185,-0.243111,-0.204793


In [190]:
normalize_labels(labels)
labels

Unnamed: 0,id,class,label
0,0,le_8,0
1,1,le_8,0
2,2,le_8,0
3,3,le_8,0
4,4,le_8,0
5,5,le_8,0
6,6,le_8,0
7,7,le_8,0
8,8,le_8,0
9,9,le_8,0


In [194]:
U_ts_labeled = U_signals.merge(labels.label, right_on=labels.id, left_on=U_signals.index).drop('key_0', axis=1)

In [200]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn import metrics

In [201]:
X_train, X_test, y_train, y_test = train_test_split(U_signals, labels.label, test_size=0.2, random_state=42)

In [288]:
X_train.shape

(29, 201)

In [206]:
TIME_PERIODS = 201

In [291]:
X_train_3d = np.array(X_train).reshape(X_train.shape[0], X_train.shape[1], 1)

In [292]:
X_test_3d = np.array(X_test).reshape(X_test.shape[0], X_test.shape[1], 1)

In [381]:
model = Sequential()
model.add(Conv1D(32, 10, padding='same', activation='relu', input_shape=(TIME_PERIODS,1)))
model.add(MaxPooling1D(pool_size=10))
model.add(Conv1D(64, kernel_size=10, padding='same', activation='relu'))
model.add(MaxPooling1D(pool_size=10))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [326]:
model = Sequential()
model.add(Dense(201, input_dim=201))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(100, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='softmax'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy',])

In [382]:
model

<keras.engine.sequential.Sequential at 0x1a62be3190>

In [383]:
model.fit(X_train_3d, y_train, epochs=6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.callbacks.History at 0x1a62bb0bd0>

In [384]:
model.metrics_names

['loss', 'accuracy']

In [385]:
model.evaluate(X_test_3d, y_test)



[0.5827293395996094, 0.875]

In [278]:
import keras.metrics

In [None]:
keras.metrics.accuracy