In [1]:
import os
from keras.layers import Input, Dense, Conv1D, Flatten, MaxPooling1D, Dropout
from keras.models import Model, Sequential
from keras.regularizers import l1_l2
import h5py
import numpy as np
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, Normalizer, StandardScaler
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.utils.multiclass import unique_labels
import uuid
import random
import json
  

def network(input_window_size=60, filter_number=32, conv_window=(3,), pooling_window=(2,), dropout_rate=(),
            activation='relu', dense_activation='softmax', optimizer='adam', loss='categorical_crossentropy', layers=1,
            l1_value=0.0001, l2_value=0.0001):
    """Build and compile a 1-D convolutional classifier with a two-unit output layer.

    The model is one input block followed by ``layers`` hidden blocks. Every
    block is ``Conv1D -> MaxPooling1D -> Dropout``; the number of filters
    doubles in each hidden block, starting from ``filter_number``.

    Parameters
    ----------
    input_window_size : int
        Length of each input window; the model input shape is
        ``(input_window_size, 1)``.
    filter_number : int
        Number of filters of the first convolution.
    conv_window, pooling_window : tuple
        Kernel size of the convolutions / pool size of the poolings.
    dropout_rate : float, sequence of float, or None
        A sequence supplies one rate per block (at least ``layers + 1``
        values; extra values are ignored). A single number is applied to every
        block. ``None`` or an empty sequence (the default) disables dropout by
        using a rate of 0.0 everywhere.
    activation, dense_activation : str
        Activation of the convolutions / of the final dense layer.
    optimizer : str, optimizer instance, or list/tuple
        Optimizer handed to ``compile``. A list or tuple is accepted for
        backward compatibility; only its first element is used.
    loss : str
        Loss handed to ``compile``.
    layers : int
        Number of hidden blocks added after the input block.
    l1_value, l2_value : float
        Coefficients of the ``l1_l2`` activity regularizer attached to every
        convolution.

    Returns
    -------
    The compiled ``Sequential`` model.

    Raises
    ------
    IndexError
        If ``dropout_rate`` is a non-empty sequence with fewer than
        ``layers + 1`` entries, or ``optimizer`` is an empty list/tuple.
    """
    hidden_blocks = max(layers, 0)  # range(layers) is empty for layers <= 0
    block_count = hidden_blocks + 1

    # Normalise dropout_rate into one rate per block.
    if dropout_rate is None:
        rates = []
    elif np.ndim(dropout_rate) == 0:
        rates = [dropout_rate] * block_count
    else:
        rates = list(dropout_rate)
    if not rates:
        rates = [0.0] * block_count
    if len(rates) < block_count:
        # IndexError is kept because that is what indexing a short list raised before.
        raise IndexError('dropout_rate needs {} values (one per block), got {}'.format(block_count, len(rates)))

    # Indexing a plain string such as 'adam' would silently yield 'a', so only
    # containers are unwrapped.
    if isinstance(optimizer, (list, tuple)):
        optimizer = optimizer[0]

    model = Sequential()

    # Input block
    model.add(Conv1D(filter_number, conv_window, activation=activation, padding='same',
                     input_shape=(input_window_size, 1),
                     activity_regularizer=l1_l2(l1=l1_value, l2=l2_value)))
    model.add(MaxPooling1D(pooling_window, padding='same'))
    model.add(Dropout(rates[0]))

    # Hidden blocks: the filter count doubles at every block.
    filters = filter_number
    for rate in rates[1:block_count]:
        filters = filters * 2
        model.add(Conv1D(filters, conv_window, activation=activation, padding='same',
                         activity_regularizer=l1_l2(l1=l1_value, l2=l2_value)))
        model.add(MaxPooling1D(pooling_window, padding='same'))
        model.add(Dropout(rate))

    # Output block
    model.add(Flatten())
    model.add(Dense(2, activation=dense_activation))

    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy', 'categorical_accuracy'])

    return model

Using TensorFlow backend.


In [2]:
import h5py

def rolling_window(a, window, step_size, padding=True, copy=True):
    """Return overlapping windows over the last axis of ``a`` as a strided view.

    Consecutive windows start one element apart. Inside a window, consecutive
    elements are ``step_size`` positions apart, so ``step_size=1`` gives
    ordinary contiguous windows.

    Parameters
    ----------
    a : numpy.ndarray
        Source array. With ``padding=True`` it must be 1-D compatible with
        ``np.hstack`` against a 1-D block of zeros.
    window : int
        Number of elements per window.
    step_size : int
        Distance between neighbouring elements of one window; must be >= 0.
    padding : bool
        If true, ``window`` zeros are appended first. With ``step_size=1`` this
        yields exactly one window per original element. The appended zeros are
        float64, so the result is upcast accordingly.
    copy : bool
        If true, work on a copy of ``a``.

    Returns
    -------
    numpy.ndarray
        View of shape ``a.shape[:-1] + (n_windows, window)`` created with
        ``as_strided``. With ``padding=False`` and ``copy=False`` the view
        aliases ``a`` itself, and its rows overlap in memory, so writing to it
        is unsafe.

    Raises
    ------
    ValueError
        If ``step_size`` is negative or the data is too short for one window.
    """
    if step_size < 0:
        raise ValueError('step_size must be non-negative, got {}'.format(step_size))

    result = a.copy() if copy else a
    if padding:
        result = np.hstack((result, np.zeros(window)))

    length = result.shape[-1]
    # Window count as originally defined.
    requested = length - window + 1 - step_size
    # Window i touches index i + (window - 1) * step_size at most; any window
    # beyond this count would read past the end of the buffer.
    in_bounds = length - (window - 1) * step_size
    n_windows = min(requested, in_bounds)
    if n_windows < 0:
        raise ValueError('array of length {} is too short for window={} with step_size={}'.format(
            length, window, step_size))

    shape = result.shape[:-1] + (n_windows, window)
    strides = result.strides + (result.strides[-1] * step_size,)
    return np.lib.stride_tricks.as_strided(result, shape=shape, strides=strides)


def _make_scaler(scaler):
    """Return a fresh scaler for a scaler name, ``None`` for ``'none'``, or a non-string object unchanged."""
    if not isinstance(scaler, str):
        return scaler
    if scaler == 'none':
        return None
    if scaler == 'min_max_scaler':
        return MinMaxScaler()
    if scaler == 'standard_scaler':
        return StandardScaler()
    if scaler == 'min_max_scaler_1':
        return MinMaxScaler(feature_range=(-1, 1))
    raise ValueError('unknown scaler {!r}'.format(scaler))


def load_data(size, base_path, output_path, scaler, normalizer=False, seed=None):
    """Build a class-balanced, windowed dataset from the HDF5 files in ``base_path``.

    For every file, the ``'normal'`` dataset is optionally scaled and/or
    normalised and then cut into windows of ``size`` samples with
    ``rolling_window``. The ``'transformed'`` dataset is windowed the same way,
    and the label of a window is the maximum of its ``'transformed'`` values.
    The windows of all files are concatenated, the same number of label-0 and
    label-1 windows is drawn at random, labels are one-hot encoded, and the
    result is written to an HDF5 file as ``'x_dataset'`` and ``'y_dataset'``.

    Parameters
    ----------
    size : int
        Window length.
    base_path : str
        Directory whose entries are all opened as HDF5 input files.
    output_path : str
        Format string for the output file; it is formatted with
        ``(size, scaler, normalizer)``. An existing file is overwritten.
    scaler : str or estimator
        ``'none'``, ``'min_max_scaler'``, ``'standard_scaler'`` or
        ``'min_max_scaler_1'`` (MinMaxScaler with range (-1, 1)). A non-string
        object is used as the scaler directly. The scaler is fitted on each
        file separately.
    normalizer : bool
        If true, each file's signal is normalised as one sample with
        ``Normalizer`` (after scaling, before windowing).
    seed : int or None
        Seed for the balancing draw. ``None`` (default) uses NumPy's global
        random state, as before.

    Raises
    ------
    ValueError
        If ``base_path`` is empty, the scaler name is unknown, or one of the
        labels 0 / 1 does not occur.
    """
    output_path = output_path.format(size, scaler, normalizer)

    x_parts = []
    y_parts = []
    # Sorted so the row order does not depend on the file system's listing order.
    for file_name in sorted(os.listdir(base_path)):
        # Load the data; the context manager closes the file handle again.
        with h5py.File(os.path.join(base_path, file_name), 'r') as h5f:
            x = h5f['normal'][:]
            transformed = h5f['transformed'][:]

        # Scale the signal before windowing. A fresh scaler is fitted per
        # file, using one (n_samples, 1) layout for both fit and transform.
        file_scaler = _make_scaler(scaler)
        if file_scaler is not None:
            x = file_scaler.fit_transform(x.reshape(-1, 1)).ravel()
        if normalizer:
            # The whole signal is treated as a single sample.
            x = Normalizer().fit_transform(x.reshape(1, -1))[0]

        x_parts.append(rolling_window(x, size, 1))

        # Determines whether P is in the window or not (label = window maximum).
        # NOTE(review): presumably 'transformed' is a 0/1 marker of P -- confirm.
        y_windows = rolling_window(transformed, size, 1)
        y_parts.append(y_windows.max(axis=1).reshape(-1, 1))

    if not x_parts:
        raise ValueError('no input files found in {!r}'.format(base_path))

    # Concatenate once instead of growing the arrays file by file.
    x_dataset = np.vstack(x_parts).reshape(-1, size, 1)
    y_dataset = np.vstack(y_parts).reshape(-1, 1)

    # Balance the data:
    # n = size of the label with fewer samples
    n = min(np.count_nonzero(y_dataset == 1), np.count_nonzero(y_dataset == 0))
    if n == 0:
        raise ValueError('cannot balance the dataset: label 0 or label 1 does not occur')

    # Pick n random samples of each label
    rng = np.random if seed is None else np.random.RandomState(seed)
    mask = np.hstack([rng.choice(np.where(y_dataset == label)[0], n, replace=False)
                      for label in np.unique(y_dataset)])
    x_dataset = x_dataset[mask]
    y_dataset = y_dataset[mask]

    # Build the one-hot encoding of y_dataset
    y_dataset = to_categorical(y_dataset)

    # Explicit write mode: h5py >= 3 opens files read-only when no mode is given.
    with h5py.File(output_path, 'w') as dataset:
        dataset.create_dataset('x_dataset', data=x_dataset)
        dataset.create_dataset('y_dataset', data=y_dataset)

    print("Done creating data")



In [4]:
# Directory with the source recordings, and the file-name template for the
# generated datasets (filled in with window size, scaler name, normalizer flag).
data_path = '/home/irene/Documents/Datos-Tesis/Training_window_set_1'
output_path = '/home/irene/Documents/Datos-Tesis/window_output/{}_{}_{}.h5'

# Every (window size, scaler, normalizer) combination to generate, in the
# same order as three nested loops would visit them.
configurations = [
    (60, 'none', True),
    (60, 'none', False),
    (120, 'none', True),
    (120, 'none', False),
]

for size, scaler, normalizer in configurations:
    load_data(size, data_path, output_path, scaler, normalizer)

Done creating data
Done creating data
Done creating data
Done creating data
