## Projeto Sonar
### Análise de estacionaridade para diferentes janelamentos. 
#### Dataset: 4classes
#### Autor: Pedro Henrique Braga Lisboa (pedrohblisboa@gmail.com)
#### Laboratório de Processamento de Sinais - UFRJ

In [2]:
import sys
import os
import joblib
import numpy as np
import pandas as pd
#sys.path.extend(['/home/pedrolisboa/Workspace/lps/LpsToolbox'])
from itertools import starmap
from Functions.DataHandler import LofarDataset
from Functions.ConvolutionalNeuralNetworks import MLPClassifier
from sklearn.model_selection import StratifiedKFold, cross_validate
from sklearn.preprocessing import StandardScaler
from keras.utils import to_categorical
from itertools import repeat
# Filesystem locations are taken from the environment; each one is None
# when the corresponding variable is not set.
datapath = os.environ.get('OUTPUTDATAPATH')
audiodatapath = os.environ.get('INPUTDATAPATH')
# NOTE(review): the results directory is read from PACKAGE_NAME - confirm
# that this is the intended variable.
results_path = os.environ.get('PACKAGE_NAME')
# Database name handed to LofarDataset.loadData.
database = '4classes'

Using TensorFlow backend.


In [3]:
# Load LOFAR data
#dataobj = LofarDataset(data_path=datapath)
def factor2(num):
    """Return the chain of successive floor-halvings of ``num``.

    The first entry is ``num // 2``. While the value being halved is even,
    it is replaced by its half and that half's own floor-half is appended,
    so the chain stops right after the first odd value has been halved.

    Examples: ``factor2(8) == [4, 2, 1, 0]`` and ``factor2(6) == [3, 1]``.

    ``factor2(0)`` returns ``[0]``. Zero is even and halving it never yields
    an odd value, so it needs an explicit stop condition to terminate.
    """
    factors = [num // 2]
    # Stop on zero explicitly: it is even, but halving it makes no progress.
    while num != 0 and num % 2 == 0:
        num //= 2
        factors.append(num // 2)
    return factors

# Disabled alternative: sweep window sizes 2**7 .. 2**12 and derive the
# overlaps for each window with factor2.
#window_list = map(lambda e: pow(2,e), range(7,13,1))
#overlap_list = map(factor2, window_list)
# Window/overlap values to analyse; lofar_iter zips the two lists, so they
# are consumed pairwise.
window_list = [1024]
overlap_list = [0]
# Both values are forwarded to LofarDataset.loadData and are embedded in the
# name of the saved cross-validation split file.
decimation_rate = 3
spectrum_bins_left = 400

In [4]:
# Load the data for the first window/overlap pair.
lofar = LofarDataset(datapath)
X, y, class_labels = lofar.loadData(database, window_list[0], overlap_list[0], decimation_rate, spectrum_bins_left)
# Drop every sample that contains a NaN, together with its label, so that X
# and y keep the same length. Locating the rows from the data avoids relying
# on a fixed row index that is only valid for one particular load.
# Assumes X is 2-D (samples x features) and y is indexed by sample - TODO confirm.
nan_rows = np.isnan(X).any(axis=1)
if nan_rows.any():
    X = X[~nan_rows]
    y = y[~nan_rows]
np.unique(X)
# np.argwhere(np.isnan(X))

array([-0.19999996, -0.19999994, -0.19999994, ...,  1.68971947,
        1.69386689,  1.70014497])

In [5]:
def lofar_iter(estimator, fold_fun, dataset_obj, train_fun, verbose):
    """Run ``train_fun`` for every (window, overlap) pair and gather the scores.

    The pairs come from the module-level ``window_list`` and ``overlap_list``.
    ``database``, ``decimation_rate``, ``spectrum_bins_left`` and
    ``results_path`` are also read from module scope.

    Parameters
    ----------
    estimator : object
        Model handed to ``train_fun`` unchanged.
    fold_fun : object
        Splitter exposing ``split(X, y)``. It is only used when no saved
        split file exists for the current configuration.
    dataset_obj : object
        Data source exposing ``loadData(database, window, overlap,
        decimation_rate, spectrum_bins_left)``.
    train_fun : callable
        Called as ``train_fun(X, y, cvo, estimator, verbose, cachedir)``.
        It must return a dict of lists that contains 'scores' and whose keys
        are all keys of the returned dict (e.g. 'fold' and 'scores').
    verbose : bool or int
        When truthy, progress messages are printed.

    Returns
    -------
    dict
        Lists under 'window', 'overlap', 'fold' and 'scores', with one entry
        per fold of every (window, overlap) pair.
    """
    results = {'window': [],
               'overlap': [],
               'fold': [],
               'scores': []}
    for window, overlap in zip(window_list, overlap_list):
        if verbose:
            print('Window: %i  Overlap: %i' % (window, overlap))
        # Use the dataset passed by the caller rather than a module global.
        X, y, class_labels = dataset_obj.loadData(database, window, overlap,
                                                  decimation_rate, spectrum_bins_left)
        cvo_name = 'db_%s_window_%i_overlap_%i_dec_%i_bins_%i_skf.jbl' % (
            database, window, overlap, decimation_rate, spectrum_bins_left)
        cvo_file = os.path.join(results_path, cvo_name)
        if os.path.exists(cvo_file):
            if verbose:
                print('\tLoading cross validation configuration')
            cvo = joblib.load(cvo_file)
        else:
            if verbose:
                print('\tCreating cross validation configuration')
            # Materialise the splits so they can be stored and reused.
            cvo = list(fold_fun.split(X, y))
            joblib.dump(cvo, cvo_file)
        # Cache directory for this configuration: the split file path
        # without its '.jbl' extension.
        cachedir = cvo_file[:-4]
        partial_results = train_fun(X, y, cvo, estimator, verbose, cachedir)

        # Extend (not overwrite) so every column keeps one entry per fold
        # across all configurations.
        n_folds = len(partial_results['scores'])
        results['window'].extend(repeat(window, n_folds))
        results['overlap'].extend(repeat(overlap, n_folds))
        for key in partial_results:
            results[key].extend(partial_results[key])

    # Return only after every (window, overlap) pair has been processed.
    return results
        
        


In [6]:
from Functions.NpUtils.Scores import spIndex, recall_score
from sklearn.metrics import make_scorer
# Scorer mapping; no other code visible in this notebook reads it.
scoring = {'sp': spIndex}
# Shared scaler instance: novelty_detectionCV re-fits it on the training
# samples of every fold before transforming the fold's data.
scaler = StandardScaler()
import ipyparallel as ipp
import dill
# c = ipp.Client(profile='ssh', sshserver='pedro.lisboa@ferney.lps.ufrj.br')
# c[:].use_dill()
# dview = c[:]
def novelty_detectionCV(X, y, cvo, estimator, verbose, cachedir):
    """Train and score ``estimator`` once for every cross-validation fold.

    For each ``(train, test)`` index pair in ``cvo`` the module-level
    ``scaler`` is fitted on the training samples and applied to both splits,
    the labels are passed through ``to_categorical``, and the estimator is
    fitted (``validation_split=0.2``, ``n_inits=10``) with its ``cachedir``
    attribute set to ``<cachedir>/<fold>_fold``.

    Returns a dict holding the fold indices under 'fold' and the matching
    ``estimator.score`` results under 'scores'.
    """
    def train_fold(data):
        # Takes a single tuple so it can be handed to map-style dispatchers.
        fold_id, train_idx, test_idx = data
        if verbose:
            print('\t\t Fold %i' % fold_id)

        train_inputs, train_labels = X[train_idx], y[train_idx]
        test_inputs, test_labels = X[test_idx], y[test_idx]

        # The scaler is fitted on the training split only and then applied
        # to both splits.
        scaler.fit(train_inputs, train_labels)
        train_inputs = scaler.transform(train_inputs)
        test_inputs = scaler.transform(test_inputs)

        train_targets = to_categorical(train_labels)
        test_targets = to_categorical(test_labels)

        estimator.cachedir = os.path.join(cachedir, '%i_fold' % fold_id)
        estimator.fit(train_inputs, train_targets,
                      validation_split=0.2,
                      n_inits=10,
                      verbose=verbose)
        return (fold_id, estimator.score(test_inputs, test_targets))

    outcomes = [train_fold((fold_id, train_idx, test_idx))
                for fold_id, (train_idx, test_idx) in enumerate(cvo)]
    # Transpose the (fold, score) pairs into two parallel sequences.
    fold_ids, fold_scores = zip(*outcomes)
    return {'fold': list(fold_ids),
            'scores': list(fold_scores)}
            
    

In [None]:
import time
# Truthy value enables the progress messages in lofar_iter and
# novelty_detectionCV; it is also forwarded to estimator.fit.
verbose = 1
lofar = LofarDataset(datapath)
skf = StratifiedKFold(n_splits=10)
# input_shape uses the same value (400) as spectrum_bins_left.
estimator = MLPClassifier(layer_sizes=(10,4),
                          activations=('relu', 'tanh'),
                          input_shape=(400,),
                          solver="sgd",
                          loss="mean_squared_error",
                          epochs=100)
# Time the whole cross-validation run.
start = time.time()
results = lofar_iter(estimator, skf, lofar, novelty_detectionCV, verbose)
stop = time.time()

# Function-call form of print: valid on both Python 2 and Python 3, and
# consistent with the other print calls in this file.
print(stop - start)

Window: 1024  Overlap: 0
	Loading cross validation configuration
		 Fold 0
{0: 0.8895663374838987, 1: 0.39214869775893396, 2: 1.0, 3: 0.48442760942760943}
[{'verbose': 0, 'filepath': '/home/pedrolisboa/Workspace/lps/SonarAnalysis/Results/StationarityAnalysis/db_4classes_window_1024_overlap_0_dec_3_bins_400_skf/0_fold/d8c968eccdba084377d104a005ef26fe1766e791311b569c2590034a8c9f5c9fc15404707dc34953348e83097d63d42f161cc0df8c60b80ea824ea6e4fe3b598_400/end_weights', 'period': 1, 'save_weights_only': False, 'mode': 'auto', 'type': 'ModelCheckpoint', 'monitor': 'val_loss'}, {'save_best_only': True, 'monitor': 'val_loss', 'filepath': '/home/pedrolisboa/Workspace/lps/SonarAnalysis/Results/StationarityAnalysis/db_4classes_window_1024_overlap_0_dec_3_bins_400_skf/0_fold/d8c968eccdba084377d104a005ef26fe1766e791311b569c2590034a8c9f5c9fc15404707dc34953348e83097d63d42f161cc0df8c60b80ea824ea6e4fe3b598_400/best_weights', 'mode': 'auto', 'save_weights_only': False, 'type': 'ModelCheckpoint', 'period': 1

In [None]:
import pandas as pd
# Show the collected results as a table; the dict built by lofar_iter maps
# each column name ('window', 'overlap', 'fold', 'scores') to a list.
pd.DataFrame(results)
# Disabled: explicit conversion of the window/overlap columns to lists.
# results['window'] = list(results['window'])
# results['overlap'] = list(results['overlap'])

In [3]:
import ipyparallel as ipp
# Open an ipyparallel client with the 'ssh' profile, going through the given
# SSH server. The captured output below shows this attempt failing with an
# SSHException about host authenticity.
c = ipp.Client(profile='ssh', sshserver='pedro.lisboa@ferney.lps.ufrj.br')

SSHException: The authenticity of the host can't be established.

In [13]:
# Display the engine ids known to the client.
c.ids

[0, 1, 2, 3, 4, 5, 6, 7]

In [None]:
# NOTE(review): map_sync is called here without a function or any sequences;
# this looks like an unfinished cell - confirm the intended call before running.
c[:].map_sync()