In [1]:
from __future__ import division

import sys
import os
import h5py
import glob
import tqdm
import time 
import pickle
import numpy as np
from math import sqrt
from matplotlib import image
import matplotlib.pyplot as plt
import multiprocessing as mp
from keras.models import load_model

# Make the project's local Python modules importable: take the part of the current
# working directory that precedes the user's name, then append <USER>/wdml/py to sys.path.
# NOTE(review): relies on the USER environment variable being set and on the user name
# appearing in the working-directory path — confirm on non-POSIX or containerised setups.
sys.path.append(os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'py'))

from sample_detector import SampleDetector
from database import Database

class Detector(Database):
    """Run detectors over a site's train/test files and persist the per-sample results.

    Two evaluation paths are offered: a kernel-based one (`detector_metric`) that fans
    samples out over a process pool, and a model-based one (`detector_metric_ml`) that
    runs in the current process. Results can be pickled to, and reloaded from,
    ``<result_location>/<site>/<parameters>.result``.
    """
    # Attributes
    __train, __test = None, None
    __train_test_file = 'train_test.pickle'
    __dataset_location = None
    __result_location = None
    __site = None

    # Initializer
    def __init__(self, dataset_location, database_location, result_location, site):
        """Remember the dataset/result locations and site, then initialise `Database`."""
        self.__dataset_location = dataset_location
        self.__result_location = result_location
        self.__site = site
        super().__init__(dataset_location, database_location, site)

####### PRIVATE HELPERS
    def __build_samples(self, train):
        """Return an array of `SampleDetector` objects for the train (or test) files.

        Raises AssertionError ('No samples') when the selected split is empty.
        """
        files = self.get_train() if train else self.get_test()
        samples = np.array([SampleDetector(self.__dataset_location, self.__site, file) for file in files])
        assert len(samples)>0, 'No samples'
        return samples

    def __result_dir(self):
        """Directory that holds this site's ``.result`` files."""
        return os.path.join(self.__result_location, self.get_site())

    def __result_file_name(self, *parts):
        """Build the ``.result`` file name: ``str()`` of every part joined by '_'."""
        return '_'.join(str(part) for part in parts) + '.result'

    def __dump_result(self, data, file_name):
        """Pickle `data` into the site's result directory, creating it if needed."""
        directory = self.__result_dir()
        # exist_ok replaces the former blanket `except OSError: pass`, which also hid
        # genuine failures such as permission errors.
        os.makedirs(directory, exist_ok=True)
        with open(os.path.join(directory, file_name), 'wb') as handle:
            pickle.dump(data, handle)

    def __load_result(self, file_name):
        """Unpickle one result file of this site; raise if it does not exist."""
        path = os.path.join(self.__result_dir(), file_name)
        if not os.path.exists(path):
            # FileNotFoundError is an Exception subclass, so existing handlers still match.
            raise FileNotFoundError('%s does not exists.'%path)
        # NOTE: pickle executes arbitrary code on load; only read trusted result files.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

####### MATCHED FILTER
    def evaluate_detector(self,args):
        """Pool worker entry point: unpack `args` and evaluate one sample.

        `args` is the positional list built in `detector_metric`:
        [sample, transforms, transforms_params, detector, detector_params,
         diff_err, time_err, kernel, segmented].
        """
        sample = args[0]
        return sample.evaluate_detector(transforms=args[1], transforms_params=args[2],
                                        detector=args[3], detector_params=args[4],
                                        diff_err=args[5], time_err=args[6],
                                        kernel=args[7],
                                        segmented=args[8])

    def generate_kernel(self, sample, whistler, whistler_params):
        """Return the kernel for the requested `whistler` kind.

        For 'sim' the kernel comes from `sample.whistler_sim`, with `whistler_params`
        read positionally as [decay, whistler_time, whistler_freq_len,
        whistler_freq_start, thickness, size, freq_slice].
        'mean' is not implemented and, like any other value, yields None.
        """
        if whistler=='sim':
            return sample.whistler_sim(decay=whistler_params[0],
                                       whistler_time=whistler_params[1],
                                       whistler_freq_len=whistler_params[2],
                                       whistler_freq_start=whistler_params[3],
                                       thickness=whistler_params[4],
                                       size=whistler_params[5],
                                       freq_slice=whistler_params[6])
        # 'mean' (placeholder) and unknown kinds: no kernel.
        return None

    def detector_metric(self, train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                         whistler, whistler_params, segmented, save=False):
        """Evaluate `detector` on every train (or test) sample using a process pool.

        The kernel is generated once from the first sample and shared by all samples.
        Results arrive in completion order (`imap_unordered`), not file order.

        Returns:
            np.ndarray built from the per-sample results.
        Raises:
            AssertionError: when the selected split has no samples.
        """
        samples = self.__build_samples(train)
        # generate kernel
        kernel = self.generate_kernel(samples[0],whistler, whistler_params)
        params = [[sample, transforms, transforms_params, detector, detector_params, diff_err, time_err, kernel, segmented] for sample in samples]
        # create multiprocessing methods
        pool = mp.Pool(mp.cpu_count())
        try:
            results = list(tqdm.tqdm(pool.imap_unordered(self.evaluate_detector, params), total=len(params)))
        except BaseException:
            # A worker failure or an interrupt: stop outstanding work instead of leaking processes.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
        results = np.array(results)
        if save:
            self.save_detector_metric(train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                         whistler, whistler_params, segmented, results)
        return results

    def save_detector_metric(self, train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                         whistler, whistler_params, segmented, results):
        """Pickle the run parameters together with `results` into the site's result directory."""
        #create parameters dictionary
        data = {
            'transforms': transforms,
            'transforms_params': transforms_params,
            'detector': detector,
            'detector_params': detector_params,
            'whistler': whistler,
            'whistler_params': whistler_params,
            'diff_err': diff_err,
            'time_err': time_err,
            'segmented': segmented,
            'results': results
        }
        file_name = self.__result_file_name(train, transforms, transforms_params, detector, detector_params,
                                            diff_err, time_err, whistler, whistler_params, segmented)
        self.__dump_result(data, file_name)

    def load_detector_metric(self,train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                         whistler, whistler_params,segmented):
        """Load the dictionary written by `save_detector_metric` for the same parameters.

        Raises:
            FileNotFoundError: when no matching ``.result`` file exists.
        """
        file_name = self.__result_file_name(train, transforms, transforms_params, detector, detector_params,
                                            diff_err, time_err, whistler, whistler_params, segmented)
        return self.__load_result(file_name)

######### MACHINE LEARNING
    def detector_metric_ml(self, train, transforms, transforms_params, input_shape, scaler, model,
                        diff_err, time_err,save=False):
        """Evaluate the model-based detector on every train (or test) sample.

        Runs sequentially in the current process by calling
        `sample.evaluate_detector_ml` on each sample. A second, pool-based definition
        of this method used to shadow this one; it dispatched to a worker method that
        does not exist on this class, so it has been removed.

        Returns:
            np.ndarray built from the per-sample results.
        Raises:
            AssertionError: when the selected split has no samples.
        """
        samples = self.__build_samples(train)
        results = np.array([
            sample.evaluate_detector_ml(transforms, transforms_params, input_shape, scaler, model, diff_err, time_err)
            for sample in tqdm.tqdm(samples)
        ])
        if save:
            self.save_detector_metric_ml(train, transforms, transforms_params, input_shape, diff_err, time_err,results)
        return results

    def save_detector_metric_ml(self, train, transforms, transforms_params, input_shape, diff_err, time_err, results):
        """Pickle the ML run parameters together with `results` into the site's result directory."""
        #create parameters dictionary
        data = {
            'transforms': transforms,
            'transforms_params': transforms_params,
            # was `detector`, an undefined name in this scope (NameError on every save)
            'input_shape': input_shape,
            'diff_err': diff_err,
            'time_err': time_err,
            'results': results
        }
        file_name = self.__result_file_name(train, transforms, transforms_params, input_shape, diff_err, time_err)
        self.__dump_result(data, file_name)

    def load_detector_metric_ml(self,train, transforms, transforms_params, input_shape, diff_err, time_err):
        """Load the dictionary written by `save_detector_metric_ml` for the same parameters.

        Raises:
            FileNotFoundError: when no matching ``.result`` file exists.
        """
        file_name = self.__result_file_name(train, transforms, transforms_params, input_shape, diff_err, time_err)
        return self.__load_result(file_name)

####

    def load_all_detector_metric(self):
        """Summarise every ``.result`` file stored for this site.

        Returns:
            Object array of shape (n_files, 3); each row is
            [file path, `results(...)` totals, `performance(...)` scores].
        """
        files = glob.glob(os.path.join(self.__result_dir(),'*.result'))
        # Rows mix a string with arrays of different lengths, so fill an explicit object
        # array: recent NumPy refuses to infer one from such ragged nested lists.
        metrics = np.empty((len(files), 3), dtype=object)
        for row, file in enumerate(files):
            # NOTE: pickle executes arbitrary code on load; only read trusted result files.
            with open(file, 'rb') as handle:
                results = pickle.load(handle)['results']
            metrics[row, 0] = file
            metrics[row, 1] = self.results(results)
            metrics[row, 2] = self.performance(results)
        return metrics

    def results(self, results):
        """Sum the three per-sample counts held in column 2 of `results`.

        The totals are returned in stored order, which `performance` reads as
        [false_negative, true_positive, false_positive].
        """
        counts = np.array([r for r in results[:,2]])
        return np.array([counts[:,0].sum(),counts[:,1].sum(),counts[:,2].sum()])

    def performance(self, results):
        """Return [precision, recall, f1_score, g_measure] from the summed counts.

        A zero denominator (e.g. no true positives at all) is not special-cased; the
        division then follows NumPy's rules for it.
        """
        counts = np.array([r for r in results[:,2]])
        false_negative, true_positive, false_positive = counts[:,0].sum(),counts[:,1].sum(),counts[:,2].sum()
        precision = true_positive/(true_positive + false_positive)
        recall = true_positive/(true_positive + false_negative)
        f1_score = 2*(precision*recall/(precision+recall))
        # geometric mean of precision and recall
        g_measure = sqrt(precision*recall)
        # builtin float: the `np.float` alias was removed from NumPy
        return np.array([precision, recall, f1_score, g_measure],dtype=float)

Using TensorFlow backend.


In [None]:
# dataset_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','datasets', 'awdEvents1')
# database_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','databases', 'awdEvents1')
# result_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','results', 'awdEvents1')
# site = 'marion'
# my_detector = Detector(dataset_loc, database_loc, result_loc, site)
# my_sample = SampleDetector(dataset_loc, site, np.random.choice(my_detector.get_train(),1)[0])
# scaler_path = os.path.join(database_loc,site,'scaler.pickle')
# model_path = os.path.join(database_loc,site,'model.h5')

# freq_slice = [2.5,10]
# train=True 
# transforms=['slice','zscore']
# transforms_params=[freq_slice,[None]]
# diff_err=0.3
# time_err=0.2
# # MF
# detector='tm_cfar'
# detector_params=[10,25,1e-6,10]
# whistler='sim'
# whistler_params=[3,0.7,7.5,2.5,1,25,freq_slice]
# segmented=False
# # ML
# input_shape = [48,108] # size at freq_slice [2.5,10]
# scaler = pickle.load( open( scaler_path, "rb" ) )['scaler']
# model = load_model(model_path)

# # my_detector.detector_metric(train, transforms, transforms_params, detector, detector_params, diff_err, 
# #                             time_err, whistler, whistler_params, segmented, save=True)

# # my_detector.detector_metric_ml(train, transforms, transforms_params, input_shape, scaler, model, 
# #                             diff_err=diff_err, time_err=time_err)

In [31]:
# dataset_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','datasets', 'awdEvents1')
# database_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','databases', 'awdEvents1')
# result_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','results', 'awdEvents1')
# site = 'marion'
# my_detector = Detector(dataset_loc, database_loc, result_loc, site)

# freq_slice = [2.5,10]
# train=True 
# transforms=['slice','zscore']
# transforms_params=[freq_slice,[None]]
# detector='tm_cfar'
# detector_params=[10,25,1e-6,7]
# diff_err=0.3
# time_err=0.2
# whistler='sim'
# whistler_params=[3,0.7,7.5,2.5,1,25,freq_slice]
# segmented=False

# # for decay in [3,3.25,3.5,3.75,4]:
# #     for whistler_time in [0.5,0.6,0.7,0.8,0.9]:
# #             for N in [10,15,20,25,30]:
# #                 for G in [10,15,20,25,30]:
# # for pfa in [1e-1,1e-2,1e-3,1e-4,1e-5,1e-6]:
# # for pfa in [1e-2, 1e-3,1e-4,1e-5,1e-6]:
# #     for C in [6,7,8,9,10]:
# # for size in [10,25,40,50]:
# #     whistler_params=[3,0.7,7.5,2.5,1,size,freq_slice]
# #     my_detector.detector_metric(train, transforms, transforms_params, detector, detector_params, diff_err, 
# #                                                         time_err, whistler, whistler_params, segmented, save=True)
            
            
# # for pfa in [1e-1,1e-2,1e-3,1e-4,1e-5,1e-6]:
# #     for C in [0,3,5]:

# # for w in [0.5,0.65,0.8,1,1.5]:
    
# #     data = my_detector.load_detector_metric(train=True, transforms=['zscore'], transforms_params=[[None]], detector='tm_cfar', 
# #                                 detector_params=[10,15,1e-3,0], diff_err=0.3, time_err=0.2, whistler='sim', whistler_params=[w,0.8,7.5,2.5,1,90])
# #     results = data['results']
# #     print(my_detector.results(results))

In [60]:
# dataset_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','datasets', 'awdEvents1')
# database_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','databases', 'awdEvents1')
# result_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','results', 'awdEvents1')
# site = 'marion'
# my_detector = Detector(dataset_loc, database_loc, result_loc, site)
# results = my_detector.load_all_detector_metric()

In [89]:
# results
# arr = results[:,2]
# arr = np.array([r for r in arr])
# ix = np.argwhere(arr[:,3]==arr[:,3].max())
# results[ix]

array([[["/home/othniel/wdml/data/results/awdEvents1/marion/True_['slice', 'zscore']_[[2.5, 10], [None]]_tm_cfar_[10, 25, 1e-06, 10]_0.3_0.2_sim_[3, 0.7, 7.5, 2.5, 1, 25, [2.5, 10]]_False.result",
         array([ 783, 2294,  513]),
         array([0.81724261, 0.74553136, 0.77974167, 0.7805639 ])]]],
      dtype=object)

In [32]:
# for size in [10,25,40,50]:
#     whistler_params=[3,0.7,7.5,2.5,1,size,freq_slice]
#     data = my_detector.load_detector_metric(train, transforms, transforms_params, detector, 
#                         detector_params, diff_err, time_err, whistler, whistler_params, segmented)
#     results = data['results']
#     print((size),my_detector.results(results),my_detector.performance(results))

10 (1543, 1534, 100) (0.9388004895960832, 0.49853753656158595, 0.651241774570155, 0.684125195710582)
25 (936, 2141, 377) (0.8502779984114377, 0.6958076048098798, 0.7653261840929403, 0.7691748159535654)
40 (811, 2266, 590) (0.7934173669467787, 0.7364315892102697, 0.7638631383785606, 0.7643936240234112)
50 (768, 2309, 676) (0.773534338358459, 0.7504062398440039, 0.7617947871989442, 0.7618825330966649)
