In [1]:
from __future__ import division

import sys
import os
import h5py
import glob
import tqdm
import time 
import pickle
import numpy as np
from matplotlib import image
import matplotlib.pyplot as plt
import multiprocessing as mp

# import python library
sys.path.append(os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'py'))

from sample_detector import SampleDetector
from database import Database

class Detector(Database):
    """Evaluate a detector over the train or test samples of one site.

    Every sample is wrapped in a ``SampleDetector`` and evaluated in a
    multiprocessing pool; the collected results can be pickled to, and read
    back from, ``<result_location>/<site>/<parameters>.result``.
    """
    # Attributes
    __train, __test = None, None
    __train_test_file = 'train_test.pickle'
    __dataset_location = None
    __result_location = None
    __site = None

    # Initializer
    def __init__(self, dataset_location, database_location, result_location, site):
        """Remember the dataset/result locations and site, then initialise ``Database``."""
        self.__dataset_location = dataset_location
        self.__result_location = result_location
        self.__site = site
        super().__init__(dataset_location, database_location, site)

    def evaluate_detector(self, args):
        """Pool worker entry point: unpack one parameter list and evaluate its sample.

        ``args`` is ``[sample, transforms, transforms_params, detector,
        detector_params, diff_err, time_err, kernel]``; a single list is used
        because ``Pool.imap_unordered`` hands exactly one argument to the callable.
        Returns whatever ``sample.evaluate_detector`` returns.
        """
        (sample, transforms, transforms_params, detector, detector_params,
         diff_err, time_err, kernel) = args[:8]
        return sample.evaluate_detector(transforms=transforms, transforms_params=transforms_params,
                                        detector=detector, detector_params=detector_params,
                                        diff_err=diff_err, time_err=time_err,
                                        kernel=kernel)

    def generate_kernel(self, sample, whistler, whislter_params):
        """Build the whistler kernel used by the detector.

        For ``whistler == 'sim'`` the kernel comes from ``sample.whistler_sim`` with
        ``whislter_params`` read as ``[decay, whistler_time, whistler_freq_len,
        whistler_freq_start, thickness, size]``. Any other ``whistler`` value yields
        ``None``.
        """
        if whistler == 'sim':
            (decay, whistler_time, whistler_freq_len,
             whistler_freq_start, thickness, size) = whislter_params[:6]
            return sample.whistler_sim(decay=decay,
                                       whistler_time=whistler_time,
                                       whistler_freq_len=whistler_freq_len,
                                       whistler_freq_start=whistler_freq_start,
                                       thickness=thickness,
                                       size=size)
        # Unknown kernel kinds were never rejected; keep handing back None for them.
        return None

    def detector_metric(self, train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                         whistler, whistler_params, save=False):
        """Evaluate the detector on every train (``train`` truthy) or test sample.

        One kernel is generated from the first sample and shared by all of them.
        Results are gathered with ``imap_unordered`` and are therefore in completion
        order, not file order. When ``save`` is true the results are also written
        with ``save_detector_metric``.

        Returns the gathered results as a numpy array.
        Raises ``AssertionError`` when there are no samples.
        """
        # get files from either train or test
        files = self.get_train() if train else self.get_test()
        samples = np.array([SampleDetector(self.__dataset_location, self.__site, file) for file in files])
        assert len(samples) > 0, 'No samples'
        # generate kernel
        kernel = self.generate_kernel(samples[0], whistler, whistler_params)
        params = [[sample, transforms, transforms_params, detector, detector_params, diff_err, time_err, kernel]
                  for sample in samples]
        # create multiprocessing methods
        pool = mp.Pool(mp.cpu_count())
        try:
            results = list(tqdm.tqdm(pool.imap_unordered(self.evaluate_detector, params), total=len(params)))
        except BaseException:
            # A worker error or an interrupt must not leave worker processes behind.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
        results = np.array(results)
        if save:
            self.save_detector_metric(train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                         whistler, whistler_params, results)
        return results

    @staticmethod
    def _result_file_name(train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                          whistler, whistler_params):
        """Return the ``.result`` file name: the ``str()`` of each parameter joined by ``_``."""
        parts = (train, transforms, transforms_params, detector, detector_params,
                 diff_err, time_err, whistler, whistler_params)
        return '_'.join(str(part) for part in parts) + '.result'

    def save_detector_metric(self, train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                         whistler, whistler_params, results):
        """Pickle the parameters and ``results`` to ``<result_location>/<site>/<name>.result``.

        The site directory is created when missing. ``train`` only takes part in the
        file name; it is not stored in the pickled dictionary.
        """
        #create parameters dictionary
        data = {
            'transforms': transforms,
            'transforms_params': transforms_params,
            'detector': detector,
            'detector_params': detector_params,
            'whistler': whistler,
            'whistler_params': whistler_params,
            'diff_err': diff_err,
            'time_err': time_err,
            'results': results
        }
        path = os.path.join(self.__result_location, self.get_site())
        # Only "already exists" is tolerated; other failures (e.g. permissions) now surface here.
        os.makedirs(path, exist_ok=True)
        file_name = self._result_file_name(train, transforms, transforms_params, detector, detector_params,
                                           diff_err, time_err, whistler, whistler_params)
        with open(os.path.join(path, file_name), 'wb') as handle:
            pickle.dump(data, handle)

    def load_detector_metric(self,train, transforms, transforms_params, detector, detector_params, diff_err, time_err,
                         whistler, whistler_params):
        """Load the dictionary written by ``save_detector_metric`` for these parameters.

        Raises ``FileNotFoundError`` (a subclass of ``Exception``) when no result
        file exists for the given parameter combination.
        """
        file_name = self._result_file_name(train, transforms, transforms_params, detector, detector_params,
                                           diff_err, time_err, whistler, whistler_params)
        path = os.path.join(self.__result_location, self.get_site(), file_name)
        if not os.path.exists(path):
            raise FileNotFoundError('%s does not exists.'%path)
        # NOTE: pickle can execute arbitrary code while loading; only read result files you wrote yourself.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    def results(self, results):
        """Sum the per-sample counters held in column 2 of ``results``.

        Column 2 of every row is expected to hold a length-3 sequence; the return
        value is the tuple of the three column totals over all rows.
        """
        counters = np.array(list(results[:, 2]))
        return counters[:, 0].sum(), counters[:, 1].sum(), counters[:, 2].sum()

2013-06-09UT12:52:37.22969609.marion.vr2


<Figure size 4500x300 with 1 Axes>

<Figure size 4500x300 with 1 Axes>

<Figure size 200x300 with 2 Axes>

<Figure size 4500x300 with 1 Axes>

<Figure size 4500x300 with 1 Axes>

['2013-06-09UT12:52:37.22969609.marion.vr2'
 array([[array([ 2.54647964, 10.27165075]), 0],
       [array([ 3.06473303, 10.38458628]), 0],
       [array([ 4.64508597, 10.5452116 ]), 0],
       [array([ 5.15694118, 11.03268269]), 0],
       [array([8.07451584, 9.34757823]), 0],
       [array([9.16860633, 9.63007175]), 0],
       [array([ 5.6463, 59.    ]), -1],
       [array([ 9.6803, 92.    ]), -1]], dtype=object)
 array([2, 6, 0])]


In [10]:
# Everything lives under <path up to $USER>/<$USER>/wdml/data; resolve that root once.
wdml_user = os.environ.get('USER')
wdml_data_root = os.path.join(os.getcwd().split(wdml_user)[0], wdml_user, 'wdml', 'data')
dataset_loc = os.path.join(wdml_data_root, 'datasets', 'awdEvents1')
database_loc = os.path.join(wdml_data_root, 'databases', 'awdEvents1')
result_loc = os.path.join(wdml_data_root, 'results', 'awdEvents1')
site = 'marion'
my_detector = Detector(dataset_loc, database_loc, result_loc, site)
# One training file picked at random (no seed is set, so the pick changes between runs).
my_sample = SampleDetector(dataset_loc, site, np.random.choice(my_detector.get_train(),1)[0])

# Sweep the second whistler parameter (whistler_time) and save each run to disk.
for w in [0.5,0.65,0.8,1,1.5]:
    my_detector.detector_metric(
        train=True,
        transforms=['zscore'],
        transforms_params=[[None]],
        detector='tm_cfar',
        detector_params=[10,15,1e-3,0],
        diff_err=0.3,
        time_err=0.2,
        whistler='sim',
        whistler_params=[3,w,7.5,2.5,1,90],
        save=True,
    )
# data = my_detector.load_detector_metric(train=True, transforms=['zscore'], transforms_params=[[None]], detector='tm_cfar', 
#                             detector_params=[10,15,1e-3,0], diff_err=0.3, time_err=0.2, whistler='sim', whistler_params=[3,0.6,5,2.5,2])

100%|██████████| 1471/1471 [01:43<00:00, 13.32it/s]
100%|██████████| 1471/1471 [01:42<00:00, 13.12it/s]
100%|██████████| 1471/1471 [01:45<00:00,  9.15it/s]
100%|██████████| 1471/1471 [01:40<00:00, 14.67it/s]
100%|██████████| 1471/1471 [01:41<00:00, 13.25it/s]


In [9]:
# Print the summed counters of each previously saved run.
# NOTE(review): this sweeps whistler_params[0] with position 1 fixed at 0.8, so it only
# finds files saved with exactly these parameter lists - confirm they exist before a fresh run.
for w in [0.5,0.65,0.8,1,1.5]:
    data = my_detector.load_detector_metric(
        train=True,
        transforms=['zscore'],
        transforms_params=[[None]],
        detector='tm_cfar',
        detector_params=[10,15,1e-3,0],
        diff_err=0.3,
        time_err=0.2,
        whistler='sim',
        whistler_params=[w,0.8,7.5,2.5,1,90],
    )
    results = data['results']
    print(my_detector.results(results))

(2877, 200, 500)
(2649, 428, 985)
(1598, 1479, 466)
(927, 2150, 689)
(941, 2136, 790)
(1239, 1838, 535)
