In [1]:
from __future__ import division

import sys
import os
import h5py
import glob
import tqdm
import time 
import pickle
import numpy as np
from math import sqrt
from matplotlib import image
import matplotlib.pyplot as plt
import multiprocessing as mp
# from keras.models import load_model

# Make the project's python library importable: take the part of the current
# working directory that precedes the value of $USER, then append
# $USER/wdml/py to it and register the result on sys.path.
# NOTE(review): this relies on the USER environment variable being set and on
# the working directory containing that name -- confirm for other environments.
sys.path.append(os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'py'))

from cfar_detector import CFARDetector
from database import Database
from dataset_simulation import DatasetSimulation

class CFARDetectorGenerator(Database,DatasetSimulation):
    # Attributes
    __train, __test = None, None
    __train_test_file = 'train_test.pickle'
    __dataset_location = None
    __result_location = None
    __site = None
    # Initializer
    def __init__(self, dataset_location, database_location, dataset_sim_location, result_location, site,t_res, f_res):
        """Remember the locations used by this class and initialise both parents.
        Param
            dataset_location: stored locally and forwarded to Database
            database_location: forwarded to Database
            dataset_sim_location: forwarded to DatasetSimulation
            result_location: stored locally; root under which results are written
            site: stored locally and forwarded to Database
            t_res, f_res: forwarded to DatasetSimulation
        """
        # private copies used by the methods of this class
        self.__dataset_location, self.__result_location, self.__site = (
            dataset_location, result_location, site)
        # each parent is initialised explicitly with its own arguments
        Database.__init__(self, dataset_location, database_location, site)
        DatasetSimulation.__init__(self, dataset_sim_location, t_res, f_res)
        
    ###############################
    """CROSS-CORRELATION RESULTS"""
    
    def get_sample_corr(self, params):
        """Get result of cross-correlation and target indices for one sample
        Param
            params: parameters passed by multiprocessing correlation generator,
                laid out as [sample, transforms, transforms_params, kernel]
        Result
            file: name of file sample
            corr: result of correlation
            noise_ix: (n, 2) array of noise cut indices (columns 2 and 3 of
                every cut from position w onwards)
            target_ix: (m, 2) array of target cut indices (columns 2 and 3 of
                the first w cuts), keeping only rows whose second index is
                below len(corr)
        """
        sample, transforms, transforms_params, kernel = params[0], params[1], params[2], params[3]
        corr = sample.get_corr(transforms=transforms, transforms_params=transforms_params, kernel=kernel)
        cuts, w, _ = sample.cuts(cut_time=0.2, cut_time_split=1.5, cut_freq=8, cut_freq_min=1.5, time_err=1, noise=True)
        # reshape(-1, 2) keeps the two-column layout even when a group is
        # empty; without it an empty group is 1-D and the column lookup
        # below raises IndexError
        noise_ix = np.array([[c[2], c[3]] for c in cuts[w:]]).reshape(-1, 2)
        target_ix = np.array([[c[2], c[3]] for c in cuts[:w]]).reshape(-1, 2)
        # drop targets that end beyond the correlation result
        target_ix = target_ix[target_ix[:, 1] < len(corr)]
        return [sample.get_file(), corr, noise_ix, target_ix]
    
    def generate_correlation(self, transforms, transforms_params, kernel_type='sim', train=True, save=True, n=None):
        """Generate all correlation results
        Param
            transforms, transforms_params: forwarded to every sample's get_corr
            kernel_type: 'mean' loads the kernel from the site's kernel_data.h5,
                any other value uses the simulated-kernel parameters
            train: use the train files if True, the test files otherwise
            save: to save results to a file
            n: number of sample to process, None to process all sample
        Return
            results: list of [file, corr, noise_ix, target_ix], in the order
                the worker processes finish (imap_unordered)
        """
        # get files from either train or test
        files = list(self.get_train() if train else self.get_test())
        assert len(files)>0, 'No samples'
        # build detectors only for the files that will actually be processed
        samples = [CFARDetector(self.__dataset_location, self.__site, file) for file in files[:n]]
        # load kernel; any detector can load it, fall back to the first file
        # when the slice above is empty (n == 0)
        kernel_source = samples[0] if samples else CFARDetector(self.__dataset_location, self.__site, files[0])
        kernel_mean_params = [os.path.join(self.get_database_location(), self.get_site(), 'kernels', 'kernel_data.h5')]
        # NOTE(review): presumably [An, D0, magnitude] of the simulated kernel -- confirm
        kernel_sim_params = [0.35,80,1]
        kernel = kernel_source.load_kernel(kernel_type,kernel_mean_params if kernel_type=='mean' else kernel_sim_params)
        params = [[sample, transforms, transforms_params, kernel] for sample in samples]
        # create multiprocessing methods
        results = []
        pool = mp.Pool(mp.cpu_count())
        try:
            for result in tqdm.tqdm(pool.imap_unordered(self.get_sample_corr, params), total=len(params)):
                results.append(result)
        except BaseException:
            # do not leave worker processes behind when a sample fails
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
        if save:
            self.save_generated_correlation(transforms, transforms_params, kernel_type,train,results)
        return results
    
    def save_generated_correlation(self, transforms, transforms_params, kernel_type,train,results):
        """Save results"""
        #create parameters dictionary
        data = {
            'transforms': transforms,
            'transforms_params': transforms_params, 
            'kernel_type': kernel_type, 
            'train': train, 
            'results': results
        }
        path = os.path.join(self.__result_location,self.get_site(),'cfar')
        try:
            os.makedirs(path)
        except OSError:
            pass
        file_name = '_'.join([str(transforms),str(transforms_params),str(kernel_type),str(train)])
        file_name += '.result'
        pickle.dump(data, open(os.path.join(path,file_name), 'wb'))
    
    def load_generated_correlation(self,transforms, transforms_params, kernel_type, train):
        """Load saved result containing all correlation"""
        file_name = '_'.join([str(transforms),str(transforms_params),str(kernel_type),str(train)])
        file_name += '.result'
        path = os.path.join(self.__result_location,self.get_site(),'cfar',file_name)
        if not os.path.exists(path):
            raise Exception('%s does not exists.'%path)
        return pickle.load( open(path, "rb"))
    
    def load_generated_correlation_interference(self,transforms, transforms_params, kernel_type, train):
        """Load interference and target+interference of correlations
        Param
            transforms, transforms_params, kernel_type, train: identify the saved
                result, as in load_generated_correlation
        Return
            target_data: contain target+interference of all correlation
            noise_data: contain interference of all correlation
        """
        results = self.load_generated_correlation(transforms, transforms_params, kernel_type, train)['results']
        # Collect the slices and join them once at the end; concatenating
        # inside the loops re-copies the growing array on every step.
        # The leading empty float array keeps the output identical to the
        # incremental version (same dtype promotion, valid when nothing is cut).
        target_chunks, noise_chunks = [np.array([])], [np.array([])]
        for result in tqdm.tqdm(results):
            _, corr, noise_ix, target_ix = result
            target_chunks.extend(corr[t[0]:t[1]] for t in target_ix)
            noise_chunks.extend(corr[n[0]:n[1]] for n in noise_ix)
        return np.concatenate(target_chunks), np.concatenate(noise_chunks)
    
    ###################################
    """CROSS-CORRELATION GENERATIONS"""
    
    def get_confusion_matrix_(self, params):
        return self.get_confusion_matrix(file=params[0],trans=params[1],trans_params=params[2],kernel=params[3],
                                         N=params[4],G=params[5],k=params[6],Ts=params[7],Tl=params[8],pfa=params[9],time_err=params[10])
    
    def get_confusion_matrix(self, file, trans, trans_params,kernel,N,G,k,Ts,Tl,pfa,time_err):
        """Detect bounding boxes in one file with the 'fusion_cfar' detector and score them.
        Param
            file: name of the sample file
            trans, trans_params, kernel: forwarded to detection_bounding_boxes
            N, G, k, Ts, Tl, pfa: detector parameters, passed on as one list
            time_err: forwarded to both the detection and the scoring call
        Return
            file: the file name that was given
            confusion_matrix: result of the detector's confusion_matrix()
            bboxes: result of the detector's detection_bounding_boxes()
        """
        detector = CFARDetector(Database.get_dataset_location(self), Database.get_site(self), file)
        detector_params = [N, G, k, Ts, Tl, pfa]
        bboxes = detector.detection_bounding_boxes(
            trans, trans_params, kernel, 'fusion_cfar', detector_params,
            time_err=time_err, duration=False)
        scores = detector.confusion_matrix(bboxes, time_err=time_err)
        return file, scores, bboxes
    
    def dataset_cross_correlation_gen(self, transforms, transforms_params, f_min, f_max, An, D0, magnitude,N,G,k,Ts,Tl,X_dB, train=True, n=None, time_err=1, force=False):
        """Run the 'fusion_cfar' detector on all sample and save the outcome to a file
        Param
            transforms, transforms_params: transform applied between the 'slice'
                and 'scale' steps, with its parameters
            f_min, f_max: bounds of the 'slice' step; multiplied by 1e3 they also
                bound the frequency range of the simulated kernel
            An, D0, magnitude: forwarded to DatasetSimulation.whistler_sim
            N, G, k, Ts, Tl: detector parameters forwarded to get_confusion_matrix
            X_dB: threshold in dB from which pfa is derived
            train: use the train files if True, the test files otherwise
            n: number of sample to process, None to process all sample
            time_err: forwarded to get_confusion_matrix
            force: regenerate even when the result file already exists
        Return
            None; the results are pickled to
            <result_location>/<site>/cfar/<parameters>.corr
        """
        # the file name encodes every parameter of the run
        t_name = '('+','.join([str(transforms),str(transforms_params)])+')'
        f_name = '('+','.join([str(f_min),str(f_max)])+')'
        k_name = '('+','.join([str(An),str(D0),str(magnitude)])+')'
        d_name = '('+','.join([str(N),str(G),str(k),str(Ts),str(Tl),str(X_dB)])+')'
        p_name = '('+','.join([str(train),str(n),str(time_err)])+')'
        file_name = '_'.join([t_name,f_name,k_name,d_name,p_name])
        file_name += '.corr'
        path = os.path.join(self.__result_location,self.get_site(), 'cfar')
        file_path = os.path.join(path,file_name)
        # check if file already exist
        if os.path.exists(file_path) and not force:
            print('%s already exists'%file_name)
            return
        # calculate pfa
        pfa = (1/(1+((10**(X_dB/10))/(2*N))))**(2*N)
        # generate kernel
        trans = ['slice',transforms,'scale']
        trans_params = [[f_min,f_max],transforms_params,[0,1]]
        # the number of samples must be an integer: a float such as 1e3 is
        # rejected by current NumPy versions
        f_range = np.linspace(f_min, f_max, 1000)*1e3
        DatasetSimulation.set_frequency_range(self,f_range)
        kernel = DatasetSimulation.whistler_sim(self,An=An, D0=D0, magnitude=magnitude)
        # get samples
        files = Database.get_train(self) if train else Database.get_test(self)
        params = [[file,trans,trans_params,kernel,N,G,k,Ts,Tl,pfa,time_err] for file in files][:n]
        # get results
        results = {}
        bboxes = {}
        # running totals of the entries in each group of the per-file results
        confusion_matrix = [0,0,0,0]
        pool = mp.Pool(mp.cpu_count())
        try:
            for file, result, bbox in tqdm.tqdm(pool.imap_unordered(self.get_confusion_matrix_, params), total=len(params)):
                for i, group in enumerate(result):
                    confusion_matrix[i]+=len(group)
                results[str(file)]=result
                bboxes[str(file)]=bbox
        except BaseException:
            # do not leave worker processes behind when a sample fails
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
        # Save results
        #create parameters dictionary
        data = {
            'transforms': transforms, 'transforms_params': transforms_params,
            'f_min':f_min, 'f_max':f_max,
            'An':An, 'D0':D0, 'magnitude':magnitude,
            'N':N, 'G':G, 'k':k,'Ts':Ts, 'Tl':Tl, 'X_dB':X_dB,
            'train': train, 'n':n, 'time error':time_err,
            'confusion matrix': confusion_matrix,
            'bboxes':bboxes,
            'results': results
        }
        try:
            os.makedirs(path)
        except OSError:
            # an already existing directory is expected; anything else is a
            # real failure and must not be hidden
            if not os.path.isdir(path):
                raise
        # the context manager guarantees the pickle is flushed and closed
        with open(file_path, 'wb') as handle:
            pickle.dump(data, handle)
    
    def dataset_cross_correlation_load(self, transforms, transforms_params, f_min, f_max, An, D0, magnitude,N,G,k,Ts,Tl,X_dB, train=True, n=None, time_err=1):
        """Load saved result containing all correlation"""
        t_name = '('+','.join([str(transforms),str(transforms_params)])+')'
        f_name = '('+','.join([str(f_min),str(f_max)])+')'
        k_name = '('+','.join([str(An),str(D0),str(magnitude)])+')'
        d_name = '('+','.join([str(N),str(G),str(k),str(Ts),str(Tl),str(X_dB)])+')'
        p_name = '('+','.join([str(train),str(n),str(time_err)])+')'
        file_name = '_'.join([t_name,f_name,k_name,d_name,p_name])
        file_name += '.corr'
        path = os.path.join(self.__result_location,self.get_site(), 'cfar',file_name)
        if not os.path.exists(path):
            raise Exception('%s does not exists.'%path)
        return pickle.load( open(path, "rb"))

    def performance(self, confusion_matrix):
        """Derive detection metrics from a confusion matrix
        Param
            confusion_matrix: four counts in the order [tp, fp, fn, tn]
                (tn is not used by any metric)
        Return
            dictionary with 'recall', 'precision', 'f1 score', 'g measure',
            'false alarm' (1 - precision) and 'misdetection' (1 - recall),
            each rounded to 3 decimals. A ratio whose denominator is zero is
            reported as 0.0 instead of raising ZeroDivisionError.
        """
        tp,fp,fn,tn = confusion_matrix

        def ratio(numerator, denominator):
            # undefined ratios (nothing detected / nothing to detect) count as 0.0
            return numerator/denominator if denominator else 0.0

        precision = ratio(tp, tp+fp)
        recall = ratio(tp, tp+fn)
        f1_score = 2*ratio(precision*recall, precision+recall)
        g_measure = sqrt(precision*recall)
        result = {
            'recall': np.round(recall,3),
            'precision': np.round(precision,3),
            'f1 score': np.round(f1_score,3),
            'g measure': np.round(g_measure,3),
            'false alarm': np.round(1-precision,3),
            'misdetection': np.round(1-recall,3)
        }
        return result

In [2]:
# dataset_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','datasets', 'awdEvents1')
# database_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','databases', 'awdEvents1')
# dataset_sim_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','datasets', 'simulations', 'simple' ,'whistler')
# result_loc = os.path.join(os.getcwd().split(os.environ.get('USER'))[0],os.environ.get('USER'), 'wdml', 'data','results', 'awdEvents1')
# site = 'marion'
# t_res, f_res = 0.006395061728395062, 0.15503875968992248
# cfar_detector_gen = CFARDetectorGenerator(dataset_loc, database_loc, dataset_sim_loc, result_loc, site, t_res, f_res)
# n=1
# cfar_detector_gen.dataset_cross_correlation_gen(transforms='zscore', transforms_params=None, f_min=1.5, f_max=9.5, 
#                                            An=0.35, D0=80, magnitude=1, N=10, G=7, k=9, Ts=3, Tl=8, X_dB=0.5,
#                                            train=True, n=n)
# data = cfar_detector_gen.dataset_cross_correlation_load(transforms='zscore', transforms_params=None, f_min=1.5, f_max=9.5, 
#                                            An=0.35, D0=80, magnitude=1, N=10, G=7, k=9, Ts=3, Tl=8, X_dB=0.5,
#                                            train=True, n=n)
# confusion_matrix = data['confusion matrix']
# print(confusion_matrix)
# performance = cfar_detector_gen.performance(confusion_matrix)
# print(performance)

100%|██████████| 1/1 [00:00<00:00,  2.08it/s]

[4, 1, 0, 0]
{'recall': 1.0, 'precision': 0.8, 'f1 score': 0.889, 'g measure': 0.894, 'false alarm': 0.2, 'misdetection': 0.0}



