In [1]:
import sys
import os
import time

sys.path.insert(0,'../')
sys.path.insert(0,'../py')

import parameters
import utilities
from spectrogram_utilities import spectrogram_reshape
import output_utilities
import spectrogram_output_visualiser

import tensorflow as tf
import numpy as np
import pandas as pd
import h5py

from PIL import Image

# Eager execution is intentionally left disabled; uncomment the next line to turn it on.
# tf.enable_eager_execution() 
# Set the TensorFlow logger verbosity to INFO.
tf.logging.set_verbosity(tf.logging.INFO)

In [3]:
def spectrogram_cuts_db(awd_event, site, files, database_name, 
                        threshold=parameters.output_threshold,
                        time_lower_boundary = parameters.time_lower_boundary,
                        time_upper_boundary = parameters.time_upper_boundary,
                        freq_upper_boundary = parameters.freq_upper_boundary,
                        freq_lower_boundary = parameters.freq_lower_boundary,
                        verbose=False, force=False):
    '''Extract the whistler and noise cuts and store them in a h5py database.

    For every entry of `files`, `spectrogram_output_visualiser.spectrogram_cut`
    is called to obtain the spectrogram together with its whistler and noise
    cut boundaries. Each cut is sliced out of the spectrogram, passed through
    `spectrogram_reshape` and stored as a gzip-compressed float32 dataset named
    `file.split(site)[0] + str(i)`, where `i` counts the cuts of that file
    (whistler cuts first, then noise cuts).

    Dataset attributes:
        pb   -- int(indices[i][-1]) for whistler cuts, 0 for noise cuts
        evt  -- True for whistler cuts, False for noise cuts
        site -- the `site` argument

    Database attributes: `freq_length` and `time_length` (values returned by
    the last `spectrogram_cut` call; not written when `files` is empty),
    `width` and `height` (from `parameters`).

    params:
        awd_event, site, files -- forwarded to `spectrogram_cut`, one call per file
        database_name -- name resolved through `utilities.init_h5py` /
                         `utilities.get_h5py_path`
        threshold, time_*_boundary, freq_*_boundary -- forwarded to `spectrogram_cut`
        verbose -- print a header, a progress trace and the runtime
        force -- forwarded to `utilities.init_h5py`
    returns:
        None. Returns early, without writing anything, when
        `utilities.init_h5py` returns a falsy value.
    '''
    start = time.time()

    # create h5py database
    if not utilities.init_h5py(database_name, force=force, verbose=verbose):
        return

    # Cut lengths reported by the most recent file; they stay None when `files`
    # is empty so the attribute write below can be skipped instead of failing
    # on an unbound name.
    f_cut_length = t_cut_length = None
    last_percent = None

    # The context manager guarantees the file is closed even if a cut fails.
    with h5py.File(utilities.get_h5py_path(database_name), 'r+') as database:
        if verbose:
            print('\nGenerating whistler and noise cuts database for %s/%s' %('awdEvent'+str(awd_event),site))

        for num_file, file in enumerate(files):
            indices, spectrogram, spec_cuts, noise_cuts, f_cut_length, t_cut_length = spectrogram_output_visualiser.spectrogram_cut(
                awd_event, site, file,
                threshold=threshold,
                time_lower_boundary=time_lower_boundary,
                time_upper_boundary=time_upper_boundary,
                freq_upper_boundary=freq_upper_boundary,
                freq_lower_boundary=freq_lower_boundary)

            prefix = file.split(site)[0]
            # Whistler cuts come first so that `i` lines up with `indices`;
            # noise cuts simply continue the numbering.
            labelled_cuts = [(cut, True) for cut in spec_cuts] + [(cut, False) for cut in noise_cuts]
            for i, (cut, is_event) in enumerate(labelled_cuts):
                # extract the portion of interest and bring it to the fixed input size
                spec = spectrogram_reshape(spectrogram[cut[0]:cut[1], cut[2]:cut[3]])
                file_dataset = database.create_dataset(prefix + str(i), spec.shape, np.float32,
                                                       compression="gzip", data=spec)
                file_dataset.attrs['pb'] = int(indices[i][-1]) if is_event else 0
                file_dataset.attrs['evt'] = is_event
                file_dataset.attrs['site'] = site

            if verbose:
                percent = int(num_file*100/len(files))
                if last_percent != percent:
                    # print the figure on multiples of ten, a dot otherwise
                    sys.stdout.write(("%s%%" % percent) if percent % 10 == 0 else ".")
                    sys.stdout.flush()
                    last_percent = percent

        if f_cut_length is not None:
            database.attrs['freq_length'] = f_cut_length
            database.attrs['time_length'] = t_cut_length
        database.attrs['width'] = parameters.clf_input_width
        database.attrs['height'] = parameters.clf_input_height

    end = time.time()
    if verbose:
        print("\nRuntime: {:.2f} seconds".format(end - start))

In [4]:
def load_spectrogram_cuts_db(awd_event, site, database_name, verbose=False, noise=True):
    '''Load spectrogram cuts from the h5py database, building it first if it cannot be opened.

    params:
        awd_event, site -- used for the progress header and, when the database
                           has to be generated, passed to `utilities.all_files`
                           and `spectrogram_cuts_db`
        database_name -- name resolved through `utilities.get_h5py_path`
        verbose -- print the open error (if any), a progress trace and the runtime
        noise -- when False, datasets whose `evt` attribute is falsy are skipped
    returns:
        data -- np.array built from the flattened cuts, one entry per loaded cut
        pb -- np.array of the `pb` attribute of each loaded cut
        evt -- np.array of the `evt` attribute of each loaded cut
        f_cut_length -- the database `freq_length` attribute
        t_cut_length -- the database `time_length` attribute
    '''
    start = time.time()
    data = []
    pb = []
    evt = []

    # load database (read-only: nothing is written while loading)
    try:
        database = h5py.File(utilities.get_h5py_path(database_name), 'r')
    except Exception as e:
        if verbose:
            print(e)
        # if no database, create the database
        files = utilities.all_files(awd_event, site)
        spectrogram_cuts_db(awd_event, site, files, database_name,verbose=verbose)
        start = time.time() # restart timing
        database = h5py.File(utilities.get_h5py_path(database_name), 'r')

    # try/finally so the handle is released even when a dataset is malformed
    try:
        files = list(database.keys())
        if verbose:
            print('\nLoading spectrogram cuts from database for %s/%s' %('awdEvent'+str(awd_event),site))
        last_percent = None
        num_file = 0  # number of cuts loaded so far (skipped noise cuts are not counted)

        for file in files:
            dataset = database[file]
            if not noise and not dataset.attrs['evt']:
                # if noise is not selected, skip the noise cut
                continue
            # np.empty defaults to float64; read_direct fills it from the stored dataset
            file_data = np.empty(dataset.shape)
            dataset.read_direct(file_data)
            data.append(file_data.flatten())
            pb.append(dataset.attrs['pb'])
            evt.append(dataset.attrs['evt'])
            if verbose:
                percent = int(num_file*100/len(files))
                if last_percent != percent:
                    # print the figure on multiples of ten, a dot otherwise
                    sys.stdout.write(("%s%%" % percent) if percent % 10 == 0 else ".")
                    sys.stdout.flush()
                    last_percent = percent
                num_file += 1

        f_cut_length = database.attrs['freq_length']
        t_cut_length = database.attrs['time_length']
    finally:
        database.close()

    data = np.array(data)
    pb = np.array(pb)
    evt = np.array(evt)

    end = time.time()
    if verbose:
        print("\nRuntime: {:.2f} seconds".format(end - start))
    return data, pb, evt, f_cut_length, t_cut_length

## TFRecords

In [31]:
def _write_cut_tfrecord(path, spec, merit, is_whistler):
    '''Write one spectrogram cut to `path` as a TFRecord file holding a single Example.

    The Example has three features: `data` (the flattened cut as floats),
    `merit` (int64) and `label` (int64, 1 for a whistler cut, 0 for noise).
    '''
    feature = {
        'data': tf.train.Feature(
                    float_list=tf.train.FloatList(value=spec.flatten())),
        'merit': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[merit])),
        'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(is_whistler)]))
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    # Build the example before opening the file, and let the context manager
    # close the writer even if the write fails.
    with tf.python_io.TFRecordWriter(path) as writer:
        # serialize to string and write on the file
        writer.write(example.SerializeToString())


def spectrogram_cuts_tfrecords(awd_event, site,verbose=False):
    '''Extract the whistler and noise cuts and store each one in its own TFRecord file.

    Files are listed with `utilities.all_files(awd_event, site)`. For every
    file, `spectrogram_output_visualiser.spectrogram_cut` yields the spectrogram
    and the cut boundaries; each cut is written to

        <parameters.tfrecord_location>/awdEvents1/cuts/<stem>.<flag>_<c0>_<c1>_<c2>_<c3>.tfr

    where `<stem>` is the file name without extension, `<flag>` is 1 for
    whistler cuts and 0 for noise cuts, and `c0..c3` are the cut boundaries.
    The stored `merit` is int(indices[i][-1]) for whistler cuts and 0 for noise.

    NOTE(review): the 'awdEvents1' directory is hard-coded regardless of
    `awd_event`, and 10 is passed positionally as the fourth argument of
    `spectrogram_cut` (presumably the threshold) -- confirm both are intended.

    params:
        awd_event, site -- select the files and are forwarded to `spectrogram_cut`
        verbose -- print a header, a progress trace and the runtime
    returns:
        None
    '''
    start = time.time()
    files = utilities.all_files(awd_event, site)
    if verbose:
        print('\nGenerating whistler and noise cuts tfrecord files for %s/%s' %('awdEvent'+str(awd_event),site))
    last_percent = None
    cut_dir = os.path.join(parameters.tfrecord_location, 'awdEvents1', 'cuts')

    for num_file, file in enumerate(files):
        indices, spectrogram, spec_cuts, noise_cuts, f_cut_length, t_cut_length = spectrogram_output_visualiser.spectrogram_cut(awd_event, site, file, 10)
        stem = os.path.splitext(file)[0]
        # Whistler cuts first so that `i` lines up with `indices`.
        labelled_cuts = [(cut, True) for cut in spec_cuts] + [(cut, False) for cut in noise_cuts]
        for i, (cut, is_whistler) in enumerate(labelled_cuts):
            spec = spectrogram[cut[0]:cut[1],cut[2]:cut[3]] # extract portion of interest in the spectrogram
            bounds = '_'.join(str(b) for b in (cut[0], cut[1], cut[2], cut[3]))
            flag = '.1' if is_whistler else '.0'
            dataset_name = os.path.join(cut_dir, stem + flag + '_' + bounds + '.tfr')
            merit = int(indices[i][-1]) if is_whistler else 0
            _write_cut_tfrecord(dataset_name, spec, merit, is_whistler)

        if verbose:
            percent = int(num_file*100/len(files))
            if last_percent != percent:
                # print the figure on multiples of ten, a dot otherwise
                sys.stdout.write(("%s%%" % percent) if percent % 10 == 0 else ".")
                sys.stdout.flush()
                last_percent = percent

    end = time.time()
    if verbose:
        print("\nRuntime: {:.2f} seconds".format(end - start))

## Separate H5 file per cut

In [None]:
def spectrogram_cuts_h5py(awd_event, site,verbose=False):
    '''Extract the whistler and noise cuts and store each one in its own h5py file.

    Files are listed with `utilities.all_files(awd_event, site)`. For every
    file, `spectrogram_output_visualiser.spectrogram_cut` yields the spectrogram
    and the cut boundaries; each cut is written to

        <parameters.hyp5_location>/awdEvents1/cuts/<stem>.<flag>_<c0>_<c1>_<c2>_<c3>.h5

    where `<stem>` is the file name without extension, `<flag>` is 1 for
    whistler cuts and 0 for noise cuts, and `c0..c3` are the boundaries of that
    cut. Inside each file the cut is a gzip-compressed float32 dataset whose
    name is the same path string, with attributes `pb` (int(indices[i][-1])
    for whistlers, 0 for noise), `evt`, `freq_length` and `time_length`.

    Noise file names are built from the noise cut's own boundaries. Building
    them from the last whistler cut made every noise cut of a recording map to
    the same path, so they overwrote each other.

    NOTE(review): the 'awdEvents1' directory is hard-coded regardless of
    `awd_event`, and 10 is passed positionally as the fourth argument of
    `spectrogram_cut` (presumably the threshold) -- confirm both are intended.

    params:
        awd_event, site -- select the files and are forwarded to `spectrogram_cut`
        verbose -- print a header, a progress trace and the runtime
    returns:
        None
    '''
    start = time.time()
    files = utilities.all_files(awd_event, site)

    if verbose:
        print('\nGenerating whistler and noise cuts database for %s/%s' %('awdEvent'+str(awd_event),site))
    last_percent = None
    cut_dir = os.path.join(parameters.hyp5_location, 'awdEvents1', 'cuts')

    for num_file, file in enumerate(files):
        indices, spectrogram, spec_cuts, noise_cuts, f_cut_length, t_cut_length = spectrogram_output_visualiser.spectrogram_cut(awd_event, site, file, 10)
        stem = os.path.splitext(file)[0]
        # Whistler cuts first so that `i` lines up with `indices`.
        labelled_cuts = [(cut, True) for cut in spec_cuts] + [(cut, False) for cut in noise_cuts]
        for i, (cut, is_whistler) in enumerate(labelled_cuts):
            spec = spectrogram[cut[0]:cut[1],cut[2]:cut[3]] # extract portion of interest in the spectrogram
            bounds = '_'.join(str(b) for b in (cut[0], cut[1], cut[2], cut[3]))
            flag = '.1' if is_whistler else '.0'
            dataset_name = os.path.join(cut_dir, stem + flag + '_' + bounds + '.h5')
            # The context manager closes the file even if writing the dataset fails.
            with h5py.File(dataset_name, 'w') as f:
                # The dataset keeps the file path as its name, as before, so
                # existing readers still find it under the same key.
                file_dataset = f.create_dataset(dataset_name, spec.shape, np.float32,
                                                compression="gzip", data=spec)
                file_dataset.attrs['pb'] = int(indices[i][-1]) if is_whistler else 0
                file_dataset.attrs['evt'] = is_whistler
                file_dataset.attrs['freq_length'] = f_cut_length
                file_dataset.attrs['time_length'] = t_cut_length

        if verbose:
            percent = int(num_file*100/len(files))
            if last_percent != percent:
                # print the figure on multiples of ten, a dot otherwise
                sys.stdout.write(("%s%%" % percent) if percent % 10 == 0 else ".")
                sys.stdout.flush()
                last_percent = percent

    end = time.time()
    if verbose:
        print("\nRuntime: {:.2f} seconds".format(end - start))