From 3e10195fd4cc2c37b14e65e63fab36ae50f1d6a0 Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Wed, 19 Dec 2018 15:07:07 +0100 Subject: [PATCH 01/14] create pipeline module --- clipping/clipping.py | 4 +- filterbank/__init__.py | 2 + fourier/__init__.py | 2 +- pipeline/__init__.py | 1 + pipeline/pipeline.py | 82 ++++++++++++++++++++++++++++++++++++++++ pipeline/run.py | 11 ++++++ tests/test_benchmark.py | 39 +++++++++++++++++++ timeseries/timeseries.py | 2 +- 8 files changed, 139 insertions(+), 4 deletions(-) create mode 100644 pipeline/__init__.py create mode 100644 pipeline/pipeline.py create mode 100644 pipeline/run.py create mode 100644 tests/test_benchmark.py diff --git a/clipping/clipping.py b/clipping/clipping.py index d78d47e..6b3b978 100644 --- a/clipping/clipping.py +++ b/clipping/clipping.py @@ -24,7 +24,7 @@ def filter_samples(samples): Calulate mean power of all frequencies per time sample and remove samples with significantly high power """ - factor = 1.3 + factor = 1.01 new_samples = [] # calculate mean intensity per sample avg_sample = np.sum(samples)/len(samples) @@ -40,7 +40,7 @@ def filter_channels(channels, samples): Calculate mean power of all time samples per frequency and remove frequencies with significantly high power """ - factor = 1.3 + factor = 1.01 bad_channels = [] # calculate the mean power per channel avg_power_chan = samples.mean(axis=0) diff --git a/filterbank/__init__.py b/filterbank/__init__.py index e69de29..2c9bf1a 100644 --- a/filterbank/__init__.py +++ b/filterbank/__init__.py @@ -0,0 +1,2 @@ +from .filterbank import Filterbank +from .filterbank import read_header \ No newline at end of file diff --git a/fourier/__init__.py b/fourier/__init__.py index 8b4b1fc..a3811b7 100644 --- a/fourier/__init__.py +++ b/fourier/__init__.py @@ -1,4 +1,4 @@ """ import file for fourier.py """ -from .fourier import fft_freq, fft_vectorized, dft_slow, fft_matrix, fft_shift +from .fourier import fft_freq, fft_vectorized, dft_slow, fft_matrix, fft_shift, ifft diff --git a/pipeline/__init__.py b/pipeline/__init__.py new file mode 100644 index 0000000..f842286 --- /dev/null +++ b/pipeline/__init__.py @@ -0,0 +1 @@ +from .pipeline import Pipeline \ No newline at end of file diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py new file mode 100644 index 0000000..732b180 --- /dev/null +++ b/pipeline/pipeline.py @@ -0,0 +1,82 @@ +""" + Pipeline for all modules +""" +import os +import sys +import inspect +CURRENT_DIR = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +PARENT_DIR = os.path.dirname(CURRENT_DIR) +sys.path.insert(0, PARENT_DIR) +import filterbank.filterbank as filterbank +import clipping +import dedisperse +import timeseries.timeseries as timeseries +import fourier +import time + +class Pipeline(): + """ + The Pipeline combines the functionality of all modules + in the library. + """ + + def __init__(self, as_stream=False, filename=None, DM=230, scale=3): + """ + Initialize Pipeline object + + Args: + as_stream, read the filterbank data as stream + """ + if as_stream: + self.read_stream(filename, DM, scale) + else: + self.read_static(filename, DM, scale) + + + def read_stream(self, filename, DM, scale): + """ + Read the filterbank data as stream + """ + + + + def read_static(self, filename, DM, scale): + """ + Read the filterbank data at once + """ + stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', + 'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft' + 'time_ifft', 'time_fft_freq']) + time_start = time.time() + # init filterbank + fil = filterbank.Filterbank(filename, read_all=True) + stopwatch['time_read'] = time.time() - time_start + # select data + freqs, fil_data = fil.select_data() + stopwatch['time_select'] = time.time() - stopwatch['time_read'] + # clipping + _, _ = clipping.clipping(freqs, fil_data) + stopwatch['time_clipping'] = time.time() - stopwatch['time_select'] + # dedisperse + fil_data = dedisperse.dedisperse(fil_data, DM) + stopwatch['time_dedisp'] = time.time() - stopwatch['time_clipping'] + # timeseries + time_series = timeseries.Timeseries(fil_data) + stopwatch['time_t_series'] = time.time() - stopwatch['time_dedisp'] + # downsample + time_series = time_series.downsample(scale) + stopwatch['time_downsample'] = time.time()- stopwatch['time_t_series'] + # fft vect + fourier.fft_vectorized(time_series) + stopwatch['time_fft_vect'] = time.time() - stopwatch['time_downsample'] + # dft + fourier.dft_slow(time_series) + stopwatch['time_dft'] = time.time() - stopwatch['time_fft_vect'] + # ifft + fourier.ifft(time_series) + stopwatch['time_ifft'] = time.time() - stopwatch['time_dft'] + # fft freq + fourier.fft_freq(10) + stopwatch['time_fft_freq'] = time.time() - stopwatch['time_ifft'] + print(stopwatch) + diff --git a/pipeline/run.py b/pipeline/run.py new file mode 100644 index 0000000..71f1a50 --- /dev/null +++ b/pipeline/run.py @@ -0,0 +1,11 @@ +""" + Script for running the pipeline +""" + +import pipeline + + + +pipeline.Pipeline(filename='pspm32.fil') + + diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py new file mode 100644 index 0000000..e3606b2 --- /dev/null +++ b/tests/test_benchmark.py @@ -0,0 +1,39 @@ +""" + benchmarking all functions, except for plots +""" + +import time +import unittest + +from .context import clipping, dedisperse, fourier, filterbank, header + + +class TestBenchmark(unittest.TestCase): + """ + Testclass for benchmarking all methods + """ + + functions = clipping.clipping, dedisperse.dedisperse, fourier.dft_slow, fourier.fft_freq, fourier.fft_matrix, fourier.fft_shift, fourier.fft_vectorized, filterbank.Filterbank, filterbank.read_header + + times = {f.__name__: [] for f in functions} + + # def test_benchmark(self): + # for func in enumerate(self.functions): + # t0 = time.time() + # # print(func.__name__) + # t1 = time.time() + # # print(t1-t0) + # # self.times[func.__name__].append((t1-t0) * 1000) + + + def test_benchmark2(self): + for _, func in enumerate(self.functions): + t0 = time.time() + func() + t1 = time.time() + self.times[func.__name__].append((t1 - t0) * 1000) + + +if __name__ == '__main__': + unittest.main() + diff --git a/timeseries/timeseries.py b/timeseries/timeseries.py index 3f2d04c..e0ba981 100644 --- a/timeseries/timeseries.py +++ b/timeseries/timeseries.py @@ -29,7 +29,6 @@ def downsample(self, sample_scale): return self.timeseries - def get(self): """ Returns the current timeseries object which, @@ -37,6 +36,7 @@ def get(self): """ return self.timeseries + def from_filterbank(self, filterbank_object): """ Initializes timeseries object from filterbank object, From 63dfa4af4cf32886ee2ced67d76868b17e9627bb Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Wed, 19 Dec 2018 15:36:59 +0100 Subject: [PATCH 02/14] add time measurements to pipeline --- pipeline/pipeline.py | 48 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py index 732b180..34b2357 100644 --- a/pipeline/pipeline.py +++ b/pipeline/pipeline.py @@ -13,6 +13,8 @@ import timeseries.timeseries as timeseries import fourier import time +import datetime +import timeit class Pipeline(): """ @@ -37,7 +39,14 @@ def read_stream(self, filename, DM, scale): """ Read the filterbank data as stream """ + stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', + 'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft' + 'time_ifft', 'time_fft_freq']) + # init filterbank as stream + fil = filterbank.Filterbank(filename) + fil.select_data() + def read_static(self, filename, DM, scale): @@ -47,36 +56,53 @@ def read_static(self, filename, DM, scale): stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', 'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft' 'time_ifft', 'time_fft_freq']) - time_start = time.time() + time_start = datetime.datetime.now() + + a = datetime.datetime.now() + b = datetime.datetime.now() + + time_took = b - a + + print(time_took.seconds) # init filterbank fil = filterbank.Filterbank(filename, read_all=True) - stopwatch['time_read'] = time.time() - time_start + stopwatch['time_read'] = time_start - datetime.datetime.now() # select data freqs, fil_data = fil.select_data() - stopwatch['time_select'] = time.time() - stopwatch['time_read'] + time_select = datetime.datetime.now() - stopwatch['time_read'] + stopwatch['time_read'] = time_select.microsecond # clipping _, _ = clipping.clipping(freqs, fil_data) - stopwatch['time_clipping'] = time.time() - stopwatch['time_select'] + time_clipping = datetime.datetime.now() - time_select + stopwatch['time_clipping'] = time_clipping.microseconds + # stopwatch['time_clipping'] = stopwatch['time_clipping'].microseconds # dedisperse fil_data = dedisperse.dedisperse(fil_data, DM) - stopwatch['time_dedisp'] = time.time() - stopwatch['time_clipping'] + time_dedisp = datetime.datetime.now() - time_clipping + stopwatch['time_dedisp'] = time_dedisp.microseconds # timeseries time_series = timeseries.Timeseries(fil_data) - stopwatch['time_t_series'] = time.time() - stopwatch['time_dedisp'] + time_t_series = datetime.datetime.now() - time_dedisp + stopwatch['time_t_series'] = time_t_series.microseconds # downsample time_series = time_series.downsample(scale) - stopwatch['time_downsample'] = time.time()- stopwatch['time_t_series'] + time_downsample = datetime.datetime.now() - time_t_series + stopwatch['time_downsample'] = time_downsample.microseconds # fft vect fourier.fft_vectorized(time_series) - stopwatch['time_fft_vect'] = time.time() - stopwatch['time_downsample'] + time_fft_vect = datetime.datetime.now() - time_downsample + stopwatch['time_fft_vect'] = time_fft_vect.microseconds # dft fourier.dft_slow(time_series) - stopwatch['time_dft'] = time.time() - stopwatch['time_fft_vect'] + time_dft = datetime.datetime.now() - time_fft_vect + stopwatch['time_dft'] = time_dft.microseconds # ifft fourier.ifft(time_series) - stopwatch['time_ifft'] = time.time() - stopwatch['time_dft'] + time_ifft = datetime.datetime.now() - time_dft + stopwatch['time_ifft'] = time_ifft.microseconds # fft freq fourier.fft_freq(10) - stopwatch['time_fft_freq'] = time.time() - stopwatch['time_ifft'] + time_fft_freq = datetime.datetime.now() - time_ifft + stopwatch['time_fft_freq'] = time_fft_freq.microseconds print(stopwatch) From d9e1d295d5a532994626789de97abe7d2a81794d Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Wed, 19 Dec 2018 22:18:40 +0100 Subject: [PATCH 03/14] improve pipeline module --- filterbank/__init__.py | 5 +- pipeline/__init__.py | 5 +- pipeline/pipeline.py | 135 +++++++++++++++++++++++++--------------- pipeline/run.py | 9 +-- tests/test_benchmark.py | 39 ------------ timeseries/__init__.py | 4 ++ 6 files changed, 103 insertions(+), 94 deletions(-) delete mode 100644 tests/test_benchmark.py diff --git a/filterbank/__init__.py b/filterbank/__init__.py index 2c9bf1a..747d31a 100644 --- a/filterbank/__init__.py +++ b/filterbank/__init__.py @@ -1,2 +1,5 @@ +""" + Export filterbank methods +""" from .filterbank import Filterbank -from .filterbank import read_header \ No newline at end of file +from .filterbank import read_header diff --git a/pipeline/__init__.py b/pipeline/__init__.py index f842286..c18533d 100644 --- a/pipeline/__init__.py +++ b/pipeline/__init__.py @@ -1 +1,4 @@ -from .pipeline import Pipeline \ No newline at end of file +""" + Export pipeline methods +""" +from .pipeline import Pipeline diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py index 34b2357..7920bee 100644 --- a/pipeline/pipeline.py +++ b/pipeline/pipeline.py @@ -1,28 +1,32 @@ """ Pipeline for all modules """ +# pylint: disable=wrong-import-position import os import sys import inspect CURRENT_DIR = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) PARENT_DIR = os.path.dirname(CURRENT_DIR) sys.path.insert(0, PARENT_DIR) -import filterbank.filterbank as filterbank +from timeit import default_timer as timer +import filterbank.filterbank +import timeseries.timeseries import clipping import dedisperse -import timeseries.timeseries as timeseries import fourier -import time -import datetime -import timeit + +# pylint: disable=too-many-locals +# pylint: disable=too-many-arguments +# pylint: disable=invalid-name +# pylint: disable=no-self-use class Pipeline(): """ The Pipeline combines the functionality of all modules in the library. """ - - def __init__(self, as_stream=False, filename=None, DM=230, scale=3): + # pylint: disable + def __init__(self, filename=None, as_stream=False, DM=230, scale=3, n=None): """ Initialize Pipeline object @@ -30,79 +34,112 @@ def __init__(self, as_stream=False, filename=None, DM=230, scale=3): as_stream, read the filterbank data as stream """ if as_stream: - self.read_stream(filename, DM, scale) + if n: + result = self.read_n_rows(n, filename, DM, scale) + file = open("stream_filterbank.txt", "a+") + else: + result = self.read_rows(filename) + file = open("row_filterbank.txt", "a+") else: - self.read_static(filename, DM, scale) + result = self.read_static(filename, DM, scale) + file = open("static_filterbank", "a+") + file.write(str(result) + ",") + file.close() - def read_stream(self, filename, DM, scale): + def read_rows(self, filename): """ Read the filterbank data as stream + and measure the time """ - stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', - 'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft' - 'time_ifft', 'time_fft_freq']) # init filterbank as stream fil = filterbank.Filterbank(filename) + time_start = timer() + while True: + fil_data = fil.next_row() + if isinstance(fil_data, bool): + break + time_stop = timer() - time_start + return time_stop + + def read_n_rows(self, n, filename, DM, scale): + """ + Read the filterbank data as stream + and measure the time + """ + fil = filterbank.Filterbank(filename) + stopwatch_list = list() + while True: + stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', + 'time_t_series', 'time_downsample', 'time_fft_vect', + 'time_dft', 'time_ifft', 'time_fft_freq']) + time_start = timer() + fil_data = fil.next_n_rows(n) + # break if EOF + if isinstance(fil_data, bool): + break + stopwatch['time_read'] = timer() - time_start + # run methods + stopwatch = self.measure_methods(stopwatch, fil_data, fil.freqs, DM, scale) + stopwatch_list.append(stopwatch) + return stopwatch_list - fil.select_data() - - def read_static(self, filename, DM, scale): """ Read the filterbank data at once + and measure the time per function/class """ - stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', - 'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft' + stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', + 'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft', 'time_ifft', 'time_fft_freq']) - time_start = datetime.datetime.now() - - a = datetime.datetime.now() - b = datetime.datetime.now() - - time_took = b - a - - print(time_took.seconds) + time_start = timer() # init filterbank - fil = filterbank.Filterbank(filename, read_all=True) - stopwatch['time_read'] = time_start - datetime.datetime.now() + fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, 10000)) + stopwatch['time_read'] = timer() - time_start # select data + time_select = timer() freqs, fil_data = fil.select_data() - time_select = datetime.datetime.now() - stopwatch['time_read'] - stopwatch['time_read'] = time_select.microsecond + stopwatch['time_select'] = timer() - time_select + # run methods + stopwatch = self.measure_methods(stopwatch, fil_data, freqs, DM, scale) + return stopwatch + + + def measure_methods(self, stopwatch, fil_data, freqs, DM, scale): + """ + Run and time all methods/modules + """ # clipping + time_clipping = timer() _, _ = clipping.clipping(freqs, fil_data) - time_clipping = datetime.datetime.now() - time_select - stopwatch['time_clipping'] = time_clipping.microseconds - # stopwatch['time_clipping'] = stopwatch['time_clipping'].microseconds + stopwatch['time_clipping'] = timer() - time_clipping # dedisperse + time_dedisp = timer() fil_data = dedisperse.dedisperse(fil_data, DM) - time_dedisp = datetime.datetime.now() - time_clipping - stopwatch['time_dedisp'] = time_dedisp.microseconds + stopwatch['time_dedisp'] = timer() - time_dedisp # timeseries + time_t_series = timer() time_series = timeseries.Timeseries(fil_data) - time_t_series = datetime.datetime.now() - time_dedisp - stopwatch['time_t_series'] = time_t_series.microseconds + stopwatch['time_t_series'] = timer() - time_t_series # downsample + time_downsamp = timer() time_series = time_series.downsample(scale) - time_downsample = datetime.datetime.now() - time_t_series - stopwatch['time_downsample'] = time_downsample.microseconds + stopwatch['time_downsample'] = timer() - time_downsamp # fft vect + time_fft_vect = timer() fourier.fft_vectorized(time_series) - time_fft_vect = datetime.datetime.now() - time_downsample - stopwatch['time_fft_vect'] = time_fft_vect.microseconds + stopwatch['time_fft_vect'] = timer() - time_fft_vect # dft + time_dft = timer() fourier.dft_slow(time_series) - time_dft = datetime.datetime.now() - time_fft_vect - stopwatch['time_dft'] = time_dft.microseconds + stopwatch['time_dft'] = timer() - time_dft # ifft + time_ifft = timer() fourier.ifft(time_series) - time_ifft = datetime.datetime.now() - time_dft - stopwatch['time_ifft'] = time_ifft.microseconds + stopwatch['time_ifft'] = timer() - time_ifft # fft freq + time_fft_freq = timer() fourier.fft_freq(10) - time_fft_freq = datetime.datetime.now() - time_ifft - stopwatch['time_fft_freq'] = time_fft_freq.microseconds - print(stopwatch) - + stopwatch['time_fft_freq'] = timer() - time_fft_freq + return stopwatch diff --git a/pipeline/run.py b/pipeline/run.py index 71f1a50..bf9bed4 100644 --- a/pipeline/run.py +++ b/pipeline/run.py @@ -4,8 +4,9 @@ import pipeline - - +# read static pipeline.Pipeline(filename='pspm32.fil') - - +# read stream, row per row +pipeline.Pipeline(filename='pspm32.fil', as_stream=True) +# read stream, n rows +pipeline.Pipeline(filename='pspm32.fil', as_stream=True, n=10) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py deleted file mode 100644 index e3606b2..0000000 --- a/tests/test_benchmark.py +++ /dev/null @@ -1,39 +0,0 @@ -""" - benchmarking all functions, except for plots -""" - -import time -import unittest - -from .context import clipping, dedisperse, fourier, filterbank, header - - -class TestBenchmark(unittest.TestCase): - """ - Testclass for benchmarking all methods - """ - - functions = clipping.clipping, dedisperse.dedisperse, fourier.dft_slow, fourier.fft_freq, fourier.fft_matrix, fourier.fft_shift, fourier.fft_vectorized, filterbank.Filterbank, filterbank.read_header - - times = {f.__name__: [] for f in functions} - - # def test_benchmark(self): - # for func in enumerate(self.functions): - # t0 = time.time() - # # print(func.__name__) - # t1 = time.time() - # # print(t1-t0) - # # self.times[func.__name__].append((t1-t0) * 1000) - - - def test_benchmark2(self): - for _, func in enumerate(self.functions): - t0 = time.time() - func() - t1 = time.time() - self.times[func.__name__].append((t1 - t0) * 1000) - - -if __name__ == '__main__': - unittest.main() - diff --git a/timeseries/__init__.py b/timeseries/__init__.py index e69de29..555e481 100644 --- a/timeseries/__init__.py +++ b/timeseries/__init__.py @@ -0,0 +1,4 @@ +""" + Export timeseries methods +""" +from .timeseries import Timeseries From 7e54d299b7eb12ff04ca235e2d7706550a0eaa04 Mon Sep 17 00:00:00 2001 From: robinbaneke <13999868+robinbaneke@users.noreply.github.com> Date: Thu, 20 Dec 2018 12:39:02 +0100 Subject: [PATCH 04/14] improve and prepare pipeline --- clipping/clipping.py | 23 ++++++++++------------- fourier/fourier.py | 2 ++ pipeline/pipeline.py | 3 ++- pipeline/run.py | 8 ++++---- tests/test_clipping.py | 5 +---- 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/clipping/clipping.py b/clipping/clipping.py index 6b3b978..dc09828 100644 --- a/clipping/clipping.py +++ b/clipping/clipping.py @@ -13,19 +13,21 @@ def clipping(channels, samples): # remove all rows(samples) with noise samples = filter_samples(samples) # remove all columns(channels) with noise, select first max n samples - channels, samples = filter_channels(channels, samples[:n_samples]) + bad_channels = filter_channels(samples[:n_samples]) + # remove bad channels from samples + channels = np.delete(channels, bad_channels) + samples = np.delete(samples, bad_channels, axis=1) # remove all individual cells with noise samples = filter_indv_channels(samples) return channels, samples -def filter_samples(samples): +def filter_samples(samples, factor=11): """ Calulate mean power of all frequencies per time sample and remove samples with significantly high power """ - factor = 1.01 - new_samples = [] + new_samples = list() # calculate mean intensity per sample avg_sample = np.sum(samples)/len(samples) # remove samples with significant high power @@ -35,13 +37,12 @@ def filter_samples(samples): return np.array(new_samples) -def filter_channels(channels, samples): +def filter_channels(samples, factor=9): """ Calculate mean power of all time samples per frequency and remove frequencies with significantly high power """ - factor = 1.01 - bad_channels = [] + bad_channels = list() # calculate the mean power per channel avg_power_chan = samples.mean(axis=0) # calculate the standard deviation per channel @@ -52,18 +53,14 @@ def filter_channels(channels, samples): for i, (avg_channel, sd_channel) in enumerate(zip(avg_power_chan, sd_power_chan)): if avg_channel >= (avg_power * factor) or sd_channel >= (avg_power * factor): bad_channels.append(i) - # remove bad channels from samples - new_channels = np.delete(channels, bad_channels) - new_samples = np.delete(samples, bad_channels, axis=1) - return new_channels, new_samples + return bad_channels -def filter_indv_channels(samples): +def filter_indv_channels(samples, factor=9): """ Calculate mean power per frequency and remove samples with significantly high power """ - factor = 1.3 new_samples = np.zeros((len(samples), len(samples[0]))) # calculate the mean power for each sample per channel avg_power_chan = samples.mean(axis=0) diff --git a/fourier/fourier.py b/fourier/fourier.py index bdcfc1f..fceff7f 100644 --- a/fourier/fourier.py +++ b/fourier/fourier.py @@ -80,6 +80,8 @@ def fft_vectorized(input_data, nfft=None, axis=-1): input_data = zeroes data_length = input_data.shape[0] + + print(np.log2(data_length)) if np.log2(data_length) % 1 > 0: raise ValueError("Size of input data must be a power of 2") diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py index 7920bee..75856cb 100644 --- a/pipeline/pipeline.py +++ b/pipeline/pipeline.py @@ -62,6 +62,7 @@ def read_rows(self, filename): time_stop = timer() - time_start return time_stop + def read_n_rows(self, n, filename, DM, scale): """ Read the filterbank data as stream @@ -95,7 +96,7 @@ def read_static(self, filename, DM, scale): 'time_ifft', 'time_fft_freq']) time_start = timer() # init filterbank - fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, 10000)) + fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, 49150)) stopwatch['time_read'] = timer() - time_start # select data time_select = timer() diff --git a/pipeline/run.py b/pipeline/run.py index bf9bed4..34d634e 100644 --- a/pipeline/run.py +++ b/pipeline/run.py @@ -1,12 +1,12 @@ """ Script for running the pipeline """ - +import os import pipeline # read static -pipeline.Pipeline(filename='pspm32.fil') +pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil")) # read stream, row per row -pipeline.Pipeline(filename='pspm32.fil', as_stream=True) +# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True) # read stream, n rows -pipeline.Pipeline(filename='pspm32.fil', as_stream=True, n=10) +# pipeline.Pipeline(filename='pspm32.fil', as_stream=True, n=10) diff --git a/tests/test_clipping.py b/tests/test_clipping.py index 30a8565..a9d9366 100644 --- a/tests/test_clipping.py +++ b/tests/test_clipping.py @@ -51,12 +51,9 @@ def test_filter_channels(self): When filtering samples, all channels with a relative high power should be removed """ - result_channels, result_samples = clipping.filter_channels( - self.fil_chans, np.array(self.fil_vector)) + result_channels = np.array(clipping.filter_channels(np.array(self.fil_vector))) expect_channels = np.delete(self.fil_chans, self.bad_channels) - expect_samples = np.delete(self.fil_vector, self.bad_channels, axis=1) self.assertEqual(result_channels.all(), expect_channels.all()) - self.assertEqual(result_samples.all(), expect_samples.all()) def test_filter_indv_channels(self): """ From b83c02ea855be6c59510a98bb2f1495e1d299ccf Mon Sep 17 00:00:00 2001 From: robinbaneke <13999868+robinbaneke@users.noreply.github.com> Date: Thu, 20 Dec 2018 12:41:23 +0100 Subject: [PATCH 05/14] solve pylint issues --- fourier/fourier.py | 2 -- pipeline/run.py | 4 ++-- plot/waterfall.py | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/fourier/fourier.py b/fourier/fourier.py index fceff7f..bdcfc1f 100644 --- a/fourier/fourier.py +++ b/fourier/fourier.py @@ -80,8 +80,6 @@ def fft_vectorized(input_data, nfft=None, axis=-1): input_data = zeroes data_length = input_data.shape[0] - - print(np.log2(data_length)) if np.log2(data_length) % 1 > 0: raise ValueError("Size of input data must be a power of 2") diff --git a/pipeline/run.py b/pipeline/run.py index 34d634e..058ea9b 100644 --- a/pipeline/run.py +++ b/pipeline/run.py @@ -5,8 +5,8 @@ import pipeline # read static -pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil")) +# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil")) # read stream, row per row # pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True) # read stream, n rows -# pipeline.Pipeline(filename='pspm32.fil', as_stream=True, n=10) +pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10) diff --git a/plot/waterfall.py b/plot/waterfall.py index 4786da8..331450b 100644 --- a/plot/waterfall.py +++ b/plot/waterfall.py @@ -89,7 +89,6 @@ def update_plot_labels(self): else: freq_range = ((center_freq - sample_freq/2)/1e6,\ (center_freq + sample_freq*(self.scans_per_sweep - 0.5))/1e6) - print(self.image) self.image.set_extent(freq_range + (0, 1)) self.fig.canvas.draw_idle() From d5bb3cb03af8138ea35dd860cdb84024c8ff5aeb Mon Sep 17 00:00:00 2001 From: robinbaneke <13999868+robinbaneke@users.noreply.github.com> Date: Thu, 20 Dec 2018 13:08:04 +0100 Subject: [PATCH 06/14] add test for pipeline --- tests/context.py | 1 + tests/test_pipeline.py | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 tests/test_pipeline.py diff --git a/tests/context.py b/tests/context.py index 0ac04d0..c7c4e7d 100644 --- a/tests/context.py +++ b/tests/context.py @@ -14,3 +14,4 @@ import dedisperse import filterbank.header as header import filterbank.filterbank as filterbank +import pipeline.pipeline as pipeline diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py new file mode 100644 index 0000000..0c70bd2 --- /dev/null +++ b/tests/test_pipeline.py @@ -0,0 +1,37 @@ +""" + pipeline.py unit tests +""" + +import unittest + +from .context import pipeline + +class TestPipeline(unittest.TestCase): + """ + Class for testing pipeline.py + """ + + def test_static_pipeline(self): + """ + When running the static pipeline, + expect a file with the time per method + """ + filename = './pspm8.fil' + pipeline.Pipeline(filename) + + def test_row_pipeline(self): + """ + When running the pipeline as stream, + expect a file with the time per method + """ + filename = './pspm8.fil' + pipeline.Pipeline(filename, as_stream=True) + + + def test_n_rows_pipeline(self): + """ + When running the pipeline as stream, + expect a file with the time per method per chunk + """ + filename = './pspm8.fil' + pipeline.Pipeline(filename, as_stream=True, n=10) From b352bea1e6bdb55b2f864d984a5cc620fb644809 Mon Sep 17 00:00:00 2001 From: robinbaneke <13999868+robinbaneke@users.noreply.github.com> Date: Thu, 20 Dec 2018 14:14:34 +0100 Subject: [PATCH 07/14] improve tests for pipeline --- pipeline/pipeline.py | 16 ++++++++-------- pipeline/run.py | 4 ++-- tests/test_pipeline.py | 15 +++++++++++++-- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py index 75856cb..8cf7103 100644 --- a/pipeline/pipeline.py +++ b/pipeline/pipeline.py @@ -25,8 +25,8 @@ class Pipeline(): The Pipeline combines the functionality of all modules in the library. """ - # pylint: disable - def __init__(self, filename=None, as_stream=False, DM=230, scale=3, n=None): + + def __init__(self, filename=None, as_stream=False, DM=230, scale=3, n=None, size=None): """ Initialize Pipeline object @@ -36,13 +36,13 @@ def __init__(self, filename=None, as_stream=False, DM=230, scale=3, n=None): if as_stream: if n: result = self.read_n_rows(n, filename, DM, scale) - file = open("stream_filterbank.txt", "a+") + file = open("n_rows_filterbank.txt", "a+") else: result = self.read_rows(filename) - file = open("row_filterbank.txt", "a+") + file = open("rows_filterbank.txt", "a+") else: - result = self.read_static(filename, DM, scale) - file = open("static_filterbank", "a+") + result = self.read_static(filename, DM, scale, size) + file = open("static_filterbank.txt", "a+") file.write(str(result) + ",") file.close() @@ -86,7 +86,7 @@ def read_n_rows(self, n, filename, DM, scale): return stopwatch_list - def read_static(self, filename, DM, scale): + def read_static(self, filename, DM, scale, size): """ Read the filterbank data at once and measure the time per function/class @@ -96,7 +96,7 @@ def read_static(self, filename, DM, scale): 'time_ifft', 'time_fft_freq']) time_start = timer() # init filterbank - fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, 49150)) + fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, size)) stopwatch['time_read'] = timer() - time_start # select data time_select = timer() diff --git a/pipeline/run.py b/pipeline/run.py index 058ea9b..393ce07 100644 --- a/pipeline/run.py +++ b/pipeline/run.py @@ -5,8 +5,8 @@ import pipeline # read static -# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil")) +pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), size=49150) # read stream, row per row # pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True) # read stream, n rows -pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10) +# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10) \ No newline at end of file diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 0c70bd2..0e57989 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,7 +1,7 @@ """ pipeline.py unit tests """ - +import os import unittest from .context import pipeline @@ -17,7 +17,10 @@ def test_static_pipeline(self): expect a file with the time per method """ filename = './pspm8.fil' + new_file = './static_filterbank.txt' pipeline.Pipeline(filename) + self.assertTrue(os.path.exists(new_file)) + os.remove(new_file) def test_row_pipeline(self): """ @@ -25,8 +28,10 @@ def test_row_pipeline(self): expect a file with the time per method """ filename = './pspm8.fil' + new_file = './rows_filterbank.txt' pipeline.Pipeline(filename, as_stream=True) - + self.assertTrue(os.path.exists(new_file)) + os.remove(new_file) def test_n_rows_pipeline(self): """ @@ -34,4 +39,10 @@ def test_n_rows_pipeline(self): expect a file with the time per method per chunk """ filename = './pspm8.fil' + new_file = './n_rows_filterbank.txt' pipeline.Pipeline(filename, as_stream=True, n=10) + self.assertTrue(os.path.exists(new_file)) + os.remove(new_file) + +if __name__ == '__main__': + unittest.main() From 23cfdd92186bf614c13c4d060448320e5c46efed Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Thu, 20 Dec 2018 17:10:03 +0100 Subject: [PATCH 08/14] add newline --- pipeline/run.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pipeline/run.py b/pipeline/run.py index 393ce07..203cb3d 100644 --- a/pipeline/run.py +++ b/pipeline/run.py @@ -4,9 +4,10 @@ import os import pipeline -# read static -pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), size=49150) -# read stream, row per row -# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True) -# read stream, n rows -# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10) \ No newline at end of file +for i in range(10): + # # read static + pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), size=49150) + # read stream, row per row + pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True) + # read stream, n rows + pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10) From 3930ecdfe90c684695e9b1e5b4595377b3060aa1 Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Sat, 22 Dec 2018 10:47:08 +0100 Subject: [PATCH 09/14] changes to run.py --- pipeline/run.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pipeline/run.py b/pipeline/run.py index 203cb3d..53a9c98 100644 --- a/pipeline/run.py +++ b/pipeline/run.py @@ -4,10 +4,12 @@ import os import pipeline -for i in range(10): - # # read static - pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), size=49150) +fil_name = os.path.abspath("E:/11100335.320.all.fil") + +for i in range(1000): + # read static + pipeline.Pipeline(filename=fil_name, size=49150) # read stream, row per row - pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True) + pipeline.Pipeline(filename=fil_name, as_stream=True) # read stream, n rows - pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10) + pipeline.Pipeline(filename=fil_name, as_stream=True, n=10) From 53f628ae638488e0bb62cbaac37379d9574724e7 Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Sat, 22 Dec 2018 16:00:11 +0100 Subject: [PATCH 10/14] move pipeline runner to examples --- examples/run_pipeline.py | 25 +++++++++++++++++++++++++ pipeline/pipeline.py | 4 ++-- pipeline/run.py | 15 --------------- 3 files changed, 27 insertions(+), 17 deletions(-) create mode 100644 examples/run_pipeline.py delete mode 100644 pipeline/run.py diff --git a/examples/run_pipeline.py b/examples/run_pipeline.py new file mode 100644 index 0000000..8326a43 --- /dev/null +++ b/examples/run_pipeline.py @@ -0,0 +1,25 @@ +""" + Script for running the pipeline +""" +#pylint: disable-all +import os,sys,inspect +CURRENT_DIR = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) +PARENT_DIR = os.path.dirname(CURRENT_DIR) +sys.path.insert(0, PARENT_DIR) +from pipeline.pipeline import Pipeline + +# init filterbank filename +fil_name = "./pspm.fil" +# init filterbank sample size +sample_size = 1534 +# init times the pipeline should run +n_times = 1000 + +# run the filterbank n times +for i in range(n): + # read static + Pipeline(filename=fil_name, size=sample_size) + # read stream, row per row + Pipeline(filename=fil_name, as_stream=True) + # read stream, n rows + Pipeline(filename=fil_name, as_stream=True, n=10) diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py index 8cf7103..9540c21 100644 --- a/pipeline/pipeline.py +++ b/pipeline/pipeline.py @@ -1,5 +1,5 @@ """ - Pipeline for all modules + Pipeline for running all the modules in order """ # pylint: disable=wrong-import-position import os @@ -20,7 +20,7 @@ # pylint: disable=invalid-name # pylint: disable=no-self-use -class Pipeline(): +class Pipeline: """ The Pipeline combines the functionality of all modules in the library. diff --git a/pipeline/run.py b/pipeline/run.py deleted file mode 100644 index 53a9c98..0000000 --- a/pipeline/run.py +++ /dev/null @@ -1,15 +0,0 @@ -""" - Script for running the pipeline -""" -import os -import pipeline - -fil_name = os.path.abspath("E:/11100335.320.all.fil") - -for i in range(1000): - # read static - pipeline.Pipeline(filename=fil_name, size=49150) - # read stream, row per row - pipeline.Pipeline(filename=fil_name, as_stream=True) - # read stream, n rows - pipeline.Pipeline(filename=fil_name, as_stream=True, n=10) From 4561cefd7d8b69b6105efe2b5d56d830cbcebb5a Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Wed, 16 Jan 2019 15:41:49 +0100 Subject: [PATCH 11/14] add docs for pipeline --- README.md | 6 ++++++ docs/pipeline.md | 10 ++++++++++ 2 files changed, 16 insertions(+) create mode 100644 docs/pipeline.md diff --git a/README.md b/README.md index 9358f35..95c1cf4 100644 --- a/README.md +++ b/README.md @@ -45,3 +45,9 @@ Creating a free and open source framework that contains the generic algorithms a 2. [Filter samples](docs/clipping.md#62-filter-samples) 3. [Filter channels](docs/clipping.md#63-filter-channels) 4. [Filter individual channels](docs/clipping.md#64-filter-individual-channels) +7. [Pipeline](docs/pipeline.md) + 1. [Introduction](docs/pipeline.md#71-introduction) + 2. [Read rows](docs/pipeline.md#72-read-rows) + 3. [Read n rows](docs/pipeline.md#73-read-n-rows) + 3. [Read static](docs/pipeline.md#74-read-static) + 4. [Measure methods](docs/pipeline.md#75-measure-methods) diff --git a/docs/pipeline.md b/docs/pipeline.md new file mode 100644 index 0000000..025846d --- /dev/null +++ b/docs/pipeline.md @@ -0,0 +1,10 @@ +# 7. Pipeline + +## 7.1 Introduction + +The Pipeline module is used to execute the different modules in a specific order. +There are currently three different options for running the pipeline. +These options include: +* read multiple rows +* read single rows +* read all rows From 01669cb244f311c945fc963128f242e94637a732 Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Wed, 16 Jan 2019 15:42:11 +0100 Subject: [PATCH 12/14] update docs --- docs/pipeline.md | 48 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 45 insertions(+), 3 deletions(-) diff --git a/docs/pipeline.md b/docs/pipeline.md index 025846d..45e5b3c 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -4,7 +4,49 @@ The Pipeline module is used to execute the different modules in a specific order. There are currently three different options for running the pipeline. + These options include: -* read multiple rows -* read single rows -* read all rows +* read multiple rows, `read_n_rows` +* read single rows, `read_rows` +* read all rows, `read_static` + +The constructor of the pipeline module will recognize which method is fit for running which method, by looking at the given arguments to the constructor. + +| Parameter | Description | +| --- | --- | +| filename | The path to the filterbank file. | +| as_stream | This parameter decides whether the filterbank should be read as stream. | +| DM | The dispersion measure (DM) is used for performing dedispersion. | +| scale | The scale is used for performing downsampling the time series. | +| n | The `n` is the rowsize of chunks for reading the filterbank as stream. | +| size | The size parameter is used for deciding the size of the filterbank. | + +After deciding which method to run for running the filterbank in a pipeline, it will measure the time it takes to run each method. At the end it will append the results to a txt file. + +## 7.2 Read rows + +The `read_rows` method reads the Filterbank data row per row. Because it only reads the filterbank per row, it is unable to execute most methods. The alternative for this method is the `read_n_rows` method, which is able to run all methods. + +``` +pipeline.Pipeline(, as_stream=True) +``` + +## 7.3 Read n rows + +The `read_n_rows` method first splits all the filterbank data into chunks of n samples. After splitting the filterbank data in chunks, it will run the different modules of the pipeline for each chunk. + +``` +pipeline.Pipeline(, n= , as_stream=True) +``` + +## 7.4 Read static + +The `read_static` method reads the entire filterbank at once. If the filterbank file is too large for this method, the alternative is using `read_n_rows`. + +``` +pipeline.Pipeline() +``` + +## 7.5 Measure methods + +The `measure_methods` is ran for each of the above methods, and calculates the time it takes to run each of the different methods. From a2b7641d29a2133136cda5dba5a7882315af040b Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Sat, 19 Jan 2019 19:34:16 +0100 Subject: [PATCH 13/14] improve reading stream for pipeline --- docs/pipeline.md | 11 +++++++---- examples/run_pipeline.py | 8 ++++---- filterbank/filterbank.py | 16 +++++++--------- fourier/fourier.py | 1 + 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/docs/pipeline.md b/docs/pipeline.md index 45e5b3c..36e10a0 100644 --- a/docs/pipeline.md +++ b/docs/pipeline.md @@ -21,7 +21,7 @@ The constructor of the pipeline module will recognize which method is fit for ru | n | The `n` is the rowsize of chunks for reading the filterbank as stream. | | size | The size parameter is used for deciding the size of the filterbank. | -After deciding which method to run for running the filterbank in a pipeline, it will measure the time it takes to run each method. At the end it will append the results to a txt file. +After deciding which method to run for running the filterbank in a pipeline, it will measure the time it takes to run each method using `measure_method`. After running all the different methods, the constructor will append the results (a dictionary) to a txt file. ## 7.2 Read rows @@ -33,7 +33,9 @@ pipeline.Pipeline(, as_stream=True) ## 7.3 Read n rows -The `read_n_rows` method first splits all the filterbank data into chunks of n samples. After splitting the filterbank data in chunks, it will run the different modules of the pipeline for each chunk. +The `read_n_rows` method first splits all the filterbank data into chunks of n samples. After splitting the filterbank data in chunks, it will run the different modules of the pipeline for each chunk. The remaining data, that which does not fit into the sample size, is currently ignored. + +The `n` or sample size should be a power of 2 multiplied with the given scale for the downsampling. ``` pipeline.Pipeline(, n= , as_stream=True) @@ -41,7 +43,7 @@ pipeline.Pipeline(, n= , as_stream=True) ## 7.4 Read static -The `read_static` method reads the entire filterbank at once. If the filterbank file is too large for this method, the alternative is using `read_n_rows`. +The `read_static` method reads the entire filterbank at once, and applies each method to the entire dataset. If the filterbank file is too large for running it in-memory, the alternative is using `read_n_rows`. ``` pipeline.Pipeline() @@ -49,4 +51,5 @@ pipeline.Pipeline() ## 7.5 Measure methods -The `measure_methods` is ran for each of the above methods, and calculates the time it takes to run each of the different methods. +The `measure_methods` is ran for each of the above methods, and calculates the time it takes to run each of the different methods. For each method it will create a key using the name of the method, and save the time it took to run the method as a value. +At the end, it will returns a dictionary with all the keys and values. diff --git a/examples/run_pipeline.py b/examples/run_pipeline.py index 8326a43..2f12df7 100644 --- a/examples/run_pipeline.py +++ b/examples/run_pipeline.py @@ -11,15 +11,15 @@ # init filterbank filename fil_name = "./pspm.fil" # init filterbank sample size -sample_size = 1534 +sample_size = 49152 # init times the pipeline should run -n_times = 1000 +n_times = 10 # run the filterbank n times -for i in range(n): +for i in range(n_times): # read static Pipeline(filename=fil_name, size=sample_size) # read stream, row per row Pipeline(filename=fil_name, as_stream=True) # read stream, n rows - Pipeline(filename=fil_name, as_stream=True, n=10) + Pipeline(filename=fil_name, as_stream=True, n=sample_size) diff --git a/filterbank/filterbank.py b/filterbank/filterbank.py index 2216cb8..a3cf4e1 100644 --- a/filterbank/filterbank.py +++ b/filterbank/filterbank.py @@ -25,8 +25,7 @@ def __init__(self, filename, freq_range=None, time_range=None, read_all=False): """ if not os.path.isfile(filename): raise FileNotFoundError(filename) - # iterator for stream - self.stream_iter = 0 + # header values self.data, self.freqs, self.n_chans_selected = None, None, None self.filename = filename self.header = read_header(filename) @@ -51,6 +50,8 @@ def __init__(self, filename, freq_range=None, time_range=None, read_all=False): self.fil.seek(int(self.ii_start * self.n_bytes * self.n_ifs * self.n_chans), 1) # find possible channels self.i_0, self.i_1 = self.setup_chans(freq_range) + # number if stream iterations + self.stream_iter = (self.n_samples * self.n_ifs) # read filterbank at once if read_all: self.read_filterbank() @@ -84,8 +85,8 @@ def next_row(self): returns False if EOF """ - if self.stream_iter < (self.n_samples * self.n_ifs): - self.stream_iter += 1 + if self.stream_iter > 0: + self.stream_iter -= 1 # skip bytes self.fil.seek(self.n_bytes * self.i_0, 1) # read row of data @@ -104,11 +105,8 @@ def next_n_rows(self, n_rows): returns False if EOF """ - if self.stream_iter < (self.n_samples * self.n_ifs): - # more rows requested than available - if self.stream_iter + n_rows >= self.n_samples * self.n_ifs: - n_rows = self.n_samples * self.n_ifs - self.stream_iter - self.stream_iter += n_rows + if self.stream_iter - n_rows > 0: + self.stream_iter -= n_rows # init array of n rows data = np.zeros((n_rows, self.n_chans_selected), dtype=self.dd_type) for row in range(n_rows): diff --git a/fourier/fourier.py b/fourier/fourier.py index bdcfc1f..c9c83fc 100644 --- a/fourier/fourier.py +++ b/fourier/fourier.py @@ -80,6 +80,7 @@ def fft_vectorized(input_data, nfft=None, axis=-1): input_data = zeroes data_length = input_data.shape[0] + print(data_length) if np.log2(data_length) % 1 > 0: raise ValueError("Size of input data must be a power of 2") From f2371b06c9cb00944c3595d0673ef1f1368d317a Mon Sep 17 00:00:00 2001 From: robin <13999868+robinbaneke@users.noreply.github.com> Date: Tue, 22 Jan 2019 10:58:55 +0100 Subject: [PATCH 14/14] minor changes to pipeline --- examples/run_pipeline.py | 2 +- fourier/fourier.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/run_pipeline.py b/examples/run_pipeline.py index 2f12df7..7bc4ca1 100644 --- a/examples/run_pipeline.py +++ b/examples/run_pipeline.py @@ -9,7 +9,7 @@ from pipeline.pipeline import Pipeline # init filterbank filename -fil_name = "./pspm.fil" +fil_name = os.path.abspath("filterbank.fil") # init filterbank sample size sample_size = 49152 # init times the pipeline should run diff --git a/fourier/fourier.py b/fourier/fourier.py index c9c83fc..bdcfc1f 100644 --- a/fourier/fourier.py +++ b/fourier/fourier.py @@ -80,7 +80,6 @@ def fft_vectorized(input_data, nfft=None, axis=-1): input_data = zeroes data_length = input_data.shape[0] - print(data_length) if np.log2(data_length) % 1 > 0: raise ValueError("Size of input data must be a power of 2")