From 3e10195fd4cc2c37b14e65e63fab36ae50f1d6a0 Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Wed, 19 Dec 2018 15:07:07 +0100
Subject: [PATCH 01/14] create pipeline module

---
 clipping/clipping.py     |  4 +-
 filterbank/__init__.py   |  2 +
 fourier/__init__.py      |  2 +-
 pipeline/__init__.py     |  1 +
 pipeline/pipeline.py     | 82 ++++++++++++++++++++++++++++++++++++++++
 pipeline/run.py          | 11 ++++++
 tests/test_benchmark.py  | 39 +++++++++++++++++++
 timeseries/timeseries.py |  2 +-
 8 files changed, 139 insertions(+), 4 deletions(-)
 create mode 100644 pipeline/__init__.py
 create mode 100644 pipeline/pipeline.py
 create mode 100644 pipeline/run.py
 create mode 100644 tests/test_benchmark.py

diff --git a/clipping/clipping.py b/clipping/clipping.py
index d78d47e..6b3b978 100644
--- a/clipping/clipping.py
+++ b/clipping/clipping.py
@@ -24,7 +24,7 @@ def filter_samples(samples):
         Calulate mean power of all frequencies per time sample
         and remove samples with significantly high power
     """
-    factor = 1.3
+    factor = 1.01
     new_samples = []
     # calculate mean intensity per sample
     avg_sample = np.sum(samples)/len(samples)
@@ -40,7 +40,7 @@ def filter_channels(channels, samples):
         Calculate mean power of all time samples per frequency
         and remove frequencies with significantly high power
     """
-    factor = 1.3
+    factor = 1.01
     bad_channels = []
     # calculate the mean power per channel
     avg_power_chan = samples.mean(axis=0)
diff --git a/filterbank/__init__.py b/filterbank/__init__.py
index e69de29..2c9bf1a 100644
--- a/filterbank/__init__.py
+++ b/filterbank/__init__.py
@@ -0,0 +1,2 @@
+from .filterbank import Filterbank
+from .filterbank import read_header
\ No newline at end of file
diff --git a/fourier/__init__.py b/fourier/__init__.py
index 8b4b1fc..a3811b7 100644
--- a/fourier/__init__.py
+++ b/fourier/__init__.py
@@ -1,4 +1,4 @@
 """
     import file for fourier.py
 """
-from .fourier import fft_freq, fft_vectorized, dft_slow, fft_matrix, fft_shift
+from .fourier import fft_freq, fft_vectorized, dft_slow, fft_matrix, fft_shift, ifft
diff --git a/pipeline/__init__.py b/pipeline/__init__.py
new file mode 100644
index 0000000..f842286
--- /dev/null
+++ b/pipeline/__init__.py
@@ -0,0 +1 @@
+from .pipeline import Pipeline
\ No newline at end of file
diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py
new file mode 100644
index 0000000..732b180
--- /dev/null
+++ b/pipeline/pipeline.py
@@ -0,0 +1,82 @@
+"""
+    Pipeline for all modules
+"""
+import os
+import sys
+import inspect
+CURRENT_DIR = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+PARENT_DIR = os.path.dirname(CURRENT_DIR)
+sys.path.insert(0, PARENT_DIR)
+import filterbank.filterbank as filterbank
+import clipping
+import dedisperse
+import timeseries.timeseries as timeseries
+import fourier
+import time
+
+class Pipeline():
+    """
+        The Pipeline combines the functionality of all modules
+        in the library.
+    """
+
+    def __init__(self, as_stream=False, filename=None, DM=230, scale=3):
+        """
+            Initialize Pipeline object
+
+            Args:
+                as_stream, read the filterbank data as stream
+        """
+        if as_stream:
+            self.read_stream(filename, DM, scale)
+        else:
+            self.read_static(filename, DM, scale)
+
+
+    def read_stream(self, filename, DM, scale):
+        """
+            Read the filterbank data as stream
+        """
+
+
+    
+    def read_static(self, filename, DM, scale):
+        """
+            Read the filterbank data at once
+        """
+        stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', 
+                                   'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft'
+                                   'time_ifft', 'time_fft_freq'])
+        time_start = time.time()
+        # init filterbank
+        fil = filterbank.Filterbank(filename, read_all=True)
+        stopwatch['time_read'] = time.time() - time_start
+        # select data
+        freqs, fil_data = fil.select_data()
+        stopwatch['time_select'] = time.time() - stopwatch['time_read']
+        # clipping
+        _, _ = clipping.clipping(freqs, fil_data)
+        stopwatch['time_clipping'] = time.time() - stopwatch['time_select']
+        # dedisperse
+        fil_data = dedisperse.dedisperse(fil_data, DM)
+        stopwatch['time_dedisp'] = time.time() - stopwatch['time_clipping']
+        # timeseries
+        time_series = timeseries.Timeseries(fil_data)
+        stopwatch['time_t_series'] = time.time() - stopwatch['time_dedisp']
+        # downsample
+        time_series = time_series.downsample(scale)
+        stopwatch['time_downsample'] = time.time()- stopwatch['time_t_series']
+        # fft vect
+        fourier.fft_vectorized(time_series)
+        stopwatch['time_fft_vect'] = time.time() - stopwatch['time_downsample']
+        # dft
+        fourier.dft_slow(time_series)
+        stopwatch['time_dft'] = time.time() - stopwatch['time_fft_vect']
+        # ifft
+        fourier.ifft(time_series)
+        stopwatch['time_ifft'] = time.time() - stopwatch['time_dft']
+        # fft freq
+        fourier.fft_freq(10)
+        stopwatch['time_fft_freq'] = time.time() - stopwatch['time_ifft']
+        print(stopwatch)
+
diff --git a/pipeline/run.py b/pipeline/run.py
new file mode 100644
index 0000000..71f1a50
--- /dev/null
+++ b/pipeline/run.py
@@ -0,0 +1,11 @@
+"""
+    Script for running the pipeline
+"""
+
+import pipeline
+
+
+
+pipeline.Pipeline(filename='pspm32.fil')
+
+
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
new file mode 100644
index 0000000..e3606b2
--- /dev/null
+++ b/tests/test_benchmark.py
@@ -0,0 +1,39 @@
+"""
+    benchmarking all functions, except for plots
+"""
+
+import time
+import unittest
+
+from .context import clipping, dedisperse, fourier, filterbank, header
+
+
+class TestBenchmark(unittest.TestCase):
+    """
+        Testclass for benchmarking all methods
+    """
+
+    functions = clipping.clipping, dedisperse.dedisperse, fourier.dft_slow, fourier.fft_freq, fourier.fft_matrix, fourier.fft_shift, fourier.fft_vectorized, filterbank.Filterbank, filterbank.read_header
+
+    times = {f.__name__: [] for  f in functions}
+
+    # def test_benchmark(self):
+    #     for func in enumerate(self.functions):
+    #         t0 = time.time()
+    #         # print(func.__name__)
+    #         t1 = time.time()
+    #         # print(t1-t0)
+    #         # self.times[func.__name__].append((t1-t0) * 1000)
+
+
+    def test_benchmark2(self):
+        for _, func in enumerate(self.functions):
+            t0 = time.time()
+            func()
+            t1 = time.time()
+            self.times[func.__name__].append((t1 - t0) * 1000)
+
+
+if __name__ == '__main__':
+    unittest.main()
+
diff --git a/timeseries/timeseries.py b/timeseries/timeseries.py
index 3f2d04c..e0ba981 100644
--- a/timeseries/timeseries.py
+++ b/timeseries/timeseries.py
@@ -29,7 +29,6 @@ def downsample(self, sample_scale):
         return self.timeseries
 
 
-
     def get(self):
         """
            Returns the current timeseries object which,
@@ -37,6 +36,7 @@ def get(self):
         """
         return self.timeseries
 
+
     def from_filterbank(self, filterbank_object):
         """
                Initializes timeseries object from filterbank object,

From 63dfa4af4cf32886ee2ced67d76868b17e9627bb Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Wed, 19 Dec 2018 15:36:59 +0100
Subject: [PATCH 02/14] add time measurements to pipeline

---
 pipeline/pipeline.py | 48 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 11 deletions(-)

diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py
index 732b180..34b2357 100644
--- a/pipeline/pipeline.py
+++ b/pipeline/pipeline.py
@@ -13,6 +13,8 @@
 import timeseries.timeseries as timeseries
 import fourier
 import time
+import datetime
+import timeit
 
 class Pipeline():
     """
@@ -37,7 +39,14 @@ def read_stream(self, filename, DM, scale):
         """
             Read the filterbank data as stream
         """
+        stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', 
+                                   'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft'
+                                   'time_ifft', 'time_fft_freq'])
+        # init filterbank as stream
+        fil = filterbank.Filterbank(filename)
 
+        fil.select_data()
+        
 
     
     def read_static(self, filename, DM, scale):
@@ -47,36 +56,53 @@ def read_static(self, filename, DM, scale):
         stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', 
                                    'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft'
                                    'time_ifft', 'time_fft_freq'])
-        time_start = time.time()
+        time_start = datetime.datetime.now()
+
+        a = datetime.datetime.now()
+        b = datetime.datetime.now()
+
+        time_took = b - a
+
+        print(time_took.seconds)
         # init filterbank
         fil = filterbank.Filterbank(filename, read_all=True)
-        stopwatch['time_read'] = time.time() - time_start
+        stopwatch['time_read'] = time_start - datetime.datetime.now()
         # select data
         freqs, fil_data = fil.select_data()
-        stopwatch['time_select'] = time.time() - stopwatch['time_read']
+        time_select = datetime.datetime.now() - stopwatch['time_read']
+        stopwatch['time_read'] = time_select.microsecond
         # clipping
         _, _ = clipping.clipping(freqs, fil_data)
-        stopwatch['time_clipping'] = time.time() - stopwatch['time_select']
+        time_clipping = datetime.datetime.now() - time_select
+        stopwatch['time_clipping'] = time_clipping.microseconds
+        # stopwatch['time_clipping'] = stopwatch['time_clipping'].microseconds
         # dedisperse
         fil_data = dedisperse.dedisperse(fil_data, DM)
-        stopwatch['time_dedisp'] = time.time() - stopwatch['time_clipping']
+        time_dedisp = datetime.datetime.now() - time_clipping
+        stopwatch['time_dedisp'] = time_dedisp.microseconds
         # timeseries
         time_series = timeseries.Timeseries(fil_data)
-        stopwatch['time_t_series'] = time.time() - stopwatch['time_dedisp']
+        time_t_series = datetime.datetime.now() - time_dedisp
+        stopwatch['time_t_series'] = time_t_series.microseconds
         # downsample
         time_series = time_series.downsample(scale)
-        stopwatch['time_downsample'] = time.time()- stopwatch['time_t_series']
+        time_downsample = datetime.datetime.now() - time_t_series
+        stopwatch['time_downsample'] = time_downsample.microseconds
         # fft vect
         fourier.fft_vectorized(time_series)
-        stopwatch['time_fft_vect'] = time.time() - stopwatch['time_downsample']
+        time_fft_vect = datetime.datetime.now() - time_downsample
+        stopwatch['time_fft_vect'] = time_fft_vect.microseconds
         # dft
         fourier.dft_slow(time_series)
-        stopwatch['time_dft'] = time.time() - stopwatch['time_fft_vect']
+        time_dft = datetime.datetime.now() - time_fft_vect
+        stopwatch['time_dft'] = time_dft.microseconds
         # ifft
         fourier.ifft(time_series)
-        stopwatch['time_ifft'] = time.time() - stopwatch['time_dft']
+        time_ifft = datetime.datetime.now() - time_dft
+        stopwatch['time_ifft'] = time_ifft.microseconds
         # fft freq
         fourier.fft_freq(10)
-        stopwatch['time_fft_freq'] = time.time() - stopwatch['time_ifft']
+        time_fft_freq = datetime.datetime.now() - time_ifft
+        stopwatch['time_fft_freq'] = time_fft_freq.microseconds
         print(stopwatch)
 

From d9e1d295d5a532994626789de97abe7d2a81794d Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Wed, 19 Dec 2018 22:18:40 +0100
Subject: [PATCH 03/14] improve pipeline module

---
 filterbank/__init__.py  |   5 +-
 pipeline/__init__.py    |   5 +-
 pipeline/pipeline.py    | 135 +++++++++++++++++++++++++---------------
 pipeline/run.py         |   9 +--
 tests/test_benchmark.py |  39 ------------
 timeseries/__init__.py  |   4 ++
 6 files changed, 103 insertions(+), 94 deletions(-)
 delete mode 100644 tests/test_benchmark.py

diff --git a/filterbank/__init__.py b/filterbank/__init__.py
index 2c9bf1a..747d31a 100644
--- a/filterbank/__init__.py
+++ b/filterbank/__init__.py
@@ -1,2 +1,5 @@
+"""
+    Export filterbank methods
+"""
 from .filterbank import Filterbank
-from .filterbank import read_header
\ No newline at end of file
+from .filterbank import read_header
diff --git a/pipeline/__init__.py b/pipeline/__init__.py
index f842286..c18533d 100644
--- a/pipeline/__init__.py
+++ b/pipeline/__init__.py
@@ -1 +1,4 @@
-from .pipeline import Pipeline
\ No newline at end of file
+"""
+    Export pipeline methods
+"""
+from .pipeline import Pipeline
diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py
index 34b2357..7920bee 100644
--- a/pipeline/pipeline.py
+++ b/pipeline/pipeline.py
@@ -1,28 +1,32 @@
 """
     Pipeline for all modules
 """
+# pylint: disable=wrong-import-position
 import os
 import sys
 import inspect
 CURRENT_DIR = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
 PARENT_DIR = os.path.dirname(CURRENT_DIR)
 sys.path.insert(0, PARENT_DIR)
-import filterbank.filterbank as filterbank
+from timeit import default_timer as timer
+import filterbank.filterbank
+import timeseries.timeseries
 import clipping
 import dedisperse
-import timeseries.timeseries as timeseries
 import fourier
-import time
-import datetime
-import timeit
+
+# pylint: disable=too-many-locals
+# pylint: disable=too-many-arguments
+# pylint: disable=invalid-name
+# pylint: disable=no-self-use
 
 class Pipeline():
     """
         The Pipeline combines the functionality of all modules
         in the library.
     """
-
-    def __init__(self, as_stream=False, filename=None, DM=230, scale=3):
+    # pylint: disable
+    def __init__(self, filename=None, as_stream=False, DM=230, scale=3, n=None):
         """
             Initialize Pipeline object
 
@@ -30,79 +34,112 @@ def __init__(self, as_stream=False, filename=None, DM=230, scale=3):
                 as_stream, read the filterbank data as stream
         """
         if as_stream:
-            self.read_stream(filename, DM, scale)
+            if n:
+                result = self.read_n_rows(n, filename, DM, scale)
+                file = open("stream_filterbank.txt", "a+")
+            else:
+                result = self.read_rows(filename)
+                file = open("row_filterbank.txt", "a+")
         else:
-            self.read_static(filename, DM, scale)
+            result = self.read_static(filename, DM, scale)
+            file = open("static_filterbank", "a+")
+        file.write(str(result) + ",")
+        file.close()
 
 
-    def read_stream(self, filename, DM, scale):
+    def read_rows(self, filename):
         """
             Read the filterbank data as stream
+            and measure the time
         """
-        stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', 
-                                   'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft'
-                                   'time_ifft', 'time_fft_freq'])
         # init filterbank as stream
         fil = filterbank.Filterbank(filename)
+        time_start = timer()
+        while True:
+            fil_data = fil.next_row()
+            if isinstance(fil_data, bool):
+                break
+        time_stop = timer() - time_start
+        return time_stop
+
+    def read_n_rows(self, n, filename, DM, scale):
+        """
+            Read the filterbank data as stream
+            and measure the time
+        """
+        fil = filterbank.Filterbank(filename)
+        stopwatch_list = list()
+        while True:
+            stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp',
+                                       'time_t_series', 'time_downsample', 'time_fft_vect',
+                                       'time_dft', 'time_ifft', 'time_fft_freq'])
+            time_start = timer()
+            fil_data = fil.next_n_rows(n)
+            # break if EOF
+            if isinstance(fil_data, bool):
+                break
+            stopwatch['time_read'] = timer() - time_start
+            # run methods
+            stopwatch = self.measure_methods(stopwatch, fil_data, fil.freqs, DM, scale)
+            stopwatch_list.append(stopwatch)
+        return stopwatch_list
 
-        fil.select_data()
-        
 
-    
     def read_static(self, filename, DM, scale):
         """
             Read the filterbank data at once
+            and measure the time per function/class
         """
-        stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp', 
-                                   'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft'
+        stopwatch = dict.fromkeys(['time_read', 'time_select', 'time_clipping', 'time_dedisp',
+                                   'time_t_series', 'time_downsample', 'time_fft_vect', 'time_dft',
                                    'time_ifft', 'time_fft_freq'])
-        time_start = datetime.datetime.now()
-
-        a = datetime.datetime.now()
-        b = datetime.datetime.now()
-
-        time_took = b - a
-
-        print(time_took.seconds)
+        time_start = timer()
         # init filterbank
-        fil = filterbank.Filterbank(filename, read_all=True)
-        stopwatch['time_read'] = time_start - datetime.datetime.now()
+        fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, 10000))
+        stopwatch['time_read'] = timer() - time_start
         # select data
+        time_select = timer()
         freqs, fil_data = fil.select_data()
-        time_select = datetime.datetime.now() - stopwatch['time_read']
-        stopwatch['time_read'] = time_select.microsecond
+        stopwatch['time_select'] = timer() - time_select
+        # run methods
+        stopwatch = self.measure_methods(stopwatch, fil_data, freqs, DM, scale)
+        return stopwatch
+
+
+    def measure_methods(self, stopwatch, fil_data, freqs, DM, scale):
+        """
+            Run and time all methods/modules
+        """
         # clipping
+        time_clipping = timer()
         _, _ = clipping.clipping(freqs, fil_data)
-        time_clipping = datetime.datetime.now() - time_select
-        stopwatch['time_clipping'] = time_clipping.microseconds
-        # stopwatch['time_clipping'] = stopwatch['time_clipping'].microseconds
+        stopwatch['time_clipping'] = timer() - time_clipping
         # dedisperse
+        time_dedisp = timer()
         fil_data = dedisperse.dedisperse(fil_data, DM)
-        time_dedisp = datetime.datetime.now() - time_clipping
-        stopwatch['time_dedisp'] = time_dedisp.microseconds
+        stopwatch['time_dedisp'] = timer() - time_dedisp
         # timeseries
+        time_t_series = timer()
         time_series = timeseries.Timeseries(fil_data)
-        time_t_series = datetime.datetime.now() - time_dedisp
-        stopwatch['time_t_series'] = time_t_series.microseconds
+        stopwatch['time_t_series'] = timer() - time_t_series
         # downsample
+        time_downsamp = timer()
         time_series = time_series.downsample(scale)
-        time_downsample = datetime.datetime.now() - time_t_series
-        stopwatch['time_downsample'] = time_downsample.microseconds
+        stopwatch['time_downsample'] = timer() - time_downsamp
         # fft vect
+        time_fft_vect = timer()
         fourier.fft_vectorized(time_series)
-        time_fft_vect = datetime.datetime.now() - time_downsample
-        stopwatch['time_fft_vect'] = time_fft_vect.microseconds
+        stopwatch['time_fft_vect'] = timer() - time_fft_vect
         # dft
+        time_dft = timer()
         fourier.dft_slow(time_series)
-        time_dft = datetime.datetime.now() - time_fft_vect
-        stopwatch['time_dft'] = time_dft.microseconds
+        stopwatch['time_dft'] = timer() - time_dft
         # ifft
+        time_ifft = timer()
         fourier.ifft(time_series)
-        time_ifft = datetime.datetime.now() - time_dft
-        stopwatch['time_ifft'] = time_ifft.microseconds
+        stopwatch['time_ifft'] = timer() - time_ifft
         # fft freq
+        time_fft_freq = timer()
         fourier.fft_freq(10)
-        time_fft_freq = datetime.datetime.now() - time_ifft
-        stopwatch['time_fft_freq'] = time_fft_freq.microseconds
-        print(stopwatch)
-
+        stopwatch['time_fft_freq'] = timer() - time_fft_freq
+        return stopwatch
diff --git a/pipeline/run.py b/pipeline/run.py
index 71f1a50..bf9bed4 100644
--- a/pipeline/run.py
+++ b/pipeline/run.py
@@ -4,8 +4,9 @@
 
 import pipeline
 
-
-
+# read static
 pipeline.Pipeline(filename='pspm32.fil')
-
-
+# read stream, row per row
+pipeline.Pipeline(filename='pspm32.fil', as_stream=True)
+# read stream, n rows
+pipeline.Pipeline(filename='pspm32.fil', as_stream=True, n=10)
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
deleted file mode 100644
index e3606b2..0000000
--- a/tests/test_benchmark.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""
-    benchmarking all functions, except for plots
-"""
-
-import time
-import unittest
-
-from .context import clipping, dedisperse, fourier, filterbank, header
-
-
-class TestBenchmark(unittest.TestCase):
-    """
-        Testclass for benchmarking all methods
-    """
-
-    functions = clipping.clipping, dedisperse.dedisperse, fourier.dft_slow, fourier.fft_freq, fourier.fft_matrix, fourier.fft_shift, fourier.fft_vectorized, filterbank.Filterbank, filterbank.read_header
-
-    times = {f.__name__: [] for  f in functions}
-
-    # def test_benchmark(self):
-    #     for func in enumerate(self.functions):
-    #         t0 = time.time()
-    #         # print(func.__name__)
-    #         t1 = time.time()
-    #         # print(t1-t0)
-    #         # self.times[func.__name__].append((t1-t0) * 1000)
-
-
-    def test_benchmark2(self):
-        for _, func in enumerate(self.functions):
-            t0 = time.time()
-            func()
-            t1 = time.time()
-            self.times[func.__name__].append((t1 - t0) * 1000)
-
-
-if __name__ == '__main__':
-    unittest.main()
-
diff --git a/timeseries/__init__.py b/timeseries/__init__.py
index e69de29..555e481 100644
--- a/timeseries/__init__.py
+++ b/timeseries/__init__.py
@@ -0,0 +1,4 @@
+"""
+    Export timeseries methods
+"""
+from .timeseries import Timeseries

From 7e54d299b7eb12ff04ca235e2d7706550a0eaa04 Mon Sep 17 00:00:00 2001
From: robinbaneke <13999868+robinbaneke@users.noreply.github.com>
Date: Thu, 20 Dec 2018 12:39:02 +0100
Subject: [PATCH 04/14] improve and prepare pipeline

---
 clipping/clipping.py   | 23 ++++++++++-------------
 fourier/fourier.py     |  2 ++
 pipeline/pipeline.py   |  3 ++-
 pipeline/run.py        |  8 ++++----
 tests/test_clipping.py |  5 +----
 5 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/clipping/clipping.py b/clipping/clipping.py
index 6b3b978..dc09828 100644
--- a/clipping/clipping.py
+++ b/clipping/clipping.py
@@ -13,19 +13,21 @@ def clipping(channels, samples):
     # remove all rows(samples) with noise
     samples = filter_samples(samples)
     # remove all columns(channels) with noise, select first max n samples
-    channels, samples = filter_channels(channels, samples[:n_samples])
+    bad_channels = filter_channels(samples[:n_samples])
+    # remove bad channels from samples
+    channels = np.delete(channels, bad_channels)
+    samples = np.delete(samples, bad_channels, axis=1)
     # remove all individual cells with noise
     samples = filter_indv_channels(samples)
     return channels, samples
 
 
-def filter_samples(samples):
+def filter_samples(samples, factor=11):
     """
         Calulate mean power of all frequencies per time sample
         and remove samples with significantly high power
     """
-    factor = 1.01
-    new_samples = []
+    new_samples = list()
     # calculate mean intensity per sample
     avg_sample = np.sum(samples)/len(samples)
     # remove samples with significant high power
@@ -35,13 +37,12 @@ def filter_samples(samples):
     return np.array(new_samples)
 
 
-def filter_channels(channels, samples):
+def filter_channels(samples, factor=9):
     """
         Calculate mean power of all time samples per frequency
         and remove frequencies with significantly high power
     """
-    factor = 1.01
-    bad_channels = []
+    bad_channels = list()
     # calculate the mean power per channel
     avg_power_chan = samples.mean(axis=0)
     # calculate the standard deviation per channel
@@ -52,18 +53,14 @@ def filter_channels(channels, samples):
     for i, (avg_channel, sd_channel) in enumerate(zip(avg_power_chan, sd_power_chan)):
         if avg_channel >= (avg_power * factor) or sd_channel >= (avg_power * factor):
             bad_channels.append(i)
-    # remove bad channels from samples
-    new_channels = np.delete(channels, bad_channels)
-    new_samples = np.delete(samples, bad_channels, axis=1)
-    return new_channels, new_samples
+    return bad_channels
 
 
-def filter_indv_channels(samples):
+def filter_indv_channels(samples, factor=9):
     """
         Calculate mean power per frequency
         and remove samples with significantly high power
     """
-    factor = 1.3
     new_samples = np.zeros((len(samples), len(samples[0])))
     # calculate the mean power for each sample per channel
     avg_power_chan = samples.mean(axis=0)
diff --git a/fourier/fourier.py b/fourier/fourier.py
index bdcfc1f..fceff7f 100644
--- a/fourier/fourier.py
+++ b/fourier/fourier.py
@@ -80,6 +80,8 @@ def fft_vectorized(input_data, nfft=None, axis=-1):
             input_data = zeroes
 
     data_length = input_data.shape[0]
+    
+    print(np.log2(data_length))
 
     if np.log2(data_length) % 1 > 0:
         raise ValueError("Size of input data must be a power of 2")
diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py
index 7920bee..75856cb 100644
--- a/pipeline/pipeline.py
+++ b/pipeline/pipeline.py
@@ -62,6 +62,7 @@ def read_rows(self, filename):
         time_stop = timer() - time_start
         return time_stop
 
+
     def read_n_rows(self, n, filename, DM, scale):
         """
             Read the filterbank data as stream
@@ -95,7 +96,7 @@ def read_static(self, filename, DM, scale):
                                    'time_ifft', 'time_fft_freq'])
         time_start = timer()
         # init filterbank
-        fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, 10000))
+        fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, 49150))
         stopwatch['time_read'] = timer() - time_start
         # select data
         time_select = timer()
diff --git a/pipeline/run.py b/pipeline/run.py
index bf9bed4..34d634e 100644
--- a/pipeline/run.py
+++ b/pipeline/run.py
@@ -1,12 +1,12 @@
 """
     Script for running the pipeline
 """
-
+import os
 import pipeline
 
 # read static
-pipeline.Pipeline(filename='pspm32.fil')
+pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"))
 # read stream, row per row
-pipeline.Pipeline(filename='pspm32.fil', as_stream=True)
+# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True)
 # read stream, n rows
-pipeline.Pipeline(filename='pspm32.fil', as_stream=True, n=10)
+# pipeline.Pipeline(filename='pspm32.fil', as_stream=True, n=10)
diff --git a/tests/test_clipping.py b/tests/test_clipping.py
index 30a8565..a9d9366 100644
--- a/tests/test_clipping.py
+++ b/tests/test_clipping.py
@@ -51,12 +51,9 @@ def test_filter_channels(self):
             When filtering samples, all channels
             with a relative high power should be removed
         """
-        result_channels, result_samples = clipping.filter_channels(
-            self.fil_chans, np.array(self.fil_vector))
+        result_channels = np.array(clipping.filter_channels(np.array(self.fil_vector)))
         expect_channels = np.delete(self.fil_chans, self.bad_channels)
-        expect_samples = np.delete(self.fil_vector, self.bad_channels, axis=1)
         self.assertEqual(result_channels.all(), expect_channels.all())
-        self.assertEqual(result_samples.all(), expect_samples.all())
 
     def test_filter_indv_channels(self):
         """

From b83c02ea855be6c59510a98bb2f1495e1d299ccf Mon Sep 17 00:00:00 2001
From: robinbaneke <13999868+robinbaneke@users.noreply.github.com>
Date: Thu, 20 Dec 2018 12:41:23 +0100
Subject: [PATCH 05/14] solve pylint issues

---
 fourier/fourier.py | 2 --
 pipeline/run.py    | 4 ++--
 plot/waterfall.py  | 1 -
 3 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/fourier/fourier.py b/fourier/fourier.py
index fceff7f..bdcfc1f 100644
--- a/fourier/fourier.py
+++ b/fourier/fourier.py
@@ -80,8 +80,6 @@ def fft_vectorized(input_data, nfft=None, axis=-1):
             input_data = zeroes
 
     data_length = input_data.shape[0]
-    
-    print(np.log2(data_length))
 
     if np.log2(data_length) % 1 > 0:
         raise ValueError("Size of input data must be a power of 2")
diff --git a/pipeline/run.py b/pipeline/run.py
index 34d634e..058ea9b 100644
--- a/pipeline/run.py
+++ b/pipeline/run.py
@@ -5,8 +5,8 @@
 import pipeline
 
 # read static
-pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"))
+# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"))
 # read stream, row per row
 # pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True)
 # read stream, n rows
-# pipeline.Pipeline(filename='pspm32.fil', as_stream=True, n=10)
+pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10)
diff --git a/plot/waterfall.py b/plot/waterfall.py
index 4786da8..331450b 100644
--- a/plot/waterfall.py
+++ b/plot/waterfall.py
@@ -89,7 +89,6 @@ def update_plot_labels(self):
         else:
             freq_range = ((center_freq - sample_freq/2)/1e6,\
                           (center_freq + sample_freq*(self.scans_per_sweep - 0.5))/1e6)
-        print(self.image)
         self.image.set_extent(freq_range + (0, 1))
         self.fig.canvas.draw_idle()
 

From d5bb3cb03af8138ea35dd860cdb84024c8ff5aeb Mon Sep 17 00:00:00 2001
From: robinbaneke <13999868+robinbaneke@users.noreply.github.com>
Date: Thu, 20 Dec 2018 13:08:04 +0100
Subject: [PATCH 06/14] add test for pipeline

---
 tests/context.py       |  1 +
 tests/test_pipeline.py | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)
 create mode 100644 tests/test_pipeline.py

diff --git a/tests/context.py b/tests/context.py
index 0ac04d0..c7c4e7d 100644
--- a/tests/context.py
+++ b/tests/context.py
@@ -14,3 +14,4 @@
 import dedisperse
 import filterbank.header as header
 import filterbank.filterbank as filterbank
+import pipeline.pipeline as pipeline
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
new file mode 100644
index 0000000..0c70bd2
--- /dev/null
+++ b/tests/test_pipeline.py
@@ -0,0 +1,37 @@
+"""
+    pipeline.py unit tests
+"""
+
+import unittest
+
+from .context import pipeline
+
+class TestPipeline(unittest.TestCase):
+    """
+        Class for testing pipeline.py
+    """
+
+    def test_static_pipeline(self):
+        """
+            When running the static pipeline,
+            expect a file with the time per method
+        """
+        filename = './pspm8.fil'
+        pipeline.Pipeline(filename)
+
+    def test_row_pipeline(self):
+        """
+            When running the pipeline as stream,
+            expect a file with the time per method
+        """
+        filename = './pspm8.fil'
+        pipeline.Pipeline(filename, as_stream=True)
+    
+
+    def test_n_rows_pipeline(self):
+        """
+            When running the pipeline as stream,
+            expect a file with the time per method per chunk
+        """
+        filename = './pspm8.fil'
+        pipeline.Pipeline(filename, as_stream=True, n=10)

From b352bea1e6bdb55b2f864d984a5cc620fb644809 Mon Sep 17 00:00:00 2001
From: robinbaneke <13999868+robinbaneke@users.noreply.github.com>
Date: Thu, 20 Dec 2018 14:14:34 +0100
Subject: [PATCH 07/14] improve tests for pipeline

---
 pipeline/pipeline.py   | 16 ++++++++--------
 pipeline/run.py        |  4 ++--
 tests/test_pipeline.py | 15 +++++++++++++--
 3 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py
index 75856cb..8cf7103 100644
--- a/pipeline/pipeline.py
+++ b/pipeline/pipeline.py
@@ -25,8 +25,8 @@ class Pipeline():
         The Pipeline combines the functionality of all modules
         in the library.
     """
-    # pylint: disable
-    def __init__(self, filename=None, as_stream=False, DM=230, scale=3, n=None):
+
+    def __init__(self, filename=None, as_stream=False, DM=230, scale=3, n=None, size=None):
         """
             Initialize Pipeline object
 
@@ -36,13 +36,13 @@ def __init__(self, filename=None, as_stream=False, DM=230, scale=3, n=None):
         if as_stream:
             if n:
                 result = self.read_n_rows(n, filename, DM, scale)
-                file = open("stream_filterbank.txt", "a+")
+                file = open("n_rows_filterbank.txt", "a+")
             else:
                 result = self.read_rows(filename)
-                file = open("row_filterbank.txt", "a+")
+                file = open("rows_filterbank.txt", "a+")
         else:
-            result = self.read_static(filename, DM, scale)
-            file = open("static_filterbank", "a+")
+            result = self.read_static(filename, DM, scale, size)
+            file = open("static_filterbank.txt", "a+")
         file.write(str(result) + ",")
         file.close()
 
@@ -86,7 +86,7 @@ def read_n_rows(self, n, filename, DM, scale):
         return stopwatch_list
 
 
-    def read_static(self, filename, DM, scale):
+    def read_static(self, filename, DM, scale, size):
         """
             Read the filterbank data at once
             and measure the time per function/class
@@ -96,7 +96,7 @@ def read_static(self, filename, DM, scale):
                                    'time_ifft', 'time_fft_freq'])
         time_start = timer()
         # init filterbank
-        fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, 49150))
+        fil = filterbank.Filterbank(filename, read_all=True, time_range=(0, size))
         stopwatch['time_read'] = timer() - time_start
         # select data
         time_select = timer()
diff --git a/pipeline/run.py b/pipeline/run.py
index 058ea9b..393ce07 100644
--- a/pipeline/run.py
+++ b/pipeline/run.py
@@ -5,8 +5,8 @@
 import pipeline
 
 # read static
-# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"))
+pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), size=49150)
 # read stream, row per row
 # pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True)
 # read stream, n rows
-pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10)
+# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10)
\ No newline at end of file
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index 0c70bd2..0e57989 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -1,7 +1,7 @@
 """
     pipeline.py unit tests
 """
-
+import os
 import unittest
 
 from .context import pipeline
@@ -17,7 +17,10 @@ def test_static_pipeline(self):
             expect a file with the time per method
         """
         filename = './pspm8.fil'
+        new_file = './static_filterbank.txt'
         pipeline.Pipeline(filename)
+        self.assertTrue(os.path.exists(new_file))
+        os.remove(new_file)
 
     def test_row_pipeline(self):
         """
@@ -25,8 +28,10 @@ def test_row_pipeline(self):
             expect a file with the time per method
         """
         filename = './pspm8.fil'
+        new_file = './rows_filterbank.txt'
         pipeline.Pipeline(filename, as_stream=True)
-    
+        self.assertTrue(os.path.exists(new_file))
+        os.remove(new_file)
 
     def test_n_rows_pipeline(self):
         """
@@ -34,4 +39,10 @@ def test_n_rows_pipeline(self):
             expect a file with the time per method per chunk
         """
         filename = './pspm8.fil'
+        new_file = './n_rows_filterbank.txt'
         pipeline.Pipeline(filename, as_stream=True, n=10)
+        self.assertTrue(os.path.exists(new_file))
+        os.remove(new_file)
+
+if __name__ == '__main__':
+    unittest.main()

From 23cfdd92186bf614c13c4d060448320e5c46efed Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Thu, 20 Dec 2018 17:10:03 +0100
Subject: [PATCH 08/14] add newline

---
 pipeline/run.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/pipeline/run.py b/pipeline/run.py
index 393ce07..203cb3d 100644
--- a/pipeline/run.py
+++ b/pipeline/run.py
@@ -4,9 +4,10 @@
 import os
 import pipeline
 
-# read static
-pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), size=49150)
-# read stream, row per row
-# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True)
-# read stream, n rows
-# pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10)
\ No newline at end of file
+for i in range(10):
+    # # read static
+    pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), size=49150)
+    # read stream, row per row
+    pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True)
+    # read stream, n rows
+    pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10)

From 3930ecdfe90c684695e9b1e5b4595377b3060aa1 Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Sat, 22 Dec 2018 10:47:08 +0100
Subject: [PATCH 09/14] changes to run.py

---
 pipeline/run.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/pipeline/run.py b/pipeline/run.py
index 203cb3d..53a9c98 100644
--- a/pipeline/run.py
+++ b/pipeline/run.py
@@ -4,10 +4,12 @@
 import os
 import pipeline
 
-for i in range(10):
-    # # read static
-    pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), size=49150)
+fil_name = os.path.abspath("E:/11100335.320.all.fil")
+
+for i in range(1000):
+    # read static
+    pipeline.Pipeline(filename=fil_name, size=49150)
     # read stream, row per row
-    pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True)
+    pipeline.Pipeline(filename=fil_name, as_stream=True)
     # read stream, n rows
-    pipeline.Pipeline(filename=os.path.abspath("E:/11100335.320.all.fil"), as_stream=True, n=10)
+    pipeline.Pipeline(filename=fil_name, as_stream=True, n=10)

From 53f628ae638488e0bb62cbaac37379d9574724e7 Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Sat, 22 Dec 2018 16:00:11 +0100
Subject: [PATCH 10/14] move pipeline runner to examples

---
 examples/run_pipeline.py | 25 +++++++++++++++++++++++++
 pipeline/pipeline.py     |  4 ++--
 pipeline/run.py          | 15 ---------------
 3 files changed, 27 insertions(+), 17 deletions(-)
 create mode 100644 examples/run_pipeline.py
 delete mode 100644 pipeline/run.py

diff --git a/examples/run_pipeline.py b/examples/run_pipeline.py
new file mode 100644
index 0000000..8326a43
--- /dev/null
+++ b/examples/run_pipeline.py
@@ -0,0 +1,25 @@
+"""
+    Script for running the pipeline
+"""
+#pylint: disable-all
+import os,sys,inspect
+CURRENT_DIR = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+PARENT_DIR = os.path.dirname(CURRENT_DIR)
+sys.path.insert(0, PARENT_DIR)
+from pipeline.pipeline import Pipeline
+
+# init filterbank filename
+fil_name = "./pspm.fil"
+# init filterbank sample size
+sample_size = 1534
+# init times the pipeline should run
+n_times = 1000
+
+# run the filterbank n times
+for i in range(n):
+    # read static
+    Pipeline(filename=fil_name, size=sample_size)
+    # read stream, row per row
+    Pipeline(filename=fil_name, as_stream=True)
+    # read stream, n rows
+    Pipeline(filename=fil_name, as_stream=True, n=10)
diff --git a/pipeline/pipeline.py b/pipeline/pipeline.py
index 8cf7103..9540c21 100644
--- a/pipeline/pipeline.py
+++ b/pipeline/pipeline.py
@@ -1,5 +1,5 @@
 """
-    Pipeline for all modules
+    Pipeline for running all the modules in order
 """
 # pylint: disable=wrong-import-position
 import os
@@ -20,7 +20,7 @@
 # pylint: disable=invalid-name
 # pylint: disable=no-self-use
 
-class Pipeline():
+class Pipeline:
     """
         The Pipeline combines the functionality of all modules
         in the library.
diff --git a/pipeline/run.py b/pipeline/run.py
deleted file mode 100644
index 53a9c98..0000000
--- a/pipeline/run.py
+++ /dev/null
@@ -1,15 +0,0 @@
-"""
-    Script for running the pipeline
-"""
-import os
-import pipeline
-
-fil_name = os.path.abspath("E:/11100335.320.all.fil")
-
-for i in range(1000):
-    # read static
-    pipeline.Pipeline(filename=fil_name, size=49150)
-    # read stream, row per row
-    pipeline.Pipeline(filename=fil_name, as_stream=True)
-    # read stream, n rows
-    pipeline.Pipeline(filename=fil_name, as_stream=True, n=10)

From 4561cefd7d8b69b6105efe2b5d56d830cbcebb5a Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Wed, 16 Jan 2019 15:41:49 +0100
Subject: [PATCH 11/14] add docs for pipeline

---
 README.md        |  6 ++++++
 docs/pipeline.md | 10 ++++++++++
 2 files changed, 16 insertions(+)
 create mode 100644 docs/pipeline.md

diff --git a/README.md b/README.md
index 9358f35..95c1cf4 100644
--- a/README.md
+++ b/README.md
@@ -45,3 +45,9 @@ Creating a free and open source framework that contains the generic algorithms a
     2. [Filter samples](docs/clipping.md#62-filter-samples)
     3. [Filter channels](docs/clipping.md#63-filter-channels)
     4. [Filter individual channels](docs/clipping.md#64-filter-individual-channels)
+7. [Pipeline](docs/pipeline.md)
+    1. [Introduction](docs/pipeline.md#71-introduction)
+    2. [Read rows](docs/pipeline.md#72-read-rows)
+    3. [Read n rows](docs/pipeline.md#73-read-n-rows)
+    3. [Read static](docs/pipeline.md#74-read-static)
+    4. [Measure methods](docs/pipeline.md#75-measure-methods)
diff --git a/docs/pipeline.md b/docs/pipeline.md
new file mode 100644
index 0000000..025846d
--- /dev/null
+++ b/docs/pipeline.md
@@ -0,0 +1,10 @@
+# 7. Pipeline
+
+## 7.1 Introduction
+
+The Pipeline module is used to execute the different modules in a specific order.
+There are currently three different options for running the pipeline.  
+These options include:
+* read multiple rows
+* read single rows
+* read all rows

From 01669cb244f311c945fc963128f242e94637a732 Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Wed, 16 Jan 2019 15:42:11 +0100
Subject: [PATCH 12/14] update docs

---
 docs/pipeline.md | 48 +++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/docs/pipeline.md b/docs/pipeline.md
index 025846d..45e5b3c 100644
--- a/docs/pipeline.md
+++ b/docs/pipeline.md
@@ -4,7 +4,49 @@
 
 The Pipeline module is used to execute the different modules in a specific order.
 There are currently three different options for running the pipeline.  
+
 These options include:
-* read multiple rows
-* read single rows
-* read all rows
+* read multiple rows, `read_n_rows`
+* read single rows, `read_rows`
+* read all rows, `read_static`
+
+The constructor of the pipeline module will recognize which method is fit for running which method, by looking at the given arguments to the constructor.
+
+| Parameter | Description |
+| --- | --- |
+| filename | The path to the filterbank file. |
+| as_stream | This parameter decides whether the filterbank should be read as stream. |
+| DM | The dispersion measure (DM) is used for performing dedispersion. |
+| scale | The scale is used for performing downsampling the time series. |
+| n | The `n` is the rowsize of chunks for reading the filterbank as stream. |
+| size | The size parameter is used for deciding the size of the filterbank. |
+
+After deciding which method to run for running the filterbank in a pipeline, it will measure the time it takes to run each method. At the end it will append the results to a txt file.
+
+## 7.2 Read rows
+
+The `read_rows` method reads the Filterbank data row per row. Because it only reads the filterbank per row, it is unable to execute most methods. The alternative for this method is the `read_n_rows` method, which is able to run all methods.
+
+```
+pipeline.Pipeline(<filterbank_file>, as_stream=True)
+```
+
+## 7.3 Read n rows
+
+The `read_n_rows` method first splits all the filterbank data into chunks of n samples. After splitting the filterbank data in chunks, it will run the different modules of the pipeline for each chunk.
+
+```
+pipeline.Pipeline(<filterbank_file>, n=<size> , as_stream=True)
+```
+
+## 7.4 Read static
+
+The `read_static` method reads the entire filterbank at once. If the filterbank file is too large for this method, the alternative is using `read_n_rows`.
+
+```
+pipeline.Pipeline(<filterbank_file>)
+```
+
+## 7.5 Measure methods
+
+The `measure_methods` is ran for each of the above methods, and calculates the time it takes to run each of the different methods.

From a2b7641d29a2133136cda5dba5a7882315af040b Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Sat, 19 Jan 2019 19:34:16 +0100
Subject: [PATCH 13/14] improve reading stream for pipeline

---
 docs/pipeline.md         | 11 +++++++----
 examples/run_pipeline.py |  8 ++++----
 filterbank/filterbank.py | 16 +++++++---------
 fourier/fourier.py       |  1 +
 4 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/docs/pipeline.md b/docs/pipeline.md
index 45e5b3c..36e10a0 100644
--- a/docs/pipeline.md
+++ b/docs/pipeline.md
@@ -21,7 +21,7 @@ The constructor of the pipeline module will recognize which method is fit for ru
 | n | The `n` is the rowsize of chunks for reading the filterbank as stream. |
 | size | The size parameter is used for deciding the size of the filterbank. |
 
-After deciding which method to run for running the filterbank in a pipeline, it will measure the time it takes to run each method. At the end it will append the results to a txt file.
+After deciding which method to run for running the filterbank in a pipeline, it will measure the time it takes to run each method using `measure_method`. After running all the different methods, the constructor will append the results (a dictionary) to a txt file.
 
 ## 7.2 Read rows
 
@@ -33,7 +33,9 @@ pipeline.Pipeline(<filterbank_file>, as_stream=True)
 
 ## 7.3 Read n rows
 
-The `read_n_rows` method first splits all the filterbank data into chunks of n samples. After splitting the filterbank data in chunks, it will run the different modules of the pipeline for each chunk.
+The `read_n_rows` method first splits all the filterbank data into chunks of n samples. After splitting the filterbank data in chunks, it will run the different modules of the pipeline for each chunk. The remaining data, that which does not fit into the sample size, is currently ignored.
+
+The `n` or sample size should be a power of 2 multiplied with the given scale for the downsampling.
 
 ```
 pipeline.Pipeline(<filterbank_file>, n=<size> , as_stream=True)
@@ -41,7 +43,7 @@ pipeline.Pipeline(<filterbank_file>, n=<size> , as_stream=True)
 
 ## 7.4 Read static
 
-The `read_static` method reads the entire filterbank at once. If the filterbank file is too large for this method, the alternative is using `read_n_rows`.
+The `read_static` method reads the entire filterbank at once, and applies each method to the entire dataset. If the filterbank file is too large for running it in-memory, the alternative is using `read_n_rows`.
 
 ```
 pipeline.Pipeline(<filterbank_file>)
@@ -49,4 +51,5 @@ pipeline.Pipeline(<filterbank_file>)
 
 ## 7.5 Measure methods
 
-The `measure_methods` is ran for each of the above methods, and calculates the time it takes to run each of the different methods.
+The `measure_methods` is ran for each of the above methods, and calculates the time it takes to run each of the different methods. For each method it will create a key using the name of the method, and save the time it took to run the method as a value.
+At the end, it will returns a dictionary with all the keys and values.
diff --git a/examples/run_pipeline.py b/examples/run_pipeline.py
index 8326a43..2f12df7 100644
--- a/examples/run_pipeline.py
+++ b/examples/run_pipeline.py
@@ -11,15 +11,15 @@
 # init filterbank filename
 fil_name = "./pspm.fil"
 # init filterbank sample size
-sample_size = 1534
+sample_size = 49152
 # init times the pipeline should run
-n_times = 1000
+n_times = 10
 
 # run the filterbank n times
-for i in range(n):
+for i in range(n_times):
     # read static
     Pipeline(filename=fil_name, size=sample_size)
     # read stream, row per row
     Pipeline(filename=fil_name, as_stream=True)
     # read stream, n rows
-    Pipeline(filename=fil_name, as_stream=True, n=10)
+    Pipeline(filename=fil_name, as_stream=True, n=sample_size)
diff --git a/filterbank/filterbank.py b/filterbank/filterbank.py
index 2216cb8..a3cf4e1 100644
--- a/filterbank/filterbank.py
+++ b/filterbank/filterbank.py
@@ -25,8 +25,7 @@ def __init__(self, filename, freq_range=None, time_range=None, read_all=False):
         """
         if not os.path.isfile(filename):
             raise FileNotFoundError(filename)
-        # iterator for stream
-        self.stream_iter = 0
+        # header values
         self.data, self.freqs, self.n_chans_selected = None, None, None
         self.filename = filename
         self.header = read_header(filename)
@@ -51,6 +50,8 @@ def __init__(self, filename, freq_range=None, time_range=None, read_all=False):
         self.fil.seek(int(self.ii_start * self.n_bytes * self.n_ifs * self.n_chans), 1)
         # find possible channels
         self.i_0, self.i_1 = self.setup_chans(freq_range)
+        # number if stream iterations
+        self.stream_iter = (self.n_samples * self.n_ifs)
         # read filterbank at once
         if read_all:
             self.read_filterbank()
@@ -84,8 +85,8 @@ def next_row(self):
 
             returns False if EOF
         """
-        if self.stream_iter < (self.n_samples * self.n_ifs):
-            self.stream_iter += 1
+        if self.stream_iter > 0:
+            self.stream_iter -= 1
             # skip bytes
             self.fil.seek(self.n_bytes * self.i_0, 1)
             # read row of data
@@ -104,11 +105,8 @@ def next_n_rows(self, n_rows):
 
             returns False if EOF
         """
-        if self.stream_iter < (self.n_samples * self.n_ifs):
-            # more rows requested than available
-            if self.stream_iter + n_rows >= self.n_samples * self.n_ifs:
-                n_rows = self.n_samples * self.n_ifs - self.stream_iter
-            self.stream_iter += n_rows
+        if self.stream_iter - n_rows > 0:
+            self.stream_iter -= n_rows
             # init array of n rows
             data = np.zeros((n_rows, self.n_chans_selected), dtype=self.dd_type)
             for row in range(n_rows):
diff --git a/fourier/fourier.py b/fourier/fourier.py
index bdcfc1f..c9c83fc 100644
--- a/fourier/fourier.py
+++ b/fourier/fourier.py
@@ -80,6 +80,7 @@ def fft_vectorized(input_data, nfft=None, axis=-1):
             input_data = zeroes
 
     data_length = input_data.shape[0]
+    print(data_length)
 
     if np.log2(data_length) % 1 > 0:
         raise ValueError("Size of input data must be a power of 2")

From f2371b06c9cb00944c3595d0673ef1f1368d317a Mon Sep 17 00:00:00 2001
From: robin <13999868+robinbaneke@users.noreply.github.com>
Date: Tue, 22 Jan 2019 10:58:55 +0100
Subject: [PATCH 14/14] minor changes to pipeline

---
 examples/run_pipeline.py | 2 +-
 fourier/fourier.py       | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/run_pipeline.py b/examples/run_pipeline.py
index 2f12df7..7bc4ca1 100644
--- a/examples/run_pipeline.py
+++ b/examples/run_pipeline.py
@@ -9,7 +9,7 @@
 from pipeline.pipeline import Pipeline
 
 # init filterbank filename
-fil_name = "./pspm.fil"
+fil_name = os.path.abspath("filterbank.fil")
 # init filterbank sample size
 sample_size = 49152
 # init times the pipeline should run
diff --git a/fourier/fourier.py b/fourier/fourier.py
index c9c83fc..bdcfc1f 100644
--- a/fourier/fourier.py
+++ b/fourier/fourier.py
@@ -80,7 +80,6 @@ def fft_vectorized(input_data, nfft=None, axis=-1):
             input_data = zeroes
 
     data_length = input_data.shape[0]
-    print(data_length)
 
     if np.log2(data_length) % 1 > 0:
         raise ValueError("Size of input data must be a power of 2")