In [1]:
%matplotlib inline
import numpy as np
from scipy import stats
from blimpy import Waterfall
from blimpy.utils import rebin
from matplotlib import pyplot as plt
from bisect import bisect_left
from tqdm import tqdm
import dask.array as da
import h5py
from time import time

from utils import *

# Root directories for the two on-disk formats used in this notebook.
fil_path = "data/filterbanks/"
h5_path = "data/h5/"

# Input files, resolved relative to the directories above.
test_fil = f"{fil_path}blc20_guppi_57991_48899_3C161_0007.gpuspec.0000.fil"

fri_obs = f"{h5_path}GBT_57532_09539_HIP56445_fine.h5"

# Shared keyword arguments that are unpacked into plt.imshow(...) calls.
plt_args = dict(
    aspect='auto',
    origin='lower',
    rasterized=True,
    interpolation='nearest',
    cmap='viridis',
)
# try:
#     client.close()
# except NameError:
#     pass
# from dask.distributed import Client, progress
# client = Client(processes=False, threads_per_worker=3,
#                 n_workers=4, memory_limit='8GB')
# client

In [2]:
# Show Info
# Open the observation without reading the data array (load_data=False),
# print the file summary, and keep a reference to the header mapping.
wf = Waterfall(fri_obs, load_data=False)
wf.info()
header = wf.header

blimpy.file_wrapper INFO     Skipping loading data ...

--- File Info ---
b'DIMENSION_LABELS' : [b'frequency' b'feed_id' b'time']
     b'az_start' :                              0.0
    b'data_type' :                                1
         b'fch1' :                    1926.26953125
         b'foff' :           -2.835503418452676e-06
   b'machine_id' :                               20
        b'nbits' :                               32
       b'nchans' :                        318230528
         b'nifs' :                                1
  b'source_name' :                      b'HIP56445'
      b'src_dej' :                     3d03m34.006s
      b'src_raj' :                    11h34m21.699s
 b'telescope_id' :                                6
        b'tsamp' :                     17.986224128
       b'tstart' :                57532.11040509259
     b'za_start' :                              0.0

Num ints in file :                               16
      File shape :               (16,

  self.h5 = h5py.File(self.filename)


In [None]:
# Frequency axis built by blimpy for this file; used below as the plot x-axis
# and sliced with the same column ranges as the data.
freqs = wf.populate_freqs()

In [None]:
# Width, in fine channels, of one block of the spectrum (presumably one coarse
# channel: the header's nchans = 318230528 is exactly 308 * 1033216 — confirm).
channel_len = 1033216
channel_len

In [None]:
# NOTE(review): `data` must already hold the 2-D (time, frequency) array when
# this cell runs. Nothing above this cell in the visible notebook defines it,
# so on a fresh kernel this raises NameError — confirm which array is meant.
n_keep = 16 * channel_len  # first 16 blocks of `channel_len` fine channels
data = data[:, :n_keep]
freqs = freqs[:n_keep]

# Time-averaged spectrum: mean over the first axis.
integrated = np.sum(data, axis=0)/len(data)

# Replace spikes by the average of their two immediate neighbours. Neighbour
# indices are kept inside the array: a spike in the last bin used to raise
# IndexError (n+1 out of range) and a spike in bin 0 silently wrapped around to
# the last bin (n-1 == -1). At an edge the single existing neighbour is used
# for both sides.
spikes = np.flatnonzero(integrated > 10**13)
last = integrated.size - 1
lower = np.where(spikes > 0, spikes - 1, np.minimum(spikes + 1, last))
upper = np.where(spikes < last, spikes + 1, np.maximum(spikes - 1, 0))
integrated[spikes] = (integrated[lower] + integrated[upper]) / 2

plt.figure()
plt.plot(freqs, np.log(integrated))


In [None]:
# Load a previously saved NumPy array from the "cleaned" output directory
# (presumably block 21 of this observation — confirm against the export step).
block21 = np.load("GBT_57532_09539_HIP56445_fine/cleaned/21.npy")

In [None]:
# Visual check of the loaded block (plot_segment is presumably provided by the
# `from utils import *` star-import — it is not defined in this notebook).
plot_segment(block21)

In [None]:
# From here on `data` is another name for the cleaned block (same object, no copy).
data = block21

In [None]:
# Slide a 200-column window in steps of 100 across each of 14 `channel_len`-wide
# slices of `data` and keep the windows whose second norm_test return value
# (presumably a p-value — confirm in utils) is below 1e-25.
# res[chan] is a list of (offset within the slice, s, p) tuples.
res = []
for chan in tqdm(range(14)):
    res.append([])
    window = data[:, channel_len*(chan):channel_len*(chan+1)]
    # window_f = freqs[channel_len*(chan):channel_len*(chan+1)]
    # Stop at the last offset where a complete 200-column window still fits.
    # The previous bound (len // 200 * 200) let the final step run past the end
    # of the slice, so the last window was silently shorter than 200 columns
    # yet judged against the same threshold.
    for i in range(0, window.shape[1] - 200 + 1, 100):
        test_data = window[:, i:i+200]
        s, p = norm_test(test_data)
        if p < 1e-25:
            res[chan].append((i, s, p))

In [None]:
from multiprocessing import Pool
from time import time

import warnings
# NOTE(review): with no category or module argument this silences every warning
# for the rest of the kernel session, not only inside this cell.
warnings.filterwarnings("ignore")


def threshold_hits(chan, width=200, step=100, p_threshold=1e-40):
    """Collect the sliding windows of one channel slice that pass the threshold.

    Reads the notebook-level names ``data``, ``channel_len`` and ``norm_test``.

    Parameters
    ----------
    chan : int
        Index of the ``channel_len``-wide column slice of ``data`` to scan.
    width : int, optional
        Number of columns in each tested window (default 200).
    step : int, optional
        Column stride between consecutive windows (default 100).
    p_threshold : float, optional
        A window is kept when the second value returned by ``norm_test`` is
        strictly below this (default 1e-40).

    Returns
    -------
    list of [int, s, p]
        One entry per kept window: the absolute column offset into ``data``
        (``channel_len * chan + i``) followed by the two values returned by
        ``norm_test`` for that window.
    """
    offset = channel_len * chan
    window = data[:, offset:offset + channel_len]
    # window_f = freqs[channel_len*(chan):channel_len*(chan+1)]
    hits = []
    # Visit only offsets where a complete window fits. The previous bound
    # (len // 200 * 200) let the final step overrun the slice, so the last
    # window was shorter than the others but held to the same threshold.
    for i in range(0, window.shape[1] - width + 1, step):
        s, p = norm_test(window[:, i:i + width])
        if p < p_threshold:
            hits.append([offset + i, s, p])
    return hits

# Scan 14 channel slices on a pool of 12 worker processes and print the elapsed
# wall-clock seconds. Pool.map returns results in input order, so chan_hits[k]
# is the hit list for slice k.
start = time()
with Pool(12) as p:
    chan_hits = p.map(threshold_hits, range(14))
end = time()
print(end-start)

In [None]:
# Total number of kept windows across all channel slices.
sum(map(len, chan_hits))

In [None]:
# Full dump of the per-slice hit lists produced by the pool run.
chan_hits

In [None]:
# Column offset within block21 where the 200-column stamp to inspect begins.
left = 824400
plot_segment(block21[:, left:left+200])

In [None]:
# Frequencies of the stamp columns, addressed as block 21 of the full axis.
# `freqs` may have been cut down to the first 16 blocks by an earlier cell, in
# which case this range lies past its end and the slice would come back empty;
# rebuild the full axis from the file in that situation.
stamp_start = 21*channel_len + left
freq_axis = freqs if len(freqs) >= stamp_start + 201 else wf.populate_freqs()
sub_freqs = freq_axis[stamp_start:stamp_start+201]

stamp = block21[:, left:left+200]
n_ints = stamp.shape[0]          # one y tick per row of the stamp
tsamp = wf.header[b'tsamp']

plt.figure(figsize=(10, 6))
# One label per tick. The previous label array, np.arange(0, tsamp*18, tsamp),
# held 18 entries for 16 ticks, which newer Matplotlib rejects with ValueError.
plt.yticks(np.arange(n_ints), ["%.2f" % t for t in np.arange(n_ints) * tsamp])
x_ticks = np.arange(0, 200, 49)
plt.xticks(x_ticks, ["%.5f" % freq for freq in sub_freqs[x_ticks]])
plt.ylabel("time from obs start [s]")
plt.xlabel("frequency [MHz]")
plt.imshow(rebin(stamp, 1, 1), **plt_args)

In [None]:
# Channel spacing taken from the first two frequency samples.
foff = freqs[1] - freqs[0]
# NOTE(review): `a` and `b` are not defined anywhere above this cell (`a` is
# only bound further down, to a dask array), so this cell relies on leftover
# kernel state. The bare tuple below is not the cell's last expression, so it
# displays nothing.
foff, a, b
np.arange(b, a, -foff*100)

In [None]:
# Inspect the data around one specific frequency value (presumably copied from
# an earlier result — record where it came from).
show_stamp_f(freqs, data, 1025.0100805927195)

In [None]:
# Hit count for each channel slice, followed by the overall total.
hits = list(map(len, chan_hits))
print(hits)
print(sum(hits))

In [None]:
# Hits of channel slice 7 ordered by ascending third field (the value compared
# against the threshold); the sort is stable, so ties keep their scan order.
sorted_hits = list(res[7])
sorted_hits.sort(key=lambda hit: hit[2])
sorted_hits

In [None]:
# Column offsets (first field) of the first 20 entries of the sorted hit list.
top = [offset for offset, *_ in sorted_hits[:20]]
top

In [None]:
# `top` holds offsets relative to channel slice 7 (it is derived from res[7]),
# so the stamps have to be cut from that same slice. The global `window` left
# behind by the serial scan loop is the *last* slice it visited, not slice 7,
# so using it here displayed the wrong part of the data.
hit_chan = 7  # keep in sync with the res[...] index used to build sorted_hits
hit_window = data[:, channel_len*hit_chan:channel_len*(hit_chan+1)]
for i in top:
    print(i)
    show_stamp(hit_window, i)

In [None]:
# Up to the first 15 per-slice hit lists (the serial scan creates 14, so this
# shows all of them).
res[:15]

In [None]:
# Quick look at 128 columns of `data` starting at column 8, drawn with
# Matplotlib's default imshow settings (plt_args is not applied here).
plt.figure()
plt.imshow(data[:, 8:8+128])

In [None]:
# Read the whole coefficient file as text; the handle is closed on leaving the
# `with` block. (Presumably polyphase-filterbank taps, judging by the file
# name — confirm.)
with open("pfb512coef.txt", "r") as f:
    coef_file = f.read()

In [None]:
# Parse the file text into floats, one value per line.
lines = coef_file.splitlines()
filter_coefs = [float(text) for text in lines]

In [None]:
# Divide the raw values by 2**17 (presumably the file stores fixed-point
# integers with 17 fractional bits — confirm against the filter design).
coefs = np.array(filter_coefs)/2**17

In [None]:
# Coefficient value versus its index in the file.
plt.figure()
plt.plot(coefs)

In [None]:
from numpy import fft
# Transform length passed to fft.fft, which zero-pads (or truncates) `coefs`
# to this many points.
l = 2**16
f = fft.fft(coefs, l)
plt.figure()
# Natural log of the squared magnitude of the transform.
plt.plot(np.log(np.abs(f)**2))

In [None]:
# Fold the integrated spectrum into 15 rows of `channel_len` samples so the
# blocks can be overlaid. Only the first 15 * channel_len samples are used:
# np.reshape raises ValueError when the element count does not match, which is
# what happened whenever `integrated` covered more than 15 blocks (an earlier
# cell keeps 16).
n_stack = 15
stacked = np.reshape(integrated[:n_stack * channel_len], (n_stack, channel_len))
plt.figure()
for i in range(n_stack):
    plt.plot(np.log(stacked[i]))

In [None]:
# Down-sample with blimpy's rebin using factors 1 and 64 (presumably: rows left
# as they are, columns averaged in groups of 64 — confirm the argument order).
rebined = rebin(stacked, 1, 64)

In [None]:
# Image of the natural log of the rebinned stack, drawn with the shared imshow settings.
plt.figure()
plt.imshow(np.log(rebined), **plt_args)

In [None]:
# Mean profile across the stacked rows.
model_shape = np.sum(stacked, axis=0) / len(stacked)

# Replace outliers (> 1e10) with the mean of the remaining samples. Taking the
# mean over the whole profile, as before, let the outliers inflate their own
# replacement value. If every sample is flagged there is nothing clean to
# average, so fall back to the overall mean.
outliers = model_shape > 10**10
clean = model_shape[~outliers]
fill_value = np.mean(clean) if clean.size else np.mean(model_shape)
model_shape[outliers] = fill_value

In [None]:
# Mean profile after outlier replacement, plotted against sample index.
plt.figure()
plt.plot(model_shape)

In [None]:
# Open the HDF5 file read-only and wrap its "data" dataset as a dask array.
# The file handle is not closed in this notebook; it is left open for the
# array that is built on top of it.
h5_file = h5py.File(fri_obs, "r")
chunk_shape = (2, 1, channel_len*14)
a = da.from_array(h5_file["data"], chunks=chunk_shape)
a

In [None]:
# Write the array to disk as a stack of .npy files split along axis 2 and
# report the elapsed seconds.
start = time()
# The earlier `a.persist()` call was dropped: persist returns a new collection
# rather than modifying `a`, so discarding its result only computed the whole
# array once for nothing before to_npy_stack computed it again.
da.to_npy_stack("original_a", a, axis=2)
end = time()
print(f"{end-start}")

In [None]:
# Rebind `a` to the on-disk .npy stack written above, replacing the
# HDF5-backed array.
a = da.from_npy_stack("original_a")
a

In [None]:
start = time()
means = da.mean(a, axis=2)
# da.mean only builds a task graph, so compute inside the timed region: the
# printed time then covers real work, and `means_data` (the NumPy result that
# later cells divide by) exists when the notebook is run top to bottom.
# NOTE: this reads the entire array once.
means_data = means.compute()
end = time()

print(f"{end-start}")
# Re-wrap the already computed means with a trailing length-1 axis so they
# broadcast against `a` along axis 2 without being recomputed downstream.
means = da.from_array(means_data[:, :, None])
means

In [None]:
# Re-display the dask array summary (shape, chunks, dtype).
a

In [None]:
# Presumably the number of 14-block chunks: 308 blocks (318230528 / 1033216)
# divided into groups of 14 — confirm.
308/14

In [None]:
# Lazy element-wise division of the data by `means`; nothing is computed until
# the result is written or .compute() is called.
normalized_a = da.divide(a, means)
normalized_a

In [None]:
# Write the normalized array to disk as a .npy stack split along axis 2 and
# report the elapsed seconds (this is where the lazy division actually runs).
start = time()

da.to_npy_stack('normalized_a/', normalized_a, axis=2)

end = time()

print(f"{end-start}")

In [None]:
# Overall shape and per-axis chunk sizes of the lazy normalized array.
print(normalized_a.shape)
print(normalized_a.chunks)

In [None]:
# Time a full in-memory computation of the normalized array.
# NOTE(review): .compute() materializes the entire result as one NumPy array;
# confirm it fits in RAM before running.
start = time()
normalized_a_data = normalized_a.compute()
end = time()
print(f"{end-start}")

In [None]:
# Time loading a 2-D subset into memory: every row, index 0 of the middle axis,
# and the first 16 * 1033216 columns (1033216 is the same value as channel_len).
start = time()

original_a_data = a[:, 0, :16 * 1033216].compute()

end = time()
print(f"{end-start}")

In [None]:
# Benchmark: normalize the in-memory subset through dask.
# Wrapping and dividing are lazy; only the .compute() call is timed.
read_a = da.from_array(original_a_data)
read_and_normalize = da.divide(read_a, means_data)

start = time()
normalized_a_data = read_and_normalize.compute()
end = time()
print(f"{end-start}")

# Timing recorded from an earlier run (seconds):
# 1.6420118808746338

In [None]:
# `normalized_a_data` is intentionally kept alive here: later cells plot it and
# compare it with the NumPy result, and the `del` that used to sit in this cell
# made them fail with NameError when the notebook is run top to bottom.
# Free it after the final comparison instead if memory is tight.

In [None]:
# Benchmark: the same normalization done directly with NumPy broadcasting.
start = time()

numpy_normalized_a_data = original_a_data / means_data

end = time()
print(f"{end-start}")

# Timing recorded from an earlier run (seconds):
# 0.6659681797027588

In [None]:
# `numpy_normalized_a_data` is intentionally kept alive here: the very next
# cell reads its shape and the final cell compares it with the dask result, so
# the `del` that used to sit in this cell made both fail with NameError.
# Free it after the final comparison instead if memory is tight.

In [None]:
# Shape of the NumPy-normalized subset.
numpy_normalized_a_data.shape

In [None]:
# Plot the normalized subset.
plot_segment(normalized_a_data)

In [None]:
# Plot the un-normalized subset for visual comparison.
plot_segment(original_a_data)

In [None]:
# Sanity check: the dask and NumPy normalizations should have the same shape
# and agree element-wise within np.allclose's default tolerances.
print(normalized_a_data.shape == numpy_normalized_a_data.shape)
print(np.allclose(normalized_a_data, numpy_normalized_a_data))