In [1]:
import numpy as np
import matplotlib.pyplot as pl
import time

In [2]:
%load_ext line_profiler

In [3]:
# Physical constants (rounded values, presumably SI units — TODO confirm).
c = 3e8  # presumably the speed of light
G = 6.67e-11  # presumably the gravitational constant
pi = np.pi
# Prefactor that is multiplied by f0**(8/3) * Mc**(5/3) below to obtain beta.
const = 96/5*pi**(8/3)*(G/c**3)**(5/3)

Specific arguments

In [4]:
# Run parameters. Units are not stated in the notebook; presumably Hz, kg and
# seconds — TODO confirm.
f0 = 120  # frequency scale entering beta and the phase model
Mc = 3e-4* 2e30  # presumably a chirp mass of 3e-4 solar masses in kg — TODO confirm
f_max = 200  # used below to set the sampling rate f_signal
T_obs = 10**(4)  # duration covered by the time grid t
# Length of the zero-padded buffer used by pad_calc.
# NOTE(review): the origin of the factor 323.4 is not documented — confirm.
pad_len = int(323.4*T_obs)
beta = const*f0**(8/3)*Mc**(5/3)

f_signal = 40*f_max  # sampling rate of the time grid
nt = round(f_signal*T_obs)  # number of samples in the time grid
t = np.arange(nt)/f_signal  # sample times 0, 1/f_signal, 2/f_signal, ...

In [5]:
# Phase model: the time derivative of phi/(2*pi) is f0*(1-8/3*beta*t)**(-3/8),
# so the instantaneous frequency equals f0 at t = 0.
phi = -6*pi/5*f0*(1-8./3.*beta*t)**(5/8)/beta
# Wrap the phase modulo 2*pi before exponentiating.
phi = np.mod(phi,2*pi)
signal = 1*np.exp(1j*phi)  # unit-amplitude complex exponential

noise = 0  # no noise is added in this run
data = signal + noise

## Strobo

In [6]:
def strobo(beta, data, f_ratio, *, times=None, sample_rate=None):
    """Stroboscopically resample ``data`` on a rescaled time axis.

    The rescaled time is ``tau = -3/5*(1 - 8/3*beta*times)**(5/8)/beta``.
    With ``f_new = sample_rate/f_ratio``, one input sample is kept every time
    ``tau*f_new`` passes an integer; the kept sample is the last one before
    the crossing.

    Parameters
    ----------
    beta : float
        Parameter of the time rescaling. Must be non-zero, and
        ``8/3*beta*times`` must stay below 1, otherwise the fractional power
        evaluates to NaN.
    data : ndarray
        Samples aligned with ``times``.
    f_ratio : float
        Ratio between the input sampling rate and the output tick rate.
    times : ndarray, optional
        Sample times of ``data``. Defaults to the notebook-level ``t``.
    sample_rate : float, optional
        Sampling rate of ``data``. Defaults to the notebook-level
        ``f_signal``.

    Returns
    -------
    resampled : ndarray
        The kept samples of ``data``.
    t_out : ndarray
        Rescaled time of every kept sample, measured from the rescaled time
        of the first input sample.
    """
    # Fall back to the notebook globals so existing three-argument calls keep
    # working; passing both keywords makes the function self-contained.
    if times is None:
        times = t
    if sample_rate is None:
        sample_rate = f_signal

    new_t = -3/5*(1-8/3*beta*times)**(5/8)/beta
    f_new = sample_rate/f_ratio

    # Express the rescaled time in output ticks.
    new_t *= f_new
    tick_number = np.floor(new_t)
    # diff is non-zero at i when sample i+1 lies in a later tick than sample i.
    idx = np.nonzero(np.diff(tick_number))
    resampled = data[idx]
    t_out = (new_t[idx]-new_t[0])/f_new
    return (resampled, t_out)

In [7]:
# Strobe the data once and keep the non-negative-frequency half of its spectrum.
f_ratio = 25
out, t_out = strobo(beta, data, f_ratio)
nt_new = len(out)

corrected = np.fft.fftshift(np.fft.fft(out))
# The strobed series holds one sample per tick of the rescaled time, so its
# nominal spacing is f_ratio/f_signal. t_out[1]-t_out[0] can differ from that
# by up to one input sample, because strobo keeps the last sample before each
# tick, and it is undefined for fewer than two samples.
freq_corrected = np.fft.fftshift(np.fft.fftfreq(nt_new, d=f_ratio/f_signal))

# After fftshift the zero-frequency bin sits at index n//2.
corrected = corrected[nt_new//2:]
freq_corrected = freq_corrected[nt_new//2:]
resampled_power = np.abs(corrected/nt_new)**2

In [8]:
# strobo(beta, data, f_ratio)

In [32]:
# Line-by-line timing of strobo for one call (uses the line_profiler extension loaded above).
%lprun -f strobo strobo(beta, data, f_ratio)

In [33]:
# strobo is dominated by the new_t definition and the idx calculation
# using np operations for the new_t definition has a negligible effect
# the only way to gain further improvement is to try cython or numba

## For single beta

In [8]:
def inspecting(beta):
    """Return the frequency axis and power spectrum of the strobed data.

    The notebook-level ``data`` is resampled with
    ``strobo(beta, data, f_ratio)`` and Fourier transformed. The full
    two-sided spectrum is returned in ``fftshift`` order (no half-spectrum
    slicing), so both outputs always have the same length.

    Parameters
    ----------
    beta : float
        Rescaling parameter forwarded to ``strobo``.

    Returns
    -------
    freq_corrected : ndarray
        Frequencies of the spectrum bins.
    resampled_power : ndarray
        ``|FFT/N|**2`` with ``N`` the number of strobed samples.
    """
    out, _ = strobo(beta, data, f_ratio)
    n_out = len(out)
    corrected = np.fft.fftshift(np.fft.fft(out))
    # Use the nominal spacing of the strobed series (one sample per tick of
    # the rescaled time). The difference of the first two strobed time stamps
    # can be off by up to one input sample and needs at least two samples.
    freq_corrected = np.fft.fftshift(np.fft.fftfreq(n_out, d=f_ratio/f_signal))
    resampled_power = np.abs(corrected/n_out)**2
    return freq_corrected, resampled_power

In [35]:
# Line-by-line timing of inspecting for one call (uses the line_profiler extension loaded above).
%lprun -f inspecting inspecting(beta)

In [36]:
# inspecting is dominated by strobo and the fft
# for t_obs = 1e3, strobo is more important,
# but the fft grows as O(n log n) while strobo grows as O(n), so the fft will eventually dominate
# at t_obs = 1e4.5, strobo still accounts for ~75% of the time

## Padding the data

In [9]:
def pad_calc(beta):
    """Find the strongest spectral peak of the strobed, zero-padded data.

    The notebook-level ``data`` is resampled with
    ``strobo(beta, data, f_ratio)``, zero-padded to ``pad_len`` samples and
    Fourier transformed; the bin with the largest power is reported.

    Parameters
    ----------
    beta : float
        Rescaling parameter forwarded to ``strobo``.

    Returns
    -------
    peak_freq : float
        Frequency of the bin with the largest power.
    peak_power : float
        ``|FFT/pad_len|**2`` at that bin. The normalisation uses the padded
        length, not the number of strobed samples.

    Raises
    ------
    ValueError
        If the strobed series is longer than ``pad_len``.
    """
    out, _ = strobo(beta, data, f_ratio)
    if len(out) > pad_len:
        # Without this check the FFT below would silently crop the series.
        raise ValueError(
            f"strobed series has {len(out)} samples but pad_len is {pad_len}"
        )

    # fft(..., n=pad_len) appends the zeros itself.
    pad_corrected = np.fft.fftshift(np.fft.fft(out, n=pad_len))
    # Use the nominal spacing of the strobed series (one sample per tick of
    # the rescaled time). The difference of the first two strobed time stamps
    # can be off by up to one input sample.
    pad_freq_corrected = np.fft.fftshift(np.fft.fftfreq(pad_len, d=f_ratio/f_signal))
    pad_resampled_power = np.abs(pad_corrected/pad_len)**2

    arg_max = np.argmax(pad_resampled_power)
    return pad_freq_corrected[arg_max], pad_resampled_power[arg_max]

In [10]:
# Line-by-line timing of pad_calc for one call (uses the line_profiler extension loaded above).
%lprun -f pad_calc pad_calc(beta)

In [11]:
# pad_calc profile is identical to that of inspecting, as expected

In [12]:
from joblib import Parallel, delayed

In [14]:
# Serial scan: run pad_calc for 30 fractional offsets of beta, log-spaced
# between 1e-8 and 1e-3, and print the elapsed time in minutes.
# perf_counter is monotonic, so the interval is not affected by system clock
# adjustments the way time.time() is.
tic = time.perf_counter()
offset_arr = np.logspace(-8, -3, 30)
result = [pad_calc(beta+frac*beta) for frac in offset_arr]
toc = time.perf_counter()
print((toc-tic)/60)

1.045946180820465


In [13]:
# Parallel scan: same 30 offsets of beta as the serial run, spread over
# 10 joblib workers; prints the elapsed time in minutes.
# perf_counter is monotonic, so the interval is not affected by system clock
# adjustments the way time.time() is.
tic = time.perf_counter()
offset_arr = np.logspace(-8, -3, 30)
result = Parallel(n_jobs=10)(delayed(pad_calc)(beta+frac*beta) for frac in offset_arr)
toc = time.perf_counter()
print((toc-tic)/60)

0.4799941062927246


In [73]:
# T_obs = 1e3: Parallel cuts the runtime to roughly 1/2. Should improve further for longer T_obs
# T_obs = 1e4: memory constraints are an issue and scale with num_cpu; runtime was still cut to around 1/2?