In [1]:
from scipy.optimize import minimize
import numpy as np
import multiprocessing as mp
import shelve

from helper import get_predator_prey_data, mse_trunc, \
    remove_data_points_rand, remove_data_points_det, int_cost_lotka_volterra
from workers import vary_truncation_worker

In [2]:
def get_n_removed_points(d):
    """Count how many entries of array ``d`` equal ``-1``, across all dimensions.

    ``-1`` is the value this notebook counts as a "removed" data point.
    """
    removed_mask = d.flatten() == -1
    return int(np.count_nonzero(removed_mask))

def trunc_cost(*args):
    """Evaluate ``int_cost_lotka_volterra`` on ``args`` using ``mse_trunc`` as the cost.

    All positional arguments are forwarded unchanged; only the ``cost``
    keyword is fixed here.
    """
    cost_value = int_cost_lotka_volterra(*args, cost=mse_trunc)
    return cost_value

def generate_truncated_datasets(P, n, max_points, col):
    """Build a list of datasets with progressively more points removed.

    The first entry is ``P`` itself. Each following entry is produced by
    calling ``remove_data_points_det`` on a copy of the previous entry, so
    removals accumulate from one entry to the next.

    Parameters
    ----------
    P : array-like with a ``copy()`` method
        Starting dataset.
    n : int
        Passed to ``remove_data_points_det`` on every step.
    max_points : int
        Determines the number of steps: ``max_points // n`` (floored).
    col : int
        Forwarded as the ``col`` keyword of ``remove_data_points_det``.

    Returns
    -------
    list
        ``max_points // n + 1`` datasets, starting with the original ``P``.
    """
    datasets = [P]
    n_steps = max_points // n
    current = P
    for _ in range(n_steps):
        # Work on a copy so the dataset stored in the previous slot is not
        # handed to the removal helper directly.
        current = remove_data_points_det(current.copy(), n, col=col)
        datasets.append(current)
    return datasets

In [3]:
def vary_truncation(trunc_datasets, rv, n_sim=100, T_start=200, T_steps=2000, t=None):
    """Run ``n_sim`` worker tasks for every dataset in ``trunc_datasets`` in parallel.

    One ``(dataset_index, simulation_index)`` task is queued per dataset and
    simulation. ``mp.cpu_count()`` daemon processes running
    ``vary_truncation_worker`` are then started and joined.

    Parameters
    ----------
    trunc_datasets : sequence
        Datasets handed to every worker; one result row is created per entry.
    rv
        Passed through to the workers unchanged.
    n_sim : int
        Number of tasks queued per dataset.
    T_start, T_steps
        Passed through to the workers unchanged.
    t : optional
        Passed through to the workers unchanged. When omitted, the
        module-level ``t`` is used, which is what this function implicitly
        relied on before ``t`` became a parameter. Prefer passing it
        explicitly so the function does not depend on hidden notebook state.

    Returns
    -------
    tuple
        ``(results, trunc_datasets)`` where ``results`` is a plain list with
        one plain list per dataset, holding whatever the workers stored in
        the shared result lists (presumably the fitted costs -- confirm
        against ``vary_truncation_worker``).

    Raises
    ------
    NameError
        If ``t`` is not given and no module-level ``t`` exists.
    """
    if t is None:
        # Backward-compatible fallback for callers that do not pass ``t``.
        try:
            t = globals()["t"]
        except KeyError:
            raise NameError(
                "vary_truncation needs `t`: pass it explicitly or define a "
                "module-level `t` before calling"
            ) from None

    # The manager owns a server process; the context manager shuts it down
    # once the results have been copied out of the proxy objects.
    with mp.Manager() as manager:
        results = manager.list()

        work_queue = mp.Queue()
        for dataset_idx in range(len(trunc_datasets)):
            results.append(manager.list())
            for sim_idx in range(n_sim):
                work_queue.put((dataset_idx, sim_idx))

        processes = []
        for _ in range(mp.cpu_count()):
            p = mp.Process(target=vary_truncation_worker, args=(
                work_queue, results, T_start, T_steps, t, rv, trunc_datasets
            ))
            p.daemon = True
            p.start()
            processes.append(p)

        for p in processes:
            p.join()

        # Materialise the proxies while the manager is still alive.
        collected = [list(row) for row in results]

    return collected, trunc_datasets

In [20]:
%%time
if True:
    with shelve.open("Arrays/data_removal") as shelf:
        n = 10           # number of points to remove every time
        max_points = 100 # max number of points to remove. if not divisible by n, will floor down
        n_sim = 3

        t, P = get_predator_prey_data()

        np.random.seed(0o1331)
        
        # truncate x timeseries
        trunc_datasets_x = generate_truncated_datasets(P, n, max_points, 0)
        print([get_n_removed_points(P) for P in trunc_datasets_x])
        rv = [1, 1, 1, 1]

        costs_x, datasets_x = vary_truncation(trunc_datasets_x, rv, n_sim=n_sim)
        shelf["data_removal_costs_x"] = costs_x
        shelf["datasets_x"] = datasets_x

        # truncate y timeseries
        trunc_datasets_y = generate_truncated_datasets(P, n, max_points, 1)
        print([get_n_removed_points(P) for P in trunc_datasets_y])

        costs_y, datasets_y = vary_truncation(trunc_datasets_y, rv, n_sim=n_sim)
        shelf["data_removal_costs_y"] = costs_y
        shelf["datasets_y"] = datasets_y

        shelf.close()

[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
[0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]


  s = (x.conj() * x).real
  s = (x.conj() * x).real
  s = (x.conj() * x).real


CPU times: user 83.2 ms, sys: 88.5 ms, total: 172 ms
Wall time: 2min 4s


In [21]:
# Load the results stored in the "Arrays/data_removal" shelf.
# The context manager closes the shelf even if one of the keys is missing.
with shelve.open("Arrays/data_removal") as shelf:
    costs_x = shelf["data_removal_costs_x"]
    datasets_x = shelf["datasets_x"]

    costs_y = shelf["data_removal_costs_y"]
    datasets_y = shelf["datasets_y"]

In [22]:
# Costs loaded for the x-truncated runs: one row per truncated dataset,
# in the order the datasets were generated.
costs_x

[[5.898598431679339, 5.898598431679339, 5.898598431679339],
 [7.216425553069774, 7.215876000986826, 7.215876000986826],
 [7.097358679269526, 7.098555064846292, 7.097421191041987],
 [6.220510088434671, 6.220510088434671, 6.221037503593258],
 [6.268721392945568, 6.268721392945568, 6.26902818721251],
 [5.9011074591111665, 5.901346156247254, 5.901346156247254],
 [5.325361449519944, 5.325108340920113, 5.327505362373598],
 [5.345663510947823, 5.343350469195431, 5.343479799092125],
 [4.543657326309521, 4.545228993109932, 4.543479784479679],
 [4.451041077922979, 4.452860596938904, 4.453160071366083],
 [4.379609670470187, 4.367881350007014, 4.379805019222678]]

In [23]:
# Costs loaded for the y-truncated runs: one row per truncated dataset,
# in the order the datasets were generated.
# NOTE(review): in the recorded output every row after the first is ~1e18 or
# nan, unlike costs_x -- worth checking the y-truncation fits before relying
# on these values.
costs_y

[[5.898598431679339, 5.898598431679339, 5.898598431679339],
 [1.4216841918627218e+18, 2.2005083363548424e+18, 2.2005083363548424e+18],
 [2.2005083363548424e+18, 2.647187548298168e+18, nan],
 [nan, nan, 1.737265154030728e+18],
 [1.0120913251374524e+18, 1.0120913251374524e+18, 1.0120913251374524e+18],
 [1.3045884265960855e+18, 1.0823463968807363e+18, 1.0823463968807363e+18],
 [1.0823463968807363e+18, 5.4985984552364314e+17, 1.0880482901591923e+18],
 [1.0880482901591923e+18, 1.0880482901591923e+18, 1.262336453257498e+18],
 [1.3515396717857428e+18, 1.3515396717857428e+18, 1.3515396717857428e+18],
 [1.3665490466172298e+18, 1.4721440130755348e+18, 1.4721440130755348e+18],
 [1.4721440130755348e+18, 9.881951643985581e+17, 9.525956152107196e+17]]