In [1]:
import os
import sys
sys.path.insert(0, os.path.abspath('../'))
import numpy as np
import random
from andi_datasets.models_phenom import models_phenom
from andi_datasets.datasets_phenom import datasets_phenom
from andi_datasets.utils_challenge import label_continuous_to_list

In [2]:
# Simulation settings forwarded to models_phenom().multi_state(N=..., T=..., L=...).
# The generation loop iterates `range(N)` over the second axis of the returned arrays.
N = 10
# T is also the exclusive upper bound of the random index drawn in chop_with_shift.
T = 200
# NOTE(review): L=None is passed through unchanged; its meaning is defined by andi_datasets — confirm.
L = None

# Window widths handed to make_signal (even values 10, 12, ..., 98).
WINDOW_WIDTHS = np.arange(10, 100, 2)
# Length of every window cut out of the compressed signal by chop_with_shift.
SHIFT_WIDTH = 40
# Step between successive window centres when chop_with_shift builds the regression samples.
REG_JUMP = 2

In [3]:
def uncumulate(xs:np.ndarray):
    """Return the step-to-step differences of a 1-D array, prefixed with 0.

    The output has one leading ``0.`` followed by ``xs[i] - xs[i-1]`` for
    every ``i >= 1``, so a non-empty input keeps its length. Only 1-D input
    is accepted (enforced with an assertion).
    """
    assert xs.ndim == 1
    # Difference of the two shifted views == consecutive differences.
    steps = xs[1:] - xs[:-1]
    return np.concatenate(([0.], steps))

In [4]:
def make_signal(x_pos, y_pos, win_widths):
    """Build one sliding-window score signal per window width for a 2-D trajectory.

    For every width in ``win_widths`` that is strictly smaller than
    ``len(x_pos)``, a window of ``2 * int(width / 2)`` samples is slid along
    the trajectory. At each centre the window is cut into two halves and, per
    coordinate, each half is shifted to start at zero, converted to the
    cumulative sum of its absolute values, divided by the larger final peak of
    the two halves and then by the width. The score at that centre is the
    absolute value of the summed difference between the halves (x and y
    added together) times the max/min ratio of the two half standard
    deviations of x, times the same ratio for y.

    Centres that cannot hold a full window (the first and last
    ``int(width / 2)`` samples) are filled with -1.

    Note: a half whose standard deviation is 0, or a window whose peak is 0,
    makes the corresponding division produce inf/nan.

    Args:
        x_pos: x coordinates; must support slicing and arithmetic (NumPy
            arrays in this notebook).
        y_pos: y coordinates, indexed over the same range as ``x_pos``.
        win_widths: iterable of window widths; widths ``>= len(x_pos)`` are
            skipped.

    Returns:
        Tuple ``(all_vals, normalized_vals)``. ``all_vals`` has one row per
        used width and one column per sample, with 1e-7 added everywhere (so
        padding reads -1 + 1e-7). ``normalized_vals`` is ``all_vals`` with
        each row divided by its own maximum.
    """
    def _half_profiles(coords, width):
        # Split the window in two, zero-anchor each half, and remember the
        # spread of each half before it is turned into a cumulative profile.
        mid = int(len(coords) / 2)
        head = coords[:mid] - float(coords[:mid][0])
        tail = coords[mid:] - float(coords[mid:][0])
        head_std, tail_std = np.std(head), np.std(tail)

        head = np.cumsum(abs(head))
        tail = np.cumsum(abs(tail))

        # Scale both halves by the same peak, then by the window width.
        peak = max(np.max(abs(head)), np.max(abs(tail)))
        head = head / peak / width
        tail = tail / peak / width

        spread_ratio = max(head_std, tail_std) / min(head_std, tail_std)
        return head, tail, spread_ratio

    rows = []
    for width in win_widths:
        if width >= len(x_pos):
            continue

        half = int(width / 2)
        scores = []
        for centre in range(half, len(x_pos) - half):
            window = slice(centre - half, centre + half)
            x_head, x_tail, x_ratio = _half_profiles(x_pos[window], width)
            y_head, y_tail, y_ratio = _half_profiles(y_pos[window], width)
            scores.append(abs(np.sum(x_head - x_tail + y_head - y_tail)) * x_ratio * y_ratio)

        # -1 marks positions where no full window fits.
        padding = np.ones(half) * -1
        rows.append(np.concatenate((padding, scores, padding)))

    all_vals = np.array(rows) + 1e-7
    normalized_vals = all_vals.copy()
    for row in normalized_vals:
        row /= np.max(row)
    return all_vals, normalized_vals

In [5]:
def compress_signals(signals):
    """Collapse a stack of per-window signals into a single 1-D signal.

    ``signals`` is walked column by column (rows of its transpose). For each
    column the entries that are ``>= 0`` are summed; negative entries are
    ignored, and a column without any non-negative entry contributes 1e-7.

    The column sums are then shifted by their minimum and divided by the
    maximum of the *unshifted* sums, so the smallest output is 0 and the
    largest is ``(max - min) / max`` (not necessarily 1).

    Args:
        signals: 2-D NumPy array, one row per signal.

    Returns:
        1-D NumPy array with one value per column of ``signals``.
    """
    column_totals = []
    for column in signals.transpose():
        active = column[column >= 0]
        column_totals.append(np.sum(active) if active.size != 0 else 1e-7)

    column_totals = np.array(column_totals)
    lowest = float(np.min(column_totals))
    highest = np.max(column_totals)
    return (column_totals - lowest) / highest

In [6]:
# Sampling limits used by chop_with_shift for the label-0 windows.
_NEGATIVES_PER_CHANGEPOINT = 3
_MAX_NEGATIVE_ATTEMPTS = 10


def _window_with_features(signal, center, feat1, feat2):
    """Cut a SHIFT_WIDTH-long window near ``center`` and append the two features.

    The window is centred on ``center`` when it fits inside ``signal``;
    close to the start it begins at ``center`` and close to the end it
    finishes just before ``center``.
    """
    half = SHIFT_WIDTH // 2
    if half <= center < signal.shape[0] - half:
        window = signal[center - half:center + half]
    elif center < half:
        window = signal[center:center + SHIFT_WIDTH]
    else:
        window = signal[center - SHIFT_WIDTH:center]
    return np.hstack((window, feat1, feat2))


def chop_with_shift(signal, feat1, feat2, changepoints=None, count_0=None, count_1=None):
    """Cut labelled training windows out of a compressed signal.

    Classification samples are ``SHIFT_WIDTH`` signal values followed by
    ``feat1`` and ``feat2``:

    * label 1: a window centred on each changepoint that lies at least
      ``SHIFT_WIDTH // 2`` samples away from both ends of ``signal``;
    * label 0: windows around random positions whose
      ``+/- SHIFT_WIDTH // 4`` neighbourhood contains no position within
      ``SHIFT_WIDTH // 4`` of any changepoint.

    For every accepted changepoint, up to 3 label-0 windows are drawn. The
    attempt counter is shared by all changepoints of the call and capped at
    10, so once 10 draws have been made each further changepoint gets a
    single draw. NOTE(review): this looks like it might have been meant as a
    per-changepoint budget; it is kept as is because changing it would alter
    the class balance of the generated data.

    If there are no changepoints, exactly one label-0 window is returned.

    Regression samples are plain ``SHIFT_WIDTH``-long windows (no features)
    centred every ``REG_JUMP`` samples within ``SHIFT_WIDTH // 4`` of each
    accepted changepoint; their label is ``changepoint - window_centre``.
    Windows that would run past either end of ``signal`` are dropped.

    Args:
        signal: 1-D NumPy array to cut windows from.
        feat1: scalar appended to every classification window.
        feat2: scalar appended after ``feat1``.
        changepoints: sequence of changepoint indices; ``None`` means none.
        count_0: running count of label-0 samples; ``None`` means 0.
        count_1: running count of label-1 samples; ``None`` means 0.

    Returns:
        ``(signals, labels, count_0, count_1, reg_signals, reg_labels)`` where
        the four collections are NumPy arrays and the counts are the updated
        running totals.
    """
    # The signature advertises None defaults; honour them instead of failing
    # on iteration / `+=` with a TypeError.
    if changepoints is None:
        changepoints = []
    count_0 = 0 if count_0 is None else count_0
    count_1 = 0 if count_1 is None else count_1

    half = SHIFT_WIDTH // 2
    quarter = SHIFT_WIDTH // 4
    # Random centres are drawn over the actual signal instead of the global T,
    # so the function stays correct for signals of any length.
    n_samples = signal.shape[0]

    chopped_signals, chopped_labels = [], []
    reg_chopped_signals, reg_chopped_labels = [], []

    if len(changepoints) == 0:
        center = np.random.randint(0, n_samples)
        chopped_signals.append(_window_with_features(signal, center, feat1, feat2))
        chopped_labels.append(0)
        count_0 += 1
        return (np.array(chopped_signals), np.array(chopped_labels), count_0, count_1,
                np.array(reg_chopped_signals), np.array(reg_chopped_labels))

    # Every index within a quarter window of any changepoint is off limits
    # for label-0 sampling.
    excluded = set()
    for cp in changepoints:
        excluded.update(range(cp - quarter, cp + quarter))

    attempts = 0  # shared by all changepoints of this call (see docstring)
    for cp in changepoints:
        if not (half <= cp < n_samples - half):
            continue

        chopped_signals.append(np.hstack((signal[cp - half:cp + half], feat1, feat2)))
        chopped_labels.append(1)
        count_1 += 1

        # `cp - quarter` (rather than `-SHIFT_WIDTH//4 + cp`, which floors the
        # negated value) keeps this range aligned with the exclusion zone
        # even when SHIFT_WIDTH is not a multiple of 4.
        for center in range(cp - quarter, cp + quarter, REG_JUMP):
            reg_window = signal[center - half:center + half]
            # Truncated (or empty, for a negative start) slices are skipped.
            if reg_window.shape[0] == SHIFT_WIDTH:
                reg_chopped_signals.append(reg_window)
                reg_chopped_labels.append(cp - center)

        negatives_for_cp = 0
        while True:
            attempts += 1
            center = np.random.randint(0, n_samples)
            if excluded.isdisjoint(range(center - quarter, center + quarter)):
                chopped_signals.append(_window_with_features(signal, center, feat1, feat2))
                chopped_labels.append(0)
                count_0 += 1
                negatives_for_cp += 1
                if negatives_for_cp >= _NEGATIVES_PER_CHANGEPOINT:
                    break
            if attempts >= _MAX_NEGATIVE_ATTEMPTS:
                break

    return (np.array(chopped_signals), np.array(chopped_labels), count_0, count_1,
            np.array(reg_chopped_signals), np.array(reg_chopped_labels))

In [7]:
# Seed the global RNGs so that Restart & Run All regenerates the same data set.
# This fixes every np.random call made in this notebook; the andi_datasets
# generators are reproducible too provided they draw from these global
# generators (TODO confirm).
SEED = 0
np.random.seed(SEED)
random.seed(SEED)

# Number of (two-state, single-state) simulation batches, N trajectories each.
N_MIXED_STEPS = 1500
# Number of extra single-state simulations with both alphas fixed to 1.0.
N_FIXED_ALPHA_TRAJS = 1000

input_signals = []
input_labels = []
input_reg_signals = []
input_reg_labels = []

# K_bound and alpha_bound are not referenced by any visible cell; kept in case
# other code relies on them.
K_bound = [1e-12, 1000000.0]
alpha_bound = [0, 1.999]
# Sampling ranges for the anomalous exponents of the two states.
alphas1 = [0.001, 0.3]
alphas2 = [1.0, 1.999]
count_0 = 0
count_1 = 0


def _chop_trajectory(trajs, labels, traj_idx, count_0, count_1):
    """Run the signal pipeline on one simulated trajectory and chop it.

    Takes trajectory ``traj_idx`` (second axis) of ``trajs``/``labels``,
    builds the multi-window signal, compresses it, derives the two scalar
    features and hands everything to ``chop_with_shift`` together with all
    changepoints but the last one returned by ``label_continuous_to_list``.

    Returns the 6-tuple produced by ``chop_with_shift``.
    """
    raw_signals, _ = make_signal(trajs[:, traj_idx, 0], trajs[:, traj_idx, 1], WINDOW_WIDTHS)
    changepoints, _, _, _ = label_continuous_to_list(labels[:, traj_idx, :])

    comp_signal = compress_signals(raw_signals)
    # Squared mean-to-std ratio of the compressed signal.
    feat1 = np.mean(comp_signal)**2 / np.std(comp_signal)**2
    # Mean over window widths of each raw signal's maximum.
    feat2 = np.max(raw_signals, axis=1).mean()

    return chop_with_shift(comp_signal, feat1, feat2, changepoints[:-1], count_0, count_1)


for step in range(N_MIXED_STEPS):
    if step % 100 == 0: print(step, count_0, count_1)
    alpha1 = np.random.uniform(alphas1[0], alphas1[1])
    alpha2 = np.random.uniform(alphas2[0], alphas2[1])
    single_alpha = np.random.choice([alpha1, alpha2])
    multi_trajs_model, multi_labels_model = models_phenom().multi_state(N=N,
                                                            L=L,
                                                            T=T,
                                                            alphas=[alpha1, alpha2],  # Fixed alpha for each state
                                                            Ds=[[0.05, 0.0], [0.1, 0.0]],# Mean and variance of each state
                                                            M=[[0.98, 0.02], [0.02, 0.98]]
                                                           )

    single_trajs_model, single_labels_model = models_phenom().multi_state(N=N,
                                                            L=L,
                                                            T=T,
                                                            alphas=[single_alpha, single_alpha],  # Fixed alpha for each state
                                                            Ds=[[0.1, 0.0], [0.1, 0.0]],# Mean and variance of each state
                                                            M=[[1.0, 0.0], [0.0, 1.0]]
                                                           )

    for i in range(N):
        # Two-state trajectory: keeps both classification and regression samples.
        chop_signal, chop_label, count_0, count_1, reg_signal, reg_label = _chop_trajectory(
            multi_trajs_model, multi_labels_model, i, count_0, count_1)
        input_signals.extend(chop_signal)
        input_labels.extend(chop_label)
        input_reg_signals.extend(reg_signal)
        input_reg_labels.extend(reg_label)

        # Single-state trajectory: only the classification samples are kept.
        chop_signal, chop_label, count_0, count_1, _, _ = _chop_trajectory(
            single_trajs_model, single_labels_model, i, count_0, count_1)
        input_signals.extend(chop_signal)
        input_labels.extend(chop_label)

for i in range(N_FIXED_ALPHA_TRAJS):
    # Two trajectories are simulated per call but only the first one is used.
    single_trajs_model, single_labels_model = models_phenom().multi_state(N=2,
                                                            L=L,
                                                            T=T,
                                                            alphas=[1.0, 1.0],  # Fixed alpha for each state
                                                            Ds=[[0.1, 0.0], [0.1, 0.0]],# Mean and variance of each state
                                                            M=[[1.0, 0.0], [0.0, 1.0]]
                                                           )
    chop_signal, chop_label, count_0, count_1, _, _ = _chop_trajectory(
        single_trajs_model, single_labels_model, 0, count_0, count_1)
    input_signals.extend(chop_signal)
    input_labels.extend(chop_label)

0 0 0
100 3738 2747
200 7390 5525
300 11214 8333
400 14938 11220
500 18685 14004
600 22350 16711
700 25983 19472
800 29693 22276
900 33426 25064
1000 37291 27826
1100 40946 30560
1200 44621 33415
1300 48461 36257
1400 52214 39063


In [8]:
# Turn the four accumulated Python lists into NumPy arrays (same names).
input_signals, input_labels, input_reg_signals, input_reg_labels = (
    np.array(samples)
    for samples in (input_signals, input_labels, input_reg_signals, input_reg_labels)
)

In [9]:
# Balanced class weights: each class is weighted by (total / 2) / class_count,
# so the rarer class gets the larger weight.
total = count_0 + count_1
half_total = total / 2.0
weight_for_0, weight_for_1 = ((1 / count) * half_total for count in (count_0, count_1))
class_weight = np.array([weight_for_0, weight_for_1])

# Summary of what was generated: array shapes, class counts, class weights.
print(f"{input_signals.shape} {input_labels.shape}")
print(f"{input_reg_signals.shape} {input_reg_labels.shape}")
print(f"{count_0} {count_1}")
print(class_weight)

(98661, 62) (98661,)
(595589, 60) (595589,)
56847 41814
[0.86777666 1.17976037]


In [10]:
# Persist everything needed for training in one compressed archive whose
# name records the window length and regression step used to build it.
output_path = f'./training_set_{SHIFT_WIDTH}_{REG_JUMP}.npz'
training_arrays = {
    'input_signals': input_signals,
    'input_labels': input_labels,
    'input_reg_signals': input_reg_signals,
    'input_reg_labels': input_reg_labels,
    'count_0': count_0,
    'count_1': count_1,
    'class_weight': class_weight,
}
np.savez_compressed(output_path, **training_arrays)