In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score

In [2]:
# Inputs are the train/test CSVs written by the previous notebook.
train, test = (
    pd.read_csv(csv_path)
    for csv_path in ('../data/train_2.csv', '../data/test_2.csv')
)

In [3]:
# Layout of the concatenated train+test signal, expressed in units of
# BATCH_SIZE samples: segment i spans BATCHES[i]..BATCHES[i + 1] and is
# labelled with CATEGORIES[i].
BATCH_SIZE = 100_000
# Segments starting at or after this boundary are treated as unlabelled
# (they are read from the `test` part of the concatenated signal).
# NOTE(review): assumes `train` holds exactly N_TRAIN_BATCHES * BATCH_SIZE rows - confirm.
N_TRAIN_BATCHES = 50
BATCHES = np.array([0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 65, 70])
CATEGORIES = np.array([1, 1, 2, 3, 5, 4, 2, 3, 4, 5, 6, 3, 4, 6, 2, 5, 4, 5, 6, 3, 6, 6])
CATEGORY = 3
assert len(BATCHES) == len(CATEGORIES) + 1, 'BATCHES must hold one more edge than CATEGORIES has labels'

signal = np.concatenate((train['signal'].values, test['signal'].values))
open_channels = train['open_channels'].values

# Segments that belong to the category handled by this notebook.
ix = np.where(CATEGORIES == CATEGORY)[0]
starts = BATCHES[ix]
ends = BATCHES[ix + 1]

X_parts = []
y_parts = []
for start, end in zip(starts, ends):
    lo, hi = start * BATCH_SIZE, end * BATCH_SIZE
    subsignal = signal[lo:hi]
    if start < N_TRAIN_BATCHES:
        subchannels = open_channels[lo:hi]
    else:
        # No labels available: mark every sample with the placeholder -1.
        subchannels = np.full(hi - lo, -1)

    if start == 35:
        # Keep only the first and the last BATCH_SIZE samples of this segment.
        # NOTE(review): presumably the dropped middle part is unusable - confirm
        # against the previous notebook.
        subsignal = np.concatenate((subsignal[:BATCH_SIZE], subsignal[-BATCH_SIZE:]))
        subchannels = np.concatenate((subchannels[:BATCH_SIZE], subchannels[-BATCH_SIZE:]))

    X_parts.append(subsignal)
    y_parts.append(subchannels)

# np.concatenate rejects an empty list, so fall back to empty arrays when no
# segment matches CATEGORY.
X = np.concatenate(X_parts) if X_parts else np.array([])
y = np.concatenate(y_parts) if y_parts else np.array([])
assert len(X) == len(y)
print(len(X), len(y))

900000 900000


In [4]:
def forward(Psig, Ptran, etat_in=None, coef=1, normalize=True):
    """Run one left-to-right filtering sweep over a sequence of likelihoods.

    At every step the previous posterior is pushed through ``Ptran`` to get a
    predicted state distribution. That prediction is blended geometrically
    with the external prior ``etat_in`` and finally multiplied by the
    likelihoods ``Psig`` to give the new posterior.

    Parameters
    ----------
    Psig : ndarray, shape (n_steps, n_states)
        Likelihood of each state at each step (does not need to be normalised).
    Ptran : ndarray, shape (n_states, n_states)
        Transition matrix, applied as ``alpha[j - 1] @ Ptran``.
    etat_in : ndarray, shape (n_steps, n_states), optional
        External per-step prior. Defaults to a uniform distribution over the
        states at every step.
    coef : float, default 1
        Blend exponent: ``etat[j] = prediction**coef * etat_in[j]**(1 - coef)``.
        With ``coef=1`` the external prior only matters at step 0.
    normalize : bool, default True
        If True, ``alpha[0]`` is ``etat_in[0] * Psig[0]`` rescaled to sum to 1
        and every prediction is rescaled before and after blending. If False,
        ``alpha[0]`` is ``etat_in[0]`` as is and the predictions are not
        rescaled. ``alpha[j]`` for ``j >= 1`` is rescaled in both modes.

    Returns
    -------
    alpha : ndarray, shape (n_steps, n_states)
        Posterior state weights after seeing the likelihood of each step.
    etat : ndarray, shape (n_steps, n_states)
        Blended predictions, i.e. the state weights before the likelihood of
        the same step is applied.
    """
    n_steps = Psig.shape[0]
    if etat_in is None:
        etat_in = np.ones(Psig.shape) / Psig.shape[1]
    alpha = np.zeros(Psig.shape)  # n_steps x n_states
    etat = np.zeros(Psig.shape)   # n_steps x n_states

    # Nothing to filter: return the (empty) arrays instead of failing on row 0.
    if n_steps == 0:
        return alpha, etat

    etat[0] = etat_in[0]
    if normalize:
        alpha[0] = etat_in[0] * Psig[0]
        alpha[0] /= alpha[0].sum()
    else:
        alpha[0] = etat_in[0]

    for j in range(1, n_steps):
        # Predict, then blend the prediction with the external prior.
        etat[j] = alpha[j - 1] @ Ptran
        if normalize:
            etat[j] /= etat[j].sum()
        etat[j] = (etat[j] ** coef) * (etat_in[j] ** (1 - coef))
        if normalize:
            etat[j] /= etat[j].sum()
        # Update with the likelihood of step j and rescale to sum to 1.
        alpha[j] = etat[j] * Psig[j]
        alpha[j] /= alpha[j].sum()
    return alpha, etat

def calculate_matrix(transition_matrix, states, number_processes):
    """Expand a single-process transition matrix to several identical processes.

    If max(open_channels) = K we assume K independent 0/1 processes that all
    follow ``transition_matrix``. E.g. data category 3 has at most 3 open
    channels, so it is modelled with 3 processes.

    Because the processes are interchangeable, the joint state is the
    *multiset* of their hidden states. With 4 hidden states and 3 processes
    there are C(4 + 3 - 1, 3) = 20 such multisets, so the result is a 20 x 20
    matrix whose rows/columns are ordered (0, 0, 0), (0, 0, 1), ...,
    (3, 3, 3).

    The probability of going from e.g. (0, 1, 1) to (0, 0, 2) is built from
    products such as P(0->0) * P(1->0) * P(1->2): these are summed over every
    ordering of the destination multiset and averaged over every ordering of
    the source multiset.

    Parameters
    ----------
    transition_matrix : array-like, shape (n, n)
        Off-diagonal transition probabilities of one process. The diagonal is
        ignored and recomputed so that every row sums to 1. The input is not
        modified.
    states : sequence of length n
        Value contributed by each hidden state (here: 1 for an open state,
        0 for a closed one).
    number_processes : int
        Number of independent processes to combine. Values <= 1 return the
        single-process matrix with its diagonal filled in.

    Returns
    -------
    new_transition_matrix : ndarray
        Transition matrix between the combined (multiset) states.
    new_channels : ndarray
        Sum of ``states`` over the members of each combined state.

    Raises
    ------
    ValueError
        If ``transition_matrix`` is not of shape (len(states), len(states)).
    """
    n0 = len(states)
    # Work on a float copy so the caller's matrix is left untouched; mutating
    # it in place made a second call on the same array zero out the diagonal.
    base = np.array(transition_matrix, dtype=float)
    if base.shape != (n0, n0):
        raise ValueError(
            'transition_matrix must have shape ({0}, {0}), got {1}'.format(n0, base.shape)
        )

    # Fill in the diagonal such that each row sums to 1. The old diagonal is
    # cleared first so it cannot leak into the row sum.
    for i in range(n0):
        base[i, i] = 0.0
        base[i, i] = 1.0 - base[i].sum()

    new_transition_matrix = base.copy()
    new_states = [(x,) for x in range(n0)]
    for _ in range(1, number_processes):
        # Add one more process: block (i, j) of the enlarged matrix is
        # base[i, j] * current matrix, which is exactly the Kronecker product.
        temp_transition_matrix = np.kron(base, new_transition_matrix)
        temp_states = [s + (i,) for i in range(n0) for s in new_states]

        # Group orderings of the same multiset together to shrink the matrix,
        # e.g. (0, 1, 2) is the same joint state as (1, 2, 0).
        canonical = [tuple(sorted(s)) for s in temp_states]
        new_states = sorted(set(canonical))
        members = {state: [] for state in new_states}
        for k, state in enumerate(canonical):
            members[state].append(k)
        groups = [members[state] for state in new_states]

        new_transition_matrix = np.zeros((len(new_states), len(new_states)))
        for i, rows in enumerate(groups):
            source_rows = temp_transition_matrix[rows, :]
            for j, cols in enumerate(groups):
                # Sum over destination orderings, average over source orderings.
                new_transition_matrix[i, j] = np.sum(source_rows[:, cols]) / len(rows)

    new_channels = np.array([sum(states[x] for x in s) for s in new_states])

    return new_transition_matrix, new_channels

def get_Psig(signal, States, kexp):
    """Score every sample of ``signal`` against every level in ``States``.

    Parameters
    ----------
    signal : array-like, shape (n_samples,)
        Observed signal values.
    States : array-like, shape (n_states,)
        Level associated with each state.
    kexp : float
        Divisor of the squared distance inside the exponential; larger values
        give flatter scores.

    Returns
    -------
    ndarray, shape (n_samples, n_states)
        ``exp(-(signal[i] - States[k])**2 / kexp)`` for every pair (i, k).
    """
    # Broadcast a column of samples against a row of levels instead of looping
    # over the samples in Python.
    observed = np.asarray(signal, dtype=float).reshape(-1, 1)
    levels = np.asarray(States, dtype=float).reshape(1, -1)
    return np.exp(-((observed - levels) ** 2) / kexp)

In [5]:
# Transition probabilities of a single channel between its four hidden
# states. The zero diagonal is a placeholder: calculate_matrix fills it in so
# that every row sums to 1.
single_channel_ptran = np.array([
    [0,      0.0067, 0,      0     ],
    [0.0373, 0,      0.2762, 0.0230],
    [0,      0.1991, 0,      0     ],
    [0,      0.0050, 0,      0     ],
])
# Open-channel count contributed by each hidden state.
single_channel_states = [1, 1, 0, 0]

# Combine three independent channels into one joint model.
Ptran, States = calculate_matrix(single_channel_ptran, single_channel_states, 3)
print('Transition matrix shape: {}'.format(Ptran.shape))

Transition matrix shape: (20, 20)


In [6]:
# Hyper-parameters of the decoding passes. They can be tuned with supervision
# (macro-F1, which works best for a competition) or without it, for example
# by maximising likelihood.
Kexp, Kexpp = 0.1307, 1.8               # divisor passed to get_Psig / exponent applied to Psig
COEF_BACK, COEF_FOR = 0.9192, 0.8869    # `coef` of the backward pass / of the second forward pass
COEF_FIN, COEF_FIN3 = 0.5, 0.3          # final blend weights of alpha1 / of alpha3

In [7]:
# The signal is decoded in independent chunks of DECODE_BATCH samples.
DECODE_BATCH = 100000
# Ceil division: a trailing partial chunk is decoded as well instead of being
# silently left at zero.
n_decode_batches = (len(X) + DECODE_BATCH - 1) // DECODE_BATCH
# Loop-invariant: the backward pass runs on the transposed transition matrix.
Ptran_T = np.transpose(Ptran)

preds = np.zeros(len(X))
for k in range(n_decode_batches):
    chunk = slice(DECODE_BATCH * k, DECODE_BATCH * (k + 1))
    sig = X[chunk]
    Psig = get_Psig(sig, States, Kexp)

    # Pass 0: plain forward sweep with a uniform prior.
    alpha0, etat0 = forward(Psig, Ptran, normalize=False)
    # Pass 1: sweep over the reversed sequence, using pass 0 as external prior.
    alpha1, etat1 = forward(Psig[::-1], Ptran_T, etat_in=etat0[::-1], coef=COEF_BACK)
    # Pass 2: forward sweep again, using pass 1 as external prior.
    alpha2, etat2 = forward(Psig, Ptran, etat_in=etat1[::-1], coef=COEF_FOR)

    # Combine both directional predictions with the sharpened likelihood and
    # rescale every time step to sum to 1.
    alpha3 = etat1[::-1] * etat2 * Psig**Kexpp
    alpha3 /= alpha3.sum(axis=1, keepdims=True)

    # Weighted blend of the three posteriors (the weights sum to 1).
    pred = COEF_FIN * alpha1[::-1] + (1 - COEF_FIN - COEF_FIN3) * alpha2 + COEF_FIN3 * alpha3

    # Expected number of open channels under the blended posterior.
    expected_channels = pred @ States
    preds[chunk] = expected_channels
    print('Max/min', expected_channels.max(), expected_channels.min())

Max/min 2.999999999924185 4.73579276123209e-09
Max/min 2.9999999999806106 2.8490946323121153e-09
Max/min 2.999999999937666 5.147520748358743e-09
Max/min 2.9999999999242286 8.617654375472815e-09
Max/min 2.999999999996627 3.1780437095570796e-09
Max/min 2.9999999999867057 1.3924418429139992e-09
Max/min 2.9999999999956555 5.215197487300455e-10
Max/min 2.9999999999999996 4.306866578467323e-09
Max/min 2.999999999993938 9.61203510709536e-09


In [8]:
# Macro-F1 on the labelled samples only; unlabelled rows carry the placeholder -1.
labelled = y >= 0
rounded_preds = np.round(preds[labelled])
print(f1_score(y[labelled], rounded_preds, average='macro'))

0.986794704167445


In [9]:
# Persist the un-rounded expected channel counts for this category.
np.save(file='../data/cat3_preds.npy', arr=preds)