In [1]:
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

In [2]:
# Load the recorded arrays from the .npz archive; the keys 'time',
# 'soma_potentials' and 'input_seq' are read from it below.
# NOTE(review): the path is relative to the notebook's working directory.
data = np.load('../record/results_2025-07-30-18-34/data/spike_countsWT1p6000_NC3N0100.npz')
# Seeded generator handed to split_data_into_segments by the analysis cells.
# The same generator is shared by all of them, so the segment assignment of a
# given cell depends on how many draws earlier cells have already consumed.
rng = np.random.default_rng(100)

In [3]:
# Remove the transient period: drop the first 100 samples of every series.
data_without_transient = {
    key: data[key][100:]
    for key in ('time', 'soma_potentials', 'input_seq')
}

In [4]:
def split_data_into_groups(input_pattern, ca1_output, d, m):
    """Assign each time step a group id built from its d-1 most recent inputs.

    For time step ``i`` the values ``input_pattern[i-d+2 : i+1]`` (the
    length-``d`` history without its oldest element) are read as the digits
    of a base-``m`` number. The oldest of these values is the
    least-significant digit and ``input_pattern[i]`` the most-significant
    one.

    Parameters
    ----------
    input_pattern : 1-D sequence of numbers
        Input label at every time step.
    ca1_output : any
        Unused; kept so existing callers do not have to change.
    d : int
        History length. ``d == 1`` puts every sample in group 0.
    m : int
        Base used to encode the history.

    Returns
    -------
    numpy.ndarray of float, length ``np.size(input_pattern)``
        Group id per time step. The first ``d-1`` entries have no complete
        history and are NaN, so they never compare equal to a group id.
    """
    pattern = np.asarray(input_pattern, dtype=float)
    num_data = np.size(pattern)

    group = np.zeros(num_data)
    # Slicing clamps at the array end, so a sequence shorter than d-1 simply
    # becomes all-NaN instead of raising IndexError.
    group[:max(d - 1, 0)] = np.nan
    if num_data < d:
        return group

    # Column j of the length-d history window of step i is pattern[i-d+1+j];
    # over all valid i that is the contiguous slice pattern[j : n-(d-1-j)].
    for j in range(1, d):
        group[d - 1:] += pattern[j:num_data - (d - 1 - j)] * m**(j - 1)

    return group


def split_data_into_segments(rng, num_data, num_segments):
    """Draw a random segment index in ``[0, num_segments)`` for every sample.

    ``rng`` is a ``numpy.random.Generator``; the result is an integer array
    of length ``num_data``. Segments are drawn independently, so they are
    not guaranteed to have equal sizes.
    """
    return rng.integers(low=0, high=num_segments, size=num_data)

In [5]:
def rename_input_pattern(input_seq):
    """Relabel input patterns as consecutive indices ``0 .. k-1``.

    Each value is replaced by its rank among the sorted unique values of
    ``input_seq``, so the smallest original label becomes 0, the next 1, and
    so on. Applying the function to its own output changes nothing.

    Parameters
    ----------
    input_seq : 1-D array-like
        Original pattern label for every time step.
        NOTE(review): assumed to contain no NaN -- confirm against the data.

    Returns
    -------
    numpy.ndarray of float, length ``np.size(input_seq)``
        The relabelled sequence.
    """
    flat_seq = np.asarray(input_seq).ravel()
    # return_inverse gives, for every element, its index in the sorted
    # unique values -- exactly the new label -- without a Python-level loop.
    _, new_labels = np.unique(flat_seq, return_inverse=True)
    return new_labels.reshape(-1).astype(float)

In [6]:
# Replace the stored input labels by consecutive indices (rank among the
# sorted unique labels); the entry is overwritten in place.
data_without_transient['input_seq'] = rename_input_pattern(data_without_transient['input_seq'])

In [7]:
def shift_array(arr, d):
    """Delay ``arr`` by ``d-1`` positions, padding the front with ``None``.

    Element ``i`` of the result is ``arr[i-(d-1)]``; the first ``d-1``
    entries, which have no source element, are ``None``.

    Parameters
    ----------
    arr : 1-D array-like
        Sequence to shift.
    d : int
        History length (>= 1). ``d == 1`` returns ``arr`` itself, unchanged.

    Returns
    -------
    ``arr`` itself when ``d == 1``, otherwise an object-dtype
    ``numpy.ndarray`` with the same length as ``arr``.

    Raises
    ------
    ValueError
        If ``d < 1`` (a negative shift would silently truncate the data).
    """
    shift_amount = d - 1
    if shift_amount == 0:
        return arr
    if shift_amount < 0:
        raise ValueError(f'd must be >= 1, got {d}')

    num_data = len(arr)
    # Clamp the padding so the result never becomes longer than the input
    # when the shift exceeds the sequence length.
    pad_len = min(shift_amount, num_data)
    none_padding = np.full(pad_len, None, dtype=object)
    shifted_elements = arr[:num_data - pad_len]
    return np.concatenate((none_padding, shifted_elements))

def _get_data_within_group(group, data_to_group, input_seq, ca1_output, m, d):
    """Collect every sample whose group id equals ``group``.

    Returns a dict with the members' current input (``'latest_input'``), the
    input ``d-1`` steps earlier (``'oldest_input'``, taken from
    ``shift_array``), the matching rows of ``ca1_output``
    (``'soma_potentials'``), their group ids (``'group'``) and the member
    count (``'num_data'``). ``m`` is not used.
    """
    member_idx = np.where(data_to_group == group)
    latest = input_seq[member_idx]
    oldest = shift_array(input_seq, d)[member_idx]
    return {
        'latest_input': latest,
        'oldest_input': oldest,
        'soma_potentials': ca1_output[member_idx],
        'group': data_to_group[member_idx],
        'num_data': np.size(latest),
    }

In [8]:
def _get_data_within_segments(segment, data_to_segment, input_seq, ca1_output):
    new_data = {}
    new_data['input_seq'] = input_seq[np.where(data_to_segment == segment)]
    new_data['soma_potentials'] = ca1_output[np.where(data_to_segment == segment)]
    new_data['segment'] = data_to_segment[np.where(data_to_segment == segment)]
    new_data['num_data'] = np.size(new_data['input_seq'])

    return new_data

In [9]:
def calculate_centroid(z, label, m):
    """Return the mean of ``z`` for each class ``0 .. m-1`` as a list.

    ``label`` holds the class of every row of ``z``; entry ``i`` of the
    result is the mean over all elements of ``z`` whose label equals ``i``.
    """
    return [np.mean(z[label == cls]) for cls in range(m)]

def classify_validate_data(centroid, z_validate):
    """Assign each validation sample to the class with the nearest centroid.

    Parameters
    ----------
    centroid : sequence of float
        One-dimensional centroid per class; the position in the sequence is
        the class index.
    z_validate : array-like
        Projected validation samples, one scalar per sample (a 1-D array or
        an ``(n, 1)`` column).

    Returns
    -------
    numpy.ndarray of float, length ``np.size(z_validate)``
        Estimated class index per sample. Ties go to the lowest class
        index. Class 0 is returned when no centroid gives a usable (non-NaN)
        distance.

    Notes
    -----
    The search is unbounded: the nearest centroid is always chosen. Starting
    the search from ``max(z_validate) - min(z_validate)`` would leave every
    sample that is farther than that range from all centroids at class 0
    (and would always return 0 for a single-sample validation set).
    """
    z_flat = np.asarray(z_validate, dtype=float).reshape(-1)
    centroids = np.asarray(centroid, dtype=float).reshape(-1)
    if centroids.size == 0:
        return np.zeros(z_flat.size)

    # (n_samples, n_classes) table of absolute distances.
    distance = np.abs(z_flat[:, np.newaxis] - centroids[np.newaxis, :])
    # A NaN centroid (e.g. a class with no training sample) must never win.
    distance[np.isnan(distance)] = np.inf
    # argmin returns the first minimum, i.e. the lowest class index on ties.
    return np.argmin(distance, axis=1).astype(float)
            
def calculate_ER(estimation, ground_truth):
    """Return the error rate: mismatching pairs divided by ``np.size(estimation)``.

    ``estimation`` and ``ground_truth`` are compared element by element, in
    order.
    """
    num_error = sum(
        1 for est, truth in zip(estimation, ground_truth) if est != truth
    )
    return num_error / np.size(estimation)


def conduct_lda(label, ca1_output, data_to_segment, segment, m, d, group):
    """Fit a 1-D LDA on all segments but one and score the held-out segment.

    The LDA is fitted on every sample whose segment differs from
    ``segment``. Held-out samples are projected onto the single
    discriminant axis and assigned to the class with the nearest training
    centroid. A histogram of the projected held-out samples, split by
    estimated class, is saved to ``output_<d>_<group>_<segment>.png`` in the
    current working directory.

    Parameters
    ----------
    label : numpy.ndarray
        Class label per sample (may be object dtype; converted with
        ``.tolist()`` before fitting).
    ca1_output : numpy.ndarray
        Feature rows, one per sample.
    data_to_segment : numpy.ndarray of int
        Segment index per sample.
    segment : int
        Segment to hold out for validation.
    m : int
        Number of classes; centroids and histograms cover ``0 .. m-1``.
    d, group : int
        Only used to build the output file name.

    Returns
    -------
    float
        Error rate on the held-out segment.
    """
    print(f'conduct lda for segment {segment}')
    # Function-scope imports keep the function self-contained; matplotlib is
    # not imported in the notebook's import cell.
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from matplotlib import pyplot as plt

    train_mask = data_to_segment != segment
    validate_mask = data_to_segment == segment
    train_label = label[train_mask]

    lda = LinearDiscriminantAnalysis(n_components=1)
    z = lda.fit(ca1_output[train_mask], train_label.tolist()).transform(ca1_output[train_mask])
    z_validate = lda.transform(ca1_output[validate_mask])
    estimated_class = classify_validate_data(calculate_centroid(z, train_label, m), z_validate)

    # Blue/red for the first two classes; further classes fall back to
    # matplotlib's default colour cycle.
    colors = ['blue', 'red']
    fig, ax = plt.subplots()
    for cls in range(m):
        color = colors[cls] if cls < len(colors) else None
        ax.hist(z_validate[estimated_class == cls].ravel(), color=color, label=cls)
    ax.set_xlabel('LDA1')
    ax.set_ylabel('num data point')
    ax.legend(loc='best', shadow=False, scatterpoints=1)
    ax.set_title('LDA')
    # The extension needs its dot; 'png' alone gets glued onto the segment number.
    output_filename = 'output_' + str(d) + '_' + str(group) + '_' + str(segment) + '.png'
    fig.savefig(output_filename)
    # Close explicitly: this runs once per segment and group.
    plt.close(fig)

    ER = calculate_ER(estimated_class, label[validate_mask])
    return ER

In [10]:
# Segment-wise cross-validated LDA for history length d = 1.
# With d = 1 there is a single group (m**0) and shift_array leaves the
# inputs unshifted, so the label is the current input.
num_segments = 10  # number of cross-validation segments
d = 1              # history length
m = 2              # number of classes / base of the group encoding
num_groups = m**(d-1)
num_all_data = 0
data_without_transient['group'] = split_data_into_groups(
    data_without_transient['input_seq'],
    data_without_transient['soma_potentials'],
    d,
    m,
)
for i in range(num_groups):
    data_in_group = _get_data_within_group(
        i,
        data_without_transient['group'],
        data_without_transient['input_seq'],
        data_without_transient['soma_potentials'],
        m,
        d,
    )
    print(data_in_group['oldest_input'])
    print(f'LDA for group {i}')
    # Double-quoted f-string: reusing the enclosing quote type inside the
    # braces is a SyntaxError before Python 3.12.
    print(f"num data in group {i} = {data_in_group['num_data']}")
    error_rate = np.zeros(num_segments)
    data_to_segments = split_data_into_segments(rng, data_in_group['num_data'], num_segments)
    for j in range(num_segments):
        error_rate[j] = conduct_lda(
            data_in_group['oldest_input'],
            data_in_group['soma_potentials'],
            data_to_segments,
            j,
            m,
            d,
            i,
        )
        print(error_rate[j])
    print(np.mean(error_rate))
    num_all_data += data_in_group['num_data']
print(f'number of all data is {num_all_data}')

[0. 1. 1. ... 0. 0. 1.]
LDA for group 0
num data in group 0 = 9900
conduct lda for segment 0
0.014940239043824702
conduct lda for segment 1
0.030643513789581207
conduct lda for segment 2
0.013888888888888888
conduct lda for segment 3
0.028154327424400417
conduct lda for segment 4
0.015991471215351813
conduct lda for segment 5
0.018304431599229287
conduct lda for segment 6
0.01693227091633466
conduct lda for segment 7
0.01749271137026239
conduct lda for segment 8
0.022340425531914895
conduct lda for segment 9
0.01998001998001998
0.019866829975980823
number of all data is 9900


In [11]:
# Segment-wise cross-validated LDA for history length d = 2: within each
# group (same d-1 most recent inputs) classify the input d-1 steps back.
num_segments = 10  # number of cross-validation segments
d = 2              # history length
m = 2              # number of classes / base of the group encoding
num_groups = m**(d-1)
num_all_data = 0
data_without_transient['group'] = split_data_into_groups(
    data_without_transient['input_seq'],
    data_without_transient['soma_potentials'],
    d,
    m,
)
# The NaN entry belongs to the first d-1 samples, which have no full history
# and therefore fall into no group.
print(np.unique(data_without_transient['group']))
for i in range(num_groups):
    data_in_group = _get_data_within_group(
        i,
        data_without_transient['group'],
        data_without_transient['input_seq'],
        data_without_transient['soma_potentials'],
        m,
        d,
    )
    print(f'LDA for group {i}')
    # Double-quoted f-string: reusing the enclosing quote type inside the
    # braces is a SyntaxError before Python 3.12.
    print(f"num data in group {i} = {data_in_group['num_data']}")
    error_rate = np.zeros(num_segments)
    data_to_segments = split_data_into_segments(rng, data_in_group['num_data'], num_segments)
    for j in range(num_segments):
        error_rate[j] = conduct_lda(
            data_in_group['oldest_input'],
            data_in_group['soma_potentials'],
            data_to_segments,
            j,
            m,
            d,
            i,
        )
        print(error_rate[j])
    print(np.mean(error_rate))
    num_all_data += data_in_group['num_data']
print(f'number of all data is {num_all_data}')

[ 0.  1. nan]
LDA for group 0
num data in group 0 = 5013
conduct lda for segment 0
0.014893617021276596
conduct lda for segment 1
0.008016032064128256
conduct lda for segment 2
0.020491803278688523
conduct lda for segment 3
0.015151515151515152
conduct lda for segment 4
0.01556420233463035
conduct lda for segment 5
0.015238095238095238
conduct lda for segment 6
0.015810276679841896
conduct lda for segment 7
0.013779527559055118
conduct lda for segment 8
0.018072289156626505
conduct lda for segment 9
0.012578616352201259
0.014959597483605888
LDA for group 1
num data in group 1 = 4886
conduct lda for segment 0
0.01775147928994083
conduct lda for segment 1
0.011013215859030838
conduct lda for segment 2
0.005758157389635317
conduct lda for segment 3
0.006535947712418301
conduct lda for segment 4
0.009940357852882704
conduct lda for segment 5
0.016494845360824743
conduct lda for segment 6
0.014893617021276596
conduct lda for segment 7
0.007619047619047619
conduct lda for segment 8
0.0124223

In [12]:
# Segment-wise cross-validated LDA for history length d = 3: within each
# group (same d-1 most recent inputs) classify the input d-1 steps back.
num_segments = 10  # number of cross-validation segments
d = 3              # history length
m = 2              # number of classes / base of the group encoding
num_groups = m**(d-1)
num_all_data = 0
data_without_transient['group'] = split_data_into_groups(
    data_without_transient['input_seq'],
    data_without_transient['soma_potentials'],
    d,
    m,
)
# The NaN entry belongs to the first d-1 samples, which have no full history
# and therefore fall into no group.
print(np.unique(data_without_transient['group']))
for i in range(num_groups):
    data_in_group = _get_data_within_group(
        i,
        data_without_transient['group'],
        data_without_transient['input_seq'],
        data_without_transient['soma_potentials'],
        m,
        d,
    )
    print(f'LDA for group {i}')
    # Double-quoted f-string: reusing the enclosing quote type inside the
    # braces is a SyntaxError before Python 3.12.
    print(f"num data in group {i} = {data_in_group['num_data']}")
    error_rate = np.zeros(num_segments)
    data_to_segments = split_data_into_segments(rng, data_in_group['num_data'], num_segments)
    for j in range(num_segments):
        error_rate[j] = conduct_lda(
            data_in_group['oldest_input'],
            data_in_group['soma_potentials'],
            data_to_segments,
            j,
            m,
            d,
            i,
        )
        print(error_rate[j])
    print(np.mean(error_rate))
    num_all_data += data_in_group['num_data']
print(f'number of all data is {num_all_data}')

[ 0.  1.  2.  3. nan]
LDA for group 0
num data in group 0 = 2555
conduct lda for segment 0
0.015209125475285171
conduct lda for segment 1
0.03319502074688797
conduct lda for segment 2
0.0
conduct lda for segment 3
0.006993006993006993
conduct lda for segment 4
0.003745318352059925
conduct lda for segment 5
0.004149377593360996
conduct lda for segment 6
0.007168458781362007
conduct lda for segment 7
0.004081632653061225
conduct lda for segment 8
0.01282051282051282
conduct lda for segment 9
0.011811023622047244
0.009917347703758433
LDA for group 1
num data in group 1 = 2458
conduct lda for segment 0
0.014705882352941176
conduct lda for segment 1
0.02631578947368421
conduct lda for segment 2
0.00819672131147541
conduct lda for segment 3
0.012295081967213115
conduct lda for segment 4
0.0
conduct lda for segment 5
0.012
conduct lda for segment 6
0.008064516129032258
conduct lda for segment 7
0.019305019305019305
conduct lda for segment 8
0.0
conduct lda for segment 9
0.016129032258064516
0

In [13]:
# Segment-wise cross-validated LDA for history length d = 4: within each
# group (same d-1 most recent inputs) classify the input d-1 steps back.
num_segments = 10  # number of cross-validation segments
d = 4              # history length
m = 2              # number of classes / base of the group encoding
num_groups = m**(d-1)
num_all_data = 0
data_without_transient['group'] = split_data_into_groups(
    data_without_transient['input_seq'],
    data_without_transient['soma_potentials'],
    d,
    m,
)
# The NaN entry belongs to the first d-1 samples, which have no full history
# and therefore fall into no group.
print(np.unique(data_without_transient['group']))
for i in range(num_groups):
    data_in_group = _get_data_within_group(
        i,
        data_without_transient['group'],
        data_without_transient['input_seq'],
        data_without_transient['soma_potentials'],
        m,
        d,
    )
    print(f'LDA for group {i}')
    # Double-quoted f-string: reusing the enclosing quote type inside the
    # braces is a SyntaxError before Python 3.12.
    print(f"num data in group {i} = {data_in_group['num_data']}")
    error_rate = np.zeros(num_segments)
    data_to_segments = split_data_into_segments(rng, data_in_group['num_data'], num_segments)
    for j in range(num_segments):
        error_rate[j] = conduct_lda(
            data_in_group['oldest_input'],
            data_in_group['soma_potentials'],
            data_to_segments,
            j,
            m,
            d,
            i,
        )
        print(error_rate[j])
    print(np.mean(error_rate))
    num_all_data += data_in_group['num_data']
print(f'number of all data is {num_all_data}')

[ 0.  1.  2.  3.  4.  5.  6.  7. nan]
LDA for group 0
num data in group 0 = 1276
conduct lda for segment 0
0.06015037593984962
conduct lda for segment 1
0.056910569105691054
conduct lda for segment 2
0.05517241379310345
conduct lda for segment 3
0.05185185185185185
conduct lda for segment 4
0.03389830508474576
conduct lda for segment 5
0.0234375
conduct lda for segment 6
0.042735042735042736
conduct lda for segment 7
0.064
conduct lda for segment 8
0.041666666666666664
conduct lda for segment 9
0.06060606060606061
0.049042878578301176
LDA for group 1
num data in group 1 = 1279
conduct lda for segment 0
0.031746031746031744
conduct lda for segment 1
0.03937007874015748
conduct lda for segment 2
0.007407407407407408
conduct lda for segment 3
0.031007751937984496
conduct lda for segment 4
0.04201680672268908
conduct lda for segment 5
0.0078125
conduct lda for segment 6
0.03816793893129771
conduct lda for segment 7
0.013333333333333334
conduct lda for segment 8
0.024390243902439025
conduct

In [14]:
# Segment-wise cross-validated LDA for history length d = 5: within each
# group (same d-1 most recent inputs) classify the input d-1 steps back.
num_segments = 10  # number of cross-validation segments
d = 5              # history length
m = 2              # number of classes / base of the group encoding
num_groups = m**(d-1)
num_all_data = 0
data_without_transient['group'] = split_data_into_groups(
    data_without_transient['input_seq'],
    data_without_transient['soma_potentials'],
    d,
    m,
)
# The NaN entry belongs to the first d-1 samples, which have no full history
# and therefore fall into no group.
print(np.unique(data_without_transient['group']))
for i in range(num_groups):
    data_in_group = _get_data_within_group(
        i,
        data_without_transient['group'],
        data_without_transient['input_seq'],
        data_without_transient['soma_potentials'],
        m,
        d,
    )
    print(f'LDA for group {i}')
    # Double-quoted f-string: reusing the enclosing quote type inside the
    # braces is a SyntaxError before Python 3.12.
    print(f"num data in group {i} = {data_in_group['num_data']}")
    error_rate = np.zeros(num_segments)
    data_to_segments = split_data_into_segments(rng, data_in_group['num_data'], num_segments)
    for j in range(num_segments):
        error_rate[j] = conduct_lda(
            data_in_group['oldest_input'],
            data_in_group['soma_potentials'],
            data_to_segments,
            j,
            m,
            d,
            i,
        )
        print(error_rate[j])
    print(np.mean(error_rate))
    num_all_data += data_in_group['num_data']
print(f'number of all data is {num_all_data}')

[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. nan]
LDA for group 0
num data in group 0 = 632
conduct lda for segment 0
0.018518518518518517
conduct lda for segment 1
0.0784313725490196
conduct lda for segment 2
0.07407407407407407
conduct lda for segment 3
0.060240963855421686
conduct lda for segment 4
0.07042253521126761
conduct lda for segment 5
0.05333333333333334
conduct lda for segment 6
0.06349206349206349
conduct lda for segment 7
0.12903225806451613
conduct lda for segment 8
0.05
conduct lda for segment 9
0.03389830508474576
0.06314434241829603
LDA for group 1
num data in group 1 = 644
conduct lda for segment 0
0.015384615384615385
conduct lda for segment 1
0.030303030303030304
conduct lda for segment 2
0.03278688524590164
conduct lda for segment 3
0.07462686567164178
conduct lda for segment 4
0.04838709677419355
conduct lda for segment 5
0.028985507246376812
conduct lda for segment 6
0.058823529411764705
conduct lda for segment 7
0.03636363636363636
conduct 