# Entropy calculation for encoded spike trains

In [None]:
import pandas as pd

from scipy import stats
from scipy.stats import mannwhitneyu as mw
from scipy.stats import sem
from scipy.stats import iqr
from scipy.stats import ttest_ind
from scipy.stats import ttest_rel
from scipy.stats import wilcoxon

import random

import math

import EntropyHub as EH

import numpy as np

import time

from os import listdir
from os.path import isfile, join

import neo

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the table of encoded ISI sequences (presumably the output of the
# "2 CSV to ENCODED" step -- confirm against that notebook).
df = pd.read_csv('encoded_isi_v3.csv')

In [1]:
def counter_1(base, find):
    """Count the occurrences of `find` as a contiguous run inside `base`.

    Every start position is tested independently, so overlapping
    occurrences are each counted. Elements are compared one by one, which
    lets `base` and `find` be different sequence types (e.g. list vs tuple).

    Parameters
    ----------
    base : sequence
        Sequence that is scanned.
    find : sequence
        Pattern to look for.

    Returns
    -------
    int
        Number of start positions at which `find` matches `base`.
    """
    width = len(find)
    last_start = len(base) - width + 1
    return sum(
        1
        for start in range(last_start)
        if not any(base[start + k] != find[k] for k in range(width))
    )


In [None]:
# entropy calculation


def CAE_entropy(isi_change_list, word_len=2):
    """Estimate the entropy of overlapping words in a symbol sequence.

    The sequence is cut into all overlapping windows ("words") of length
    `word_len`; the word frequencies are then turned into two entropy
    values (both in bits):

    * CAE -- a coverage-adjusted estimate: probabilities are scaled by the
      sample coverage ``C = 1 - f1 / n`` (``f1`` = number of words seen
      exactly once) and every term is divided by the probability
      ``1 - (1 - C * p) ** n`` that the word is observed at all. This
      matches the Chao-Shen estimator.
    * ENT -- the plain plug-in Shannon entropy ``-sum(p * log2(p))``.

    Parameters
    ----------
    isi_change_list : sequence
        Encoded symbol sequence. Symbols must be hashable; they are matched
        by hash/equality (NaN symbols are not expected).
    word_len : int, optional
        Number of consecutive symbols that form one word (default 2).

    Returns
    -------
    tuple
        ``(CAE, ENT)``. ``(0.0, 0.0)`` is returned when the sequence is too
        short to contain a single word.
    """
    from collections import Counter

    N = len(isi_change_list) - word_len + 1

    # One pass over the sequence instead of re-scanning it for every
    # distinct word.
    word_counts = Counter(
        tuple(isi_change_list[i : i + word_len]) for i in range(N)
    )
    if not word_counts:
        # No words at all: avoid the 0 / 0 division further down.
        return 0.0, 0.0

    counts_list = np.array(list(word_counts.values()))
    n = np.sum(counts_list)
    p = counts_list / n

    # Number of singleton words. If every word is a singleton the coverage
    # would be zero, so one of them is treated as seen more than once.
    f1 = np.count_nonzero(counts_list == 1)
    if f1 == n:
        f1 = n - 1

    C = 1 - f1 / n  # estimated sample coverage
    pa = C * p  # coverage-adjusted probabilities
    la = 1 - (1 - pa) ** n  # probability that each word is observed

    return -np.sum(pa * np.log2(pa) / la), -np.sum(p * np.log2(p))  # CAE, ENT


In [None]:
# Conditional entropy calculation

def cond_ent(isi_list, samp_isi_list, word_len=2):
    """Conditional entropy (bits) of the words of `isi_list` given the
    position-aligned words of `samp_isi_list`.

    Both sequences are cut into overlapping words of length `word_len`.
    Words at the same position are paired, and the result is
    ``-sum_s P(s) * sum_r P(r | s) * log2(P(r | s))`` where ``s`` runs over
    the words of `samp_isi_list` and ``r`` over the words of `isi_list`
    that were paired with ``s``.

    Parameters
    ----------
    isi_list : sequence
        Sequence whose words are the conditioned variable.
    samp_isi_list : sequence
        Sequence whose words are the conditioning variable; must contain
        the same number of words as `isi_list`.
    word_len : int, optional
        Number of consecutive symbols per word (default 2).

    Returns
    -------
    float or None
        The conditional entropy. If the two sequences yield a different
        number of words, ``"ERROR"`` is printed and ``None`` is returned.
    """
    from collections import Counter, defaultdict

    N = len(isi_list) - word_len + 1
    M = len(samp_isi_list) - word_len + 1
    if M != N:
        print("ERROR")
        return None

    # For every conditioning word, tally the words of `isi_list` that occur
    # at the same positions. The tallies also give the frequency of the
    # conditioning word itself, so no extra scan of either list is needed.
    paired_counts = defaultdict(Counter)
    for i in range(N):
        word_r = tuple(isi_list[i : i + word_len])
        word_s = tuple(samp_isi_list[i : i + word_len])
        paired_counts[word_s][word_r] += 1

    h_list = []
    for response_counts in paired_counts.values():
        occurrences = sum(response_counts.values())
        s_prob = occurrences / M
        for count in response_counts.values():
            cond_prob = count / occurrences
            h_list.append(-s_prob * cond_prob * np.log2(cond_prob))
    return np.sum(h_list)


In [None]:
# Quick check on one record: parse the first "isitisi" entry (stored as a
# stringified list) and evaluate its entropy.
raw_first = df["isitisi"][0]
data = [float(token) for token in raw_first.strip("][").split(", ")]
print(len(data))
CAE_entropy(data)


In [None]:
# Columns of `df` that are analysed; each cell holds a stringified list
# that the loop below parses into floats.
params = ['sib', 'isitm', 'isitisi']

In [None]:
# For every word length and every encoded column, compute per record:
#   * the plug-in entropy of the record,
#   * the median (over 100 shuffles) conditional entropy of the record
#     given its shuffled copy,
#   * the median (over 100 shuffles) plug-in entropy of the shuffled copy.
# After each word length the accumulated columns are written to a CSV.
ent_dict = {}
low = 1
top = 2
for word_length in range(low, top + 1):
    for param in params:
        print(param)
        ent_param_name = f"ent_{param}_wl{word_length}"
        cond_ent_param_name = f"cond_ent_{param}_wl{word_length}"
        shuf_ent_param_name = f"shuf_ent_{param}_wl{word_length}"

        for column_name in (ent_param_name, cond_ent_param_name, shuf_ent_param_name):
            ent_dict[column_name] = []

        for row in range(len(df)):
            # Each cell is a stringified list such as "[1.0, 2.0, ...]".
            tokens = df[param][row].strip("][").split(", ")
            data = [float(token) for token in tokens]

            # CAE_entropy returns (CAE, ENT); only ENT is stored.
            ent_dict[ent_param_name].append(
                CAE_entropy(data, word_len=word_length)[1]
            )

            shuffled_cond_ents = []
            shuffled_ents = []
            for _ in range(100):
                # A full-length sample without replacement is a shuffle.
                samp_data = random.sample(data, k=len(data))

                # Conditional entropy of the record given its shuffle.
                shuffled_cond_ents.append(
                    cond_ent(data, samp_data, word_len=word_length)
                )

                # Plug-in entropy of the shuffled record.
                shuffled_ents.append(
                    CAE_entropy(samp_data, word_len=word_length)[1]
                )

            ent_dict[cond_ent_param_name].append(np.median(shuffled_cond_ents))
            ent_dict[shuf_ent_param_name].append(np.median(shuffled_ents))

    # Save everything computed so far once this word length is finished.
    for column_name, values in ent_dict.items():
        df[column_name] = values
    df.to_csv(f"encoded_data_with_ents_wl{word_length}.csv")


In [None]:
# Copy every computed entropy series into df and write the combined table.
for column_name, values in ent_dict.items():
    df[column_name] = values
df.to_csv('encoded_data_with_ents.csv')