Set parameters and select segmenting signal

In [40]:
from toolbox_jocha.hdf5 import get_data_from_dataset
from toolbox_jocha.ets import split_into_bins

# "308-8", "308-10", "308-12", "308-14", "316-8", "316-10"
# "316-12", "322-6", "322-8", "322-10", "322-12", "353-6"    
# "353-8", "353-10", "361-6", "365-6", "367-6", "374-6"
# "374-8", "374-10", "387-6", "387-10", "396-6", "397-6"
# "410-6", "410-8", "410-10", "412-8", "412-10", "415-6", "415-8"


##########################

# Mouse identifiers; each one is interpolated into the data paths as "M{mouse_num}".
mice_num = ["39-12","42-12","44-12","45-12","46-12","251-6","254-6"]
# Suffix shared by the input file names and by the dataframe row labels.
output_file_id = "noise_v1"
# Name of the GCaMP dataset to analyse (read from "data/3d/{GCaMP_str}" and used
# as the signal part of the dFC file name).
GCaMP_str = "GCaMP_shuffled"

# Row labels ("id" index values) under which each mouse is stored in the CSV dataframes.
indexes = [f"M{mouse_num}_{output_file_id}" for mouse_num in mice_num]

# Base CSV; each analysis below works on a prefixed copy of this file.
base_dataframe_filename = "WT_shuffled_noise_df.csv"


#############################

# Alternative single-recording configuration (disabled):
# mice_num = ["396-1_3fps_GCaMP_highpass_HbT_infraslow"]
# indexes = [f"M{mouse_num}" for mouse_num in mice_num]

#############################

n_mice = len(mice_num)
# Number of bins requested from split_into_bins for every mouse.
n_segments = 2

# Signal whose dFC file provides the "cts" dataset used for segmenting.
segmenting_str = GCaMP_str

def signals_filename(mouse_num, filename_str, data_root="D:/mouse_data/new_data"):
    """Build the path of the formatted signals HDF5 file for one mouse.

    Parameters
    ----------
    mouse_num : str
        Mouse identifier without the leading "M" (e.g. "39-12").
    filename_str : str
        Suffix identifying the file variant (e.g. "noise_v1").
    data_root : str or path-like, optional
        Directory containing the per-mouse "M<mouse_num>" folders. Defaults to
        the location that used to be hard-coded, so existing calls are unchanged.

    Returns
    -------
    str
        "<data_root>/M<mouse_num>/formatted/M<mouse_num>_<filename_str>.h5"
    """
    # Drop trailing separators so a root given as "dir/" does not yield "dir//M...".
    root = str(data_root).rstrip("/\\")
    mouse_id = f"M{mouse_num}"
    return f"{root}/{mouse_id}/formatted/{mouse_id}_{filename_str}.h5"

def dfc_filename(mouse_num, filename_str, signal_str, data_root="D:/mouse_data/new_data"):
    """Build the path of the dFC HDF5 file for one mouse and one signal.

    Parameters
    ----------
    mouse_num : str
        Mouse identifier without the leading "M" (e.g. "39-12").
    filename_str : str
        Suffix identifying the file variant (e.g. "noise_v1").
    signal_str : str
        Name of the signal the dFC file was computed from (e.g. "dHbT").
    data_root : str or path-like, optional
        Directory containing the per-mouse "M<mouse_num>" folders. Defaults to
        the location that used to be hard-coded, so existing calls are unchanged.

    Returns
    -------
    str
        "<data_root>/M<mouse_num>/formatted/M<mouse_num>_<filename_str>_<signal_str>_dfc.h5"
    """
    # Drop trailing separators so a root given as "dir/" does not yield "dir//M...".
    root = str(data_root).rstrip("/\\")
    mouse_id = f"M{mouse_num}"
    return f"{root}/{mouse_id}/formatted/{mouse_id}_{filename_str}_{signal_str}_dfc.h5"

##################################

# def signals_filename(mouse_num, filename_str):
#     return f"D:/mouse_data/new_data/M396-1/formatted/M{mouse_num}.h5"

# def dfc_filename(mouse_num, filename_str, signal_str):
#     return f"D:/mouse_data/new_data/M396-1/formatted/M{mouse_num}_{signal_str}_dfc.h5"

###################################


# Per-mouse segmentation results: both lists are ordered like mice_num.
segment_mats = []
segment_indices = []

for mouse_num in mice_num:
    
    # Load the "cts" dataset from the dFC file of the segmenting signal.
    cts, _ = get_data_from_dataset(dfc_filename(mouse_num, output_file_id, segmenting_str), "cts")

    # Split it into n_segments bins.
    # NOTE(review): later cells use each entry of `indices` to index the first
    # axis of the signal/dFC arrays -- presumably frame indices; confirm in split_into_bins.
    mats, indices = split_into_bins(cts, n_segments)

    segment_mats.append(mats)
    segment_indices.append(indices)

Read existing dataframe to add data.

In [41]:
import pandas as pd
import shutil
import numpy as np
import os

def return_dataframe(filename):
    """Load the CSV at ``filename`` and return it indexed by its ``id`` column."""
    table = pd.read_csv(filename)
    indexed = table.set_index("id")
    return indexed

def copy_dataframe(input_file, output_file):
    """Copy ``input_file`` to ``output_file`` unless the destination already exists."""
    if os.path.exists(output_file):
        # An existing destination is left untouched.
        return
    shutil.copyfile(input_file, output_file)

def update_dataframe(df, data, columns, indexes):
    """Write a block of values into ``df``, creating columns and rows as needed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to update. It is modified in place and also returned.
    data : array-like
        Values to store. A 2-D input is read as ``data[i, k]`` for row label
        ``indexes[i]`` and column ``columns[k]``. A 1-D input is treated as a
        single column. Lists are accepted as well as NumPy arrays.
    columns : sequence of str
        Target column names; missing columns are created and filled with NaN.
    indexes : sequence
        Target row labels; labels not present in ``df.index`` are appended as
        new rows (columns not listed in ``columns`` are left as NaN there).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    ValueError
        If ``data`` is not 1-D/2-D or has fewer rows/columns than ``indexes`` /
        ``columns`` require. The check runs before ``df`` is touched, so a bad
        call never leaves the frame half-updated.
    """
    data = np.asarray(data)
    if data.ndim == 1:
        data = data.reshape(-1, 1)  # one value per row -> single column

    columns = list(columns)
    indexes = list(indexes)

    if data.ndim != 2 or data.shape[0] < len(indexes) or data.shape[1] < len(columns):
        raise ValueError(
            f"data with shape {data.shape} cannot fill "
            f"{len(indexes)} row(s) x {len(columns)} column(s)"
        )

    for col in columns:
        if col not in df.columns:
            df[col] = np.nan

    for i, idx in enumerate(indexes):
        if idx in df.index:
            # Existing row: overwrite only the requested cells.
            for col_idx, col in enumerate(columns):
                df.at[idx, col] = data[i, col_idx]
        else:
            # Unknown label: append a new row holding the requested cells.
            new_row = {col: data[i, col_idx] for col_idx, col in enumerate(columns)}
            df.loc[idx] = new_row

    return df

# Load the base dataframe (indexed by "id") and show it as a sanity check.
df = return_dataframe(base_dataframe_filename)
print(df)

# data = np.array([1, 2])
# cols = ["Col R"]
# indexes = ["M308-8_v1", "M316-8_v1"]

# df = update_dataframe(df, data, cols, indexes)

# print(df)

                 number
id                     
M39-12_noise_v1      39
M42-12_noise_v1      42
M44-12_noise_v1      44
M45-12_noise_v1      45
M46-12_noise_v1      46
M251-6_noise_v1     251
M254-6_noise_v1     254


1. Neurovascular coupling (correlation between GCaMP activity and dHbT activity)

In [42]:
import numpy as np
from toolbox_jocha.correlation import r_coeff_2mats
from toolbox_jocha.hdf5 import get_data_from_dataset

# Neurovascular-coupling results go to their own CSV, seeded from the base dataframe
# the first time this cell runs (copy_dataframe does nothing if the file exists).
filename = "nvc_"+base_dataframe_filename

copy_dataframe(base_dataframe_filename, filename)
df = return_dataframe(filename)


# Options forwarded unchanged to r_coeff_2mats.
# NOTE(review): their exact meaning is defined in toolbox_jocha.correlation -- confirm there.
squared_r = False
compute_lag = True
max_shift_seconds = 5
fps = 3
convert_to_s = True
lag_sign = None

# Maximum shift handed to r_coeff_2mats: seconds multiplied by fps.
max_shift = max_shift_seconds * fps
# Per-mouse results: one value per segment, plus one value for the whole recording.
neurovascular_coupling = np.zeros((n_mice, n_segments))
neurcoup_whole = np.zeros(n_mice)
lag = np.zeros((n_mice, n_segments))
lag_whole = np.zeros(n_mice)

for i, mouse_num in enumerate(mice_num):

    print(f"Processing mouse M{mouse_num}.")

    # Both signals are read from the same per-mouse signals file.
    GCaMP_signal, _ = get_data_from_dataset(signals_filename(mouse_num, output_file_id), f"data/3d/{GCaMP_str}")
    HbT_signal, _ = get_data_from_dataset(signals_filename(mouse_num, output_file_id), "data/3d/dHbT")

    # Whole-recording values: NaN-ignoring mean of the returned correlation and lag arrays.
    lag_mat, correlation_mat = r_coeff_2mats(GCaMP_signal, HbT_signal, max_shift=max_shift, lag=compute_lag, convert_to_s=convert_to_s, fps=fps, squared=squared_r, lag_sign=lag_sign)
    neurcoup_whole[i] = np.nanmean(correlation_mat)
    lag_whole[i] = np.nanmean(lag_mat)

    for j, indices in enumerate(segment_indices[i]): # The j-th segment of mouse i

        # Keep only the entries of the first axis that belong to this segment.
        # NOTE(review): the first axis is presumably time (frames) -- confirm.
        sliced_GCaMP_signal = GCaMP_signal[indices,:,:]
        sliced_HbT_signal = HbT_signal[indices,:,:]

        lag_mat, correlation_mat = r_coeff_2mats(sliced_GCaMP_signal, sliced_HbT_signal, max_shift=max_shift, lag=compute_lag, convert_to_s=convert_to_s, fps=fps, squared=squared_r, lag_sign=lag_sign)

        neurovascular_coupling[i,j] = np.nanmean(correlation_mat)
        lag[i,j] = np.nanmean(lag_mat)


# Mouse i's per-segment neurovascular coupling is neurovascular_coupling[i,:].
# It is stored below in the columns "nvc_whole" and "nvc_segment_{j}/{n_segments}"
# (and likewise "lag_whole" / "lag_segment_{j}/{n_segments}" for the lags).

# print(neurovascular_coupling)
# print(lag)

# Timing note: seems to be about 8 seconds per segment with 5 segments

# Column 0 holds the whole-recording value, the remaining columns the segments.
nvc_columns = ["nvc_whole"] + [f"nvc_segment_{i}/{n_segments}" for i in range(n_segments)]
df = update_dataframe(df, np.column_stack((neurcoup_whole, neurovascular_coupling)), nvc_columns, indexes)

lag_columns = ["lag_whole"] + [f"lag_segment_{i}/{n_segments}" for i in range(n_segments)]
df = update_dataframe(df, np.column_stack((lag_whole, lag)), lag_columns, indexes)

# Previous storage scheme (one list-valued column per metric), kept for reference:
# df[f"nvc_{n_segments}_{segmenting_str}_segments"].update(pd.Series(neurovascular_coupling.tolist(), index=indexes))
# df[f"nvc_whole"].update(pd.Series(neurcoup_whole.tolist(), index=indexes))

# df[f"lag_{n_segments}_{segmenting_str}_segments"].update(pd.Series(lag.tolist(), index=indexes))
# df[f"lag_whole"].update(pd.Series(lag_whole.tolist(), index=indexes))

df.to_csv(filename)

# Drop the large intermediates so they do not linger in the kernel.
del neurovascular_coupling, neurcoup_whole, lag, lag_whole, GCaMP_signal, HbT_signal, lag_mat, correlation_mat

Processing mouse M39-12.
Processing mouse M42-12.
Processing mouse M44-12.
Processing mouse M45-12.
Processing mouse M46-12.
Processing mouse M251-6.
Processing mouse M254-6.


In [43]:
# Each metric family gets its own CSV, created from the base dataframe when it
# does not exist yet, and is then loaded into a dedicated dataframe.

# Functional representativity
funcrep_filename = f"funcrep_{base_dataframe_filename}"
copy_dataframe(base_dataframe_filename, funcrep_filename)
funcrep_df = return_dataframe(funcrep_filename)

# Modularity
modularity_filename = f"modularity_{base_dataframe_filename}"
copy_dataframe(base_dataframe_filename, modularity_filename)
modularity_df = return_dataframe(modularity_filename)

# Functional similarity
funcsim_filename = f"funcsim_{base_dataframe_filename}"
copy_dataframe(base_dataframe_filename, funcsim_filename)
funcsim_df = return_dataframe(funcsim_filename)

In [44]:
import bct
import numpy as np

# Result containers, one entry/row per mouse. The letters A-J are the labels
# used by the comments in the processing loop and in the export code below.

# Whole-recording functional connectivity (kept in memory only, never saved).
GCaMP_FC_whole = [np.nan for i in mice_num] # A
dHbT_FC_whole = [np.nan for i in mice_num] # B

# Functional similarity: GCaMP FC vs dHbT FC (whole recording / per segment).
funcsim_whole = np.zeros(n_mice) # C
functional_similarity = np.zeros((n_mice, n_segments)) # D

# Functional representativity: segment FC vs whole-recording FC of the same signal.
GCaMP_functional_representativity = np.zeros((n_mice, n_segments)) # E
dHbT_functional_representativity = np.zeros((n_mice, n_segments)) # F

# Modularity Q of the FC matrices (whole recording / per segment).
GCaMP_mod_whole = np.zeros(n_mice) # G
dHbT_mod_whole = np.zeros(n_mice) # H
GCaMP_modularity = np.zeros((n_mice, n_segments)) # I
dHbT_modularity = np.zeros((n_mice, n_segments)) # J

def flat_to_symmetric(flat, N):
    """Rebuild a full symmetric (N, N) matrix from its flattened upper triangle.

    ``flat`` holds the upper-triangular entries, diagonal included, in the
    order produced by ``np.triu_indices(N)``.
    """
    rows, cols = np.triu_indices(N)
    full = np.zeros((N, N))
    full[rows, cols] = flat  # upper triangle, diagonal included
    full[cols, rows] = flat  # mirrored copy below the diagonal
    return full

def compute_modularity(fc, n_elems):
    """Return the modularity value Q that ``bct.modularity_und`` reports for ``fc``.

    ``fc`` is a flattened upper triangle (diagonal included) holding
    ``n_elems`` entries. The matrix size N is recovered by solving
    N*(N+1)/2 = n_elems, the vector is expanded to a symmetric matrix, and
    only the second value returned by ``bct.modularity_und`` is kept.
    """
    side = int((np.sqrt(8*n_elems+1)-1)/2)
    adjacency = flat_to_symmetric(fc, side)
    _, q_value = bct.modularity_und(adjacency)  # first returned value (Ci) is unused
    return q_value



# For every mouse: build whole-recording and per-segment FC vectors for both
# signals, then derive functional similarity, representativity and modularity.
for i, mouse_num in enumerate(mice_num):
    
    print(f"Processing mouse M{mouse_num}.")

    # NOTE(review): "dfc" is indexed as [segment indices, :] and averaged over
    # axis 0 below -- presumably (time windows x flattened upper-triangle edges); confirm.
    GCaMP_dfc, _ = get_data_from_dataset(dfc_filename(mouse_num, output_file_id, GCaMP_str), "dfc")
    dHbT_dfc, _ = get_data_from_dataset(dfc_filename(mouse_num, output_file_id, "dHbT"), "dfc")

    # Whole-recording FC: mean of the dFC over its first axis.
    GCaMP_FC_whole[i] = np.mean(GCaMP_dfc, axis=0) # A
    dHbT_FC_whole[i] = np.mean(dHbT_dfc, axis=0) # B

    # Functional similarity: Pearson correlation between the two FC vectors.
    funcsim_whole[i] = np.corrcoef(GCaMP_FC_whole[i], dHbT_FC_whole[i])[0,1] # C

    # shape[1] is the number of flattened FC entries, from which the matrix size is recovered.
    GCaMP_mod_whole[i] = compute_modularity(GCaMP_FC_whole[i], GCaMP_dfc.shape[1]) # G
    dHbT_mod_whole[i] = compute_modularity(dHbT_FC_whole[i], dHbT_dfc.shape[1]) # H

    for j, indices in enumerate(segment_indices[i]): # The j-th segment of mouse i

        # Restrict the dFC to the rows that belong to this segment.
        sliced_GCaMP_dfc = GCaMP_dfc[indices,:]
        sliced_dHbT_dfc = dHbT_dfc[indices,:]

        # Segment FC: mean of the segment's dFC rows.
        GCaMP_FC = np.mean(sliced_GCaMP_dfc, axis=0)
        dHbT_FC = np.mean(sliced_dHbT_dfc, axis=0)

        functional_similarity[i,j] = np.corrcoef(GCaMP_FC, dHbT_FC)[0,1] # D

        # Representativity: correlation of the segment FC with the whole-recording FC.
        GCaMP_functional_representativity[i,j] = np.corrcoef(GCaMP_FC, GCaMP_FC_whole[i])[0,1] # E
        dHbT_functional_representativity[i,j] = np.corrcoef(dHbT_FC, dHbT_FC_whole[i])[0,1] # F

        GCaMP_modularity[i,j] = compute_modularity(GCaMP_FC, GCaMP_dfc.shape[1]) # I
        dHbT_modularity[i,j] = compute_modularity(dHbT_FC, dHbT_dfc.shape[1]) # J

        del sliced_GCaMP_dfc, sliced_dHbT_dfc

    # Release the per-mouse dFC arrays before loading the next mouse.
    del GCaMP_dfc, dHbT_dfc

# We're not saving A and B

# Previous storage scheme (list-valued columns), kept for reference:
# df[f"funcsim_whole"].update(pd.Series(funcsim_whole.tolist(), index=indexes)) # C
# df[f"funcsim_{n_segments}_{segmenting_str}_segments"].update(pd.Series(functional_similarity.tolist(), index=indexes)) # D

# Functional similarity: whole-recording value first, then one column per segment.
funcsim_columns = ["funcsim_whole"] + [f"funcsim_segment_{i}/{n_segments}" for i in range(n_segments)]
funcsim_df = update_dataframe(funcsim_df, np.column_stack((funcsim_whole, functional_similarity)), funcsim_columns, indexes) # C and D
funcsim_df.to_csv(funcsim_filename)

# df[f"GCaMP_funcrep_{n_segments}_{segmenting_str}_segments"].update(pd.Series(GCaMP_functional_representativity.tolist(), index=indexes)) # E
# df[f"dHbT_funcrep_{n_segments}_{segmenting_str}_segments"].update(pd.Series(dHbT_functional_representativity.tolist(), index=indexes)) # F

# Functional representativity: GCaMP segment columns followed by dHbT segment columns.
funcrep_columns = [f"GCaMP_funcrep_segment_{i}/{n_segments}" for i in range(n_segments)] + [f"dHbT_funcrep_segment_{i}/{n_segments}" for i in range(n_segments)]
funcrep_df = update_dataframe(funcrep_df, np.column_stack((GCaMP_functional_representativity, dHbT_functional_representativity)), funcrep_columns, indexes) # E and F
funcrep_df.to_csv(funcrep_filename)

# df[f"GCaMP_modularity_whole"].update(pd.Series(GCaMP_mod_whole.tolist(), index=indexes)) # G
# df[f"dHbT_modularity_whole"].update(pd.Series(dHbT_mod_whole.tolist(), index=indexes)) # H

# df[f"GCaMP_modularity_{n_segments}_{segmenting_str}_segments"].update(pd.Series(GCaMP_modularity.tolist(), index=indexes)) # I
# df[f"dHbT_modularity_{n_segments}_{segmenting_str}_segments"].update(pd.Series(dHbT_modularity.tolist(), index=indexes)) # J

# Modularity: the column order must match the order of the stacked arrays (G, I, H, J).
modularity_columns = ["GCaMP_modularity_whole"] + [f"GCaMP_modularity_segment_{i}/{n_segments}" for i in range(n_segments)] + ["dHbT_modularity_whole"] + [f"dHbT_modularity_segment_{i}/{n_segments}" for i in range(n_segments)]
modularity_df = update_dataframe(modularity_df, np.column_stack((GCaMP_mod_whole, GCaMP_modularity, dHbT_mod_whole, dHbT_modularity)), modularity_columns, indexes) # G, H, I and J
modularity_df.to_csv(modularity_filename)

# df.to_csv(base_dataframe_filename)

Processing mouse M39-12.


  return asarray(a).size


Processing mouse M42-12.
Processing mouse M44-12.
Processing mouse M45-12.
Processing mouse M46-12.
Processing mouse M251-6.
Processing mouse M254-6.
