In [1]:
# from   pathlib      import Path
# import importlib

# import numpy                as np
# import matplotlib           as mpl
# import matplotlib.pyplot    as plt
# import seaborn              as sns
# import pandas               as pd

# from scipy.stats   import kruskal, wilcoxon, mannwhitneyu, ranksums
# from scipy.optimize import curve_fit
# import statsmodels.api as sm
# from statsmodels.multivariate.manova import MANOVA
# import statsmodels.formula.api as smf
# from eidynamics     import utils, plot_tools
# plt.rcParams['font.family'] = 'Arial'
# plt.rcParams['font.size'] = 12
# plt.rcParams['svg.fonttype'] = 'none'

# # make a colour map viridis
# viridis = mpl.colormaps["viridis"]
# flare   = mpl.colormaps["flare"]
# crest   = mpl.colormaps["crest"]
# magma   = mpl.colormaps["magma"]
# edge    = mpl.colormaps['edge']

# color_E = "flare"
# color_I = "crest"
# color_freq = {1:magma(0.05), 5:magma(0.1), 10:magma(0.2), 20:magma(.4), 30:magma(.5), 40:magma(.6), 50:magma(.7), 100:magma(.9)}
# color_squares = color_squares = {1:viridis(0.2), 5:viridis(.4), 7:viridis(.6), 15:viridis(.8), 20:viridis(1.0)}
# color_EI = {-70:flare(0), 0:crest(0)}
# colors_EI = {-70:flare, 0:crest}

# Fs = 2e4
# %matplotlib widget
# freq_sweep_pulses = np.arange(9)

# from eidynamics.fit_PSC     import find_sweep_expected
# # from Findsim        import tab_presyn_patterns_LR_43
# # import parse_data
# from eidynamics     import utils, plot_tools
# import all_cells
# # import plotFig2
# import stat_annotate

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import plotFig2
%colors nocolor
# Sampling rate of the recorded traces, in samples per second (assumed Hz — TODO confirm).
# Used further down to turn a stimulus frequency into a sample count (`int(0.9*Fs / freq)`)
# and to convert sample indices to seconds (`... / Fs`).
Fs=2e4

In [3]:
# load data
# Paths are assembled from components with pathlib's `/` operator rather than from raw
# strings containing doubled backslashes: backslash is only a separator on Windows, so the
# old literals resolved to a single oddly named file on Linux/macOS. On Windows the
# resulting paths are unchanged.
figure_raw_material_location = Path("paper_figure_matter")
paper_figure_export_location = Path("paper_figures") / "Figure2v3_CC"
data_path                    = Path("parsed_data") / "FreqSweep"

# Load the dataset (long-format current-clamp frequency-sweep table stored under key 'data')
freq_sweep_cc_datapath = data_path / "all_cells_FreqSweep_CC_long.h5"
df = pd.read_hdf(freq_sweep_cc_datapath, key='data')

# # expanded dataframe (processed dataframe with metadata and analysed params)
# expanded_data_path = Path(r"parsed_data\all_cells_FreqSweep_combined_expanded.h5")
# xc_FS_analyseddf = pd.read_hdf(expanded_data_path, key='data')

In [4]:
# Sanity check: (rows, columns) of the frame as loaded, before any columns are trimmed.
df.shape

(4971, 80073)

In [5]:
# remove columns that are not needed
# not needed columns are the positional range 20049 to 80049 (end exclusive)
_TRIM_START, _TRIM_STOP = 20049, 80049

# Guard against re-running this cell: on the already-trimmed frame the same positional
# slice would start inside the remaining trailing columns and silently drop them.
# Only trim while the frame is still wide enough to contain the whole range.
if df.shape[1] >= _TRIM_STOP:
    df = df.drop(columns=df.columns[_TRIM_START:_TRIM_STOP])
df.shape

(4971, 20073)

In [6]:
# Per-trial response measurements.
# For every row with stimFreq >= 20 this cell runs plotFig2.deconv on the row, derives
# per-pulse quantities (valley/peak values and times, PSC, normalised values, slope, delay,
# spike flag) and writes them into nine-slot columns named '<field>_<i>'. Rows that are
# skipped or fail keep NaN in those columns. Finally the measurement columns are combined
# with the metadata columns into `new_df`.
import importlib
importlib.reload(plotFig2)  # re-import so edits to plotFig2 are picked up without a kernel restart

new_fields = ['valley_', 'valleytime_', 'peak_', 'peaktime_', 'slope_', 'delay_', 'normpeak_', 'PSC_', 'normPSC_', 'spike_']
newcolumns = [field + str(i) for field in new_fields for i in range(9)]
Fs=2e4
# Adding new columns with NaN values (existing columns of the same name are reset to NaN)
df = df.assign(**{col: np.nan for col in newcolumns})

# Outcome counters, reported in the summary line at the end of the cell
c = 0  # rows where probePulseStart == pulseTrainStart (trace patched before deconvolution)
d = 0  # rows skipped because stimFreq < 20
r = 0  # rows abandoned on RuntimeError
k = 0  # rows abandoned on FloatingPointError (logged output shows "calcKernel: negative kernel")
for idx, row in df.iterrows():
    print(f"Processing {idx} row with trialID {row['trialID']}")
    # do not process trials with a stimulus frequency below 20
    if row['stimFreq'] <20:
        print(f"Skipping {idx} row with trialID {row['trialID']}: Low frequency")
        d += 1  # was `d+=0`, which left the "Low frequency" count in the summary stuck at 0
        continue

    try:
        if row['probePulseStart'] == row['pulseTrainStart']:
            freq = row['stimFreq']
            isi = int(0.9*Fs / freq)
            print(f"interpolating {idx} row with trialID {row['trialID']}: No probe pulse. {row['probePulseStart']} == { row['pulseTrainStart']}")
            # Patch the per-row Series handed out by iterrows (not relied on to modify df):
            # copy `isi` values starting at position 4620 to position 2000, then bridge the
            # gap up to 4620 with a straight line between the two end values.
            # NOTE(review): 2000 / 4620 / 100 are hard-coded positions — confirm they match
            # the intended probe and train locations in the row layout.
            row[2000:2000+isi] = row[4620:4620+isi]
            row[2000+isi-100:4620] = np.interp(np.arange(2000+isi-100,4620), [2000+isi-100,4620], [row[2000+isi-100], row[4620]])
            c+=1
            # NOTE(review): noprobepulse=False is passed even in this no-probe branch, with a
            # fixed probe time of 0.1 — presumably intentional after the patch above; confirm.
            results = plotFig2.deconv(row[49:80049], row['stimFreq'], 0.1, row['pulseTrainStart'], None, noprobepulse=False)
        else:
            # NOTE(review): the 80049 bound predates the column trimming done earlier in the
            # notebook; on the trimmed frame this slice simply runs to the end of the row,
            # which includes the non-trace columns after position 20049. Confirm that
            # plotFig2.deconv is unaffected, or tighten the slice.
            results = plotFig2.deconv(row[49:80049], row['stimFreq'], row['probePulseStart'], row['pulseTrainStart'], None, noprobepulse=False)
    except RuntimeError:
        # Row is left as NaN; only counted, not logged.
        r+=1
        continue
    except FloatingPointError as e:
        print(f"Error at {idx} row with trialID {row['trialID']}: {e}")
        print(row['stimFreq'], row['numSq'], row['probePulseStart'], row['pulseTrainStart'])
        k+=1
        continue

    valleyTimes, valleys, peakTimes, peaks = results[2]
    # elementwise difference between peak and valley is PSC
    pscs = np.array(peaks) - np.array(valleys)
    # normalise to the first response
    normPSCs = np.array(pscs) / pscs[0]
    normpeaks = np.array(peaks) / peaks[0]
    stimTimes = np.array(results[-1]) / Fs
    slopes = np.array([(peaks[i] - valleys[i]) / (peakTimes[i] - valleyTimes[i]) for i in range(len(peaks))])
    delays = np.array([(valleyTimes[i] - stimTimes[i]) for i in range(len(valleyTimes))])
    # spikes if peaks are greater than 20
    spikes = np.array([1 if peak > 20 else 0 for peak in peaks])

    # Column-name prefix -> per-pulse values for this row
    measurements = {
        'valley': valleys,
        'valleytime': valleyTimes,
        'peak': peaks,
        'peaktime': peakTimes,
        'slope': slopes,
        'delay': delays,
        'normpeak': normpeaks,
        'PSC': pscs,
        'normPSC': normPSCs,
        'spike': spikes,
    }
    for col in newcolumns:
        prefix, col_idx = col.split('_')
        col_idx = int(col_idx)

        if col_idx < len(valleys):  # Ensure index is within bounds; remaining slots stay NaN
            df.loc[row.name, col] = measurements[prefix][col_idx]


    # progress marker for index values that are multiples of 100 (only reached by rows that were processed)
    if idx % 100 == 0:
        print(f"Processed {idx} rows")

# Keep only the non-trace columns: the first 49 columns and everything from position 20049 on
metadata_cols1 = df.iloc[:, :49]
metadata_cols2 = df.iloc[:, 20049:]

# Concatenating the two slices into a new DataFrame
new_df = pd.concat([metadata_cols1, metadata_cols2], axis=1)

# Display the shape of the new DataFrame to verify
print(new_df.shape)
print(f"Kernel errors: {k}, Runtime errors: {r}, No probe pulse: {c}, Low frequency: {d}")

Processing 0 row with trialID 4041_3_1
Processed 0 rows
Processing 1 row with trialID 4041_3_2
Processing 2 row with trialID 4041_3_3
Processing 3 row with trialID 4041_3_4
Processing 4 row with trialID 4041_3_5
Processing 5 row with trialID 4041_3_6
Processing 6 row with trialID 4041_3_7
Processing 7 row with trialID 4041_3_8
Processing 8 row with trialID 4041_3_9
Processing 9 row with trialID 4041_3_10
Processing 10 row with trialID 4041_3_11
Processing 11 row with trialID 4041_3_12
Processing 12 row with trialID 4041_3_13
Processing 13 row with trialID 4041_3_14
Processing 14 row with trialID 4041_3_15
Processing 15 row with trialID 4041_3_16
Processing 16 row with trialID 4041_3_17
Processing 17 row with trialID 4041_3_18
Processing 18 row with trialID 4041_3_19
Processing 19 row with trialID 4041_3_20
Processing 20 row with trialID 4041_3_21
Processing 21 row with trialID 4041_3_22
Processing 22 row with trialID 4041_3_23
Processing 23 row with trialID 4041_3_24
Processing 24 row 

  ret, cov = sci.curve_fit(lambda t,a,tau: a*np.exp(-t/tau), x, y, p0=(pk-baseline,0.02) )


Processed 100 rows
Processing 101 row with trialID 4041_14_30
Processing 102 row with trialID 4041_14_31
Processing 103 row with trialID 4041_14_32
Processing 104 row with trialID 4041_14_33
Processing 105 row with trialID 4041_14_34
Processing 106 row with trialID 4041_14_35
Processing 107 row with trialID 4041_14_36
Processing 108 row with trialID 4041_14_37
Processing 109 row with trialID 4041_14_38
Processing 110 row with trialID 4041_14_39
Processing 111 row with trialID 4041_14_40
Processing 112 row with trialID 4041_14_41
Processing 113 row with trialID 4041_14_42
Processing 114 row with trialID 4041_14_43
Processing 115 row with trialID 4041_14_44
Processing 116 row with trialID 4041_14_45
Processing 117 row with trialID 4041_14_46
negative kernel!
Error at 117 row with trialID 4041_14_46: calcKernel: negative kernel
20 1 0.20000000298023224 0.5
Processing 118 row with trialID 4041_14_47
Processing 119 row with trialID 4041_14_48
Processing 120 row with trialID 4041_14_49
Proce



Processing 152 row with trialID 4041_14_81
Processing 153 row with trialID 4041_14_82
negative kernel!
Error at 153 row with trialID 4041_14_82: calcKernel: negative kernel
20 1 0.20000000298023224 0.5
Processing 154 row with trialID 4041_14_83
Processing 155 row with trialID 4041_14_84
Processing 156 row with trialID 4041_14_85
Processing 157 row with trialID 4041_14_86
Processing 158 row with trialID 4041_14_87
Processing 159 row with trialID 4041_14_88
Processing 160 row with trialID 4041_14_89
Processing 161 row with trialID 4041_14_90
Processing 162 row with trialID 4041_14_91
Processing 163 row with trialID 4041_14_92
Processing 164 row with trialID 4041_14_93
Processing 165 row with trialID 4041_14_94
Processing 166 row with trialID 4041_14_95
Processing 167 row with trialID 4041_14_96
Processing 168 row with trialID 4041_14_97
Processing 169 row with trialID 4041_14_98
Processing 170 row with trialID 4041_14_99
Processing 171 row with trialID 4041_14_100
Processing 172 row with

  ret, cov = sci.curve_fit(lambda t,a,tau: a*np.exp(-t/tau), x, y, p0=(pk-baseline,0.02) )


Processing 310 row with trialID 3951_1_8
Processing 311 row with trialID 3951_1_9
Processing 312 row with trialID 3951_1_10
Processing 313 row with trialID 3951_1_11
Processing 314 row with trialID 3951_1_12
Processing 315 row with trialID 3951_1_13
Processing 316 row with trialID 3951_1_14
Processing 317 row with trialID 3951_1_15
Processing 318 row with trialID 3951_1_16
Processing 319 row with trialID 3951_1_17
Processing 320 row with trialID 3951_1_18
Processing 321 row with trialID 3951_1_19
Processing 322 row with trialID 3951_1_20
Processing 323 row with trialID 3951_1_21
Processing 324 row with trialID 3951_1_22
Processing 325 row with trialID 3951_1_23
Processing 326 row with trialID 3951_1_24
Processing 327 row with trialID 3901_0_1
Processing 328 row with trialID 3901_0_2
Processing 329 row with trialID 3901_0_3
Processing 330 row with trialID 3901_0_4
Processing 331 row with trialID 3901_0_5
Processing 332 row with trialID 3901_0_6
Processing 333 row with trialID 3901_0_7
P

In [7]:
# Size of the measurement table before anything is written or filtered
print(new_df.shape)

# Write the complete table (rows without measurements included): HDF5 first ...
new_df.to_hdf(
    data_path.joinpath("all_cells_FreqSweep_CC_kernelfit_response_measurements.h5"),
    mode='w',
    key='data',
)
# ... then the same table as an Excel workbook
new_df.to_excel(
    data_path.joinpath("all_cells_FreqSweep_CC_kernelfit_response_measurements.xlsx")
)

# Keep only the rows whose 'valley_0' entry is not NaN
has_first_valley = new_df['valley_0'].notna()
new_df = new_df[has_first_valley]
print(new_df.shape)

(4971, 163)


your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block5_values] [items->Index(['sex', 'unit', 'location', 'protocol', 'numSq', 'clampMode',
       'clampPotential', 'condition', 'numPatterns', 'patternList',
       'frameChangeTimes', 'pulseTimes', 'fieldData', 'peaks_cell',
       'peaks_cell_norm', 'auc_cell', 'slope_cell', 'delay_cell',
       'peaks_field', 'peaks_field_norm', 'cell_fpr', 'field_fpr', 'cell_ppr',
       'cell_stpr', 'field_ppr', 'field_stpr', 'trialID', 'cellunit',
       'fieldunit'],
      dtype='object')]

  new_df.to_hdf(data_path / "all_cells_FreqSweep_CC_kernelfit_response_measurements.h5", key='data', mode='w')


(4460, 163)


In [8]:
# Write the current `new_df` under the "_noNANs" file names: HDF5 first ...
new_df.to_hdf(
    data_path.joinpath("all_cells_FreqSweep_CC_kernelfit_response_measurements_noNANs.h5"),
    mode='w',
    key='data',
)
# ... then the same table as an Excel workbook
new_df.to_excel(
    data_path.joinpath("all_cells_FreqSweep_CC_kernelfit_response_measurements_noNANs.xlsx")
)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block5_values] [items->Index(['sex', 'unit', 'location', 'protocol', 'numSq', 'clampMode',
       'clampPotential', 'condition', 'numPatterns', 'patternList',
       'frameChangeTimes', 'pulseTimes', 'fieldData', 'peaks_cell',
       'peaks_cell_norm', 'auc_cell', 'slope_cell', 'delay_cell',
       'peaks_field', 'peaks_field_norm', 'cell_fpr', 'field_fpr', 'cell_ppr',
       'cell_stpr', 'field_ppr', 'field_stpr', 'trialID', 'cellunit',
       'fieldunit'],
      dtype='object')]

  new_df.to_hdf(data_path / "all_cells_FreqSweep_CC_kernelfit_response_measurements_noNANs.h5", key='data', mode='w')
