In [58]:
import os
from pathlib import Path
from typing import Dict

import pandas as pd
import numpy as np

In [114]:
def get_general_metadata_df(path_to_recordings_dir: Path) -> pd.DataFrame:
    """Collect the cell-level metadata of one recording directory as a one-row frame.

    The workbook ``<directory name>.xlsx`` inside ``path_to_recordings_dir`` is
    read (sheet 'General information') and the values of its row labelled 0 are
    combined with two fields derived from the directory name itself.

    Parameters
    ----------
    path_to_recordings_dir : Path
        Directory of a single recorded cell.

    Returns
    -------
    pd.DataFrame
        Single row (index 0) with the columns 'date', 'session_cell_id',
        'mouse_line', 'brain_region', 'cell_type' and 'sex'.
    """
    dir_name = path_to_recordings_dir.name
    workbook_path = path_to_recordings_dir.joinpath(f'{dir_name}.xlsx')
    info_sheet = pd.read_excel(workbook_path, sheet_name='General information')

    # Fields encoded in the directory name: its first 10 characters and
    # whatever follows the last underscore.
    record = {'date': dir_name[:10],
              'session_cell_id': dir_name.rpartition('_')[2]}

    # Output column -> column header in the workbook sheet.
    sheet_columns = {'mouse_line': 'Animal line',
                     'brain_region': 'Region',
                     'cell_type': 'Type',
                     'sex': 'Sex'}
    for output_column, sheet_column in sheet_columns.items():
        record[output_column] = info_sheet[sheet_column][0]

    return pd.DataFrame(record, index=[0])

In [125]:
def _parse_stimulation_token(token: str):
    """Translate the stimulation part of a recording filename into its parameters.

    Parameters
    ----------
    token : str
        Filename remainder once date, cell id and extension were removed,
        e.g. '20Hz100ms', '4Hz5s' or 'Bsl'.

    Returns
    -------
    tuple or None
        ``(stimulation_string, frequency_hz, duration_ms)``. Frequency and
        duration are ``np.nan`` for baseline recordings. ``None`` is returned
        when the token cannot be interpreted.
    """
    if 'Hz' in token:
        frequency_text, _, duration_text = token.partition('Hz')
        try:
            frequency = int(frequency_text)
            if 'ms' in duration_text:
                duration = int(duration_text[:duration_text.index('ms')])
            elif 's' in duration_text:
                # Seconds may be fractional (e.g. '0.5s'); store as whole milliseconds.
                seconds = float(duration_text[:duration_text.index('s')])
                duration = int(round(seconds * 1000))
            else:
                # No unit at all: refuse to guess instead of truncating digits.
                return None
        except (ValueError, OverflowError):
            return None
        return f'{frequency}-Hz_for_{duration}-ms', frequency, duration
    if 'Bsl' in token:
        return 'baseline', np.nan, np.nan
    return None


def get_stimulation_paradigms_df(path_to_recordings_dir: Path) -> pd.DataFrame:
    """List the detected-event CSV files of a recording directory with their stimulation paradigm.

    Every file in ``path_to_recordings_dir`` whose name ends in '.csv' and does
    not contain 'datapoints' is treated as one recording. Its name is expected
    to look like ``yyyy_mm_dd_<cell id>_<paradigm>.csv``, where ``<paradigm>``
    is e.g. '20Hz100ms', '4Hz1s' or 'Bsl'. Files are processed in sorted order
    so the row order is reproducible.

    Parameters
    ----------
    path_to_recordings_dir : Path
        Directory of a single recorded cell.

    Returns
    -------
    pd.DataFrame
        One row per recording with the columns 'stimulation_string',
        'stimulation_frequency-Hz', 'stimulation_duration-ms' and
        'filepath_detected_events'. Baseline recordings get the string
        'baseline' and NaN frequency/duration. Names that cannot be
        interpreted get 'unknown' with NaNs, and a warning is printed.
    """
    csv_suffix = '.csv'
    date_prefix_length = len('yyyy_mm_dd_')

    # iterdir() yields entries in arbitrary order; sort for a stable result.
    recording_filepaths = sorted(
        elem for elem in path_to_recordings_dir.iterdir()
        if elem.name.endswith(csv_suffix) and 'datapoints' not in elem.name
    )

    stimulation_paradigms = {'stimulation_string': list(),
                             'stimulation_frequency-Hz': list(),
                             'stimulation_duration-ms': list(),
                             'filepath_detected_events': list()}
    for filepath in recording_filepaths:
        stem = filepath.name[:-len(csv_suffix)]
        # Drop the date prefix, then everything up to and including the first
        # underscore (the cell id); what is left names the paradigm.
        _, _, paradigm_token = stem[date_prefix_length:].partition('_')
        parsed = _parse_stimulation_token(paradigm_token)
        if parsed is None:
            print(f'Warning: stimulation paradigm could not be identified for: {filepath.name}')
            parsed = ('unknown', np.nan, np.nan)
        stimulation_paradigm, stimulation_frequency, stimulation_duration = parsed
        stimulation_paradigms['stimulation_string'].append(stimulation_paradigm)
        stimulation_paradigms['stimulation_frequency-Hz'].append(stimulation_frequency)
        stimulation_paradigms['stimulation_duration-ms'].append(stimulation_duration)
        stimulation_paradigms['filepath_detected_events'].append(filepath)
    return pd.DataFrame(stimulation_paradigms)

In [143]:
global_cell_id = 1

In [145]:
str(global_cell_id).zfill(4)

'0001'

In [160]:
def get_recordings_overview_df(cell_recording_dir: Path, global_cell_id: int) -> pd.DataFrame:
    """Build the per-recording overview table of a single cell.

    The one-row general metadata of the cell is repeated once per stimulation
    recording and joined column-wise with the stimulation paradigm table, so
    every row describes one recording file together with the metadata of the
    cell it belongs to.

    Parameters
    ----------
    cell_recording_dir : Path
        Directory of a single recorded cell; passed on unchanged to
        ``get_general_metadata_df`` and ``get_stimulation_paradigms_df``.
    global_cell_id : int
        Running number of the cell; stored as a string zero-padded to four
        characters (1 -> '0001').

    Returns
    -------
    pd.DataFrame
        Columns of the general metadata, followed by the columns of the
        stimulation paradigms, followed by 'global_cell_id'. One row per
        stimulation recording; zero rows if the directory has none.

    Example
    -------
    ``recordings_overview = get_recordings_overview_df(cell_recording_dir, global_cell_id)``
    """
    general_metadata = get_general_metadata_df(cell_recording_dir)
    stimulation_paradigms = get_stimulation_paradigms_df(cell_recording_dir)
    n_recordings = stimulation_paradigms.shape[0]
    # Repeat the single metadata row once per recording. Repeating the index
    # (rather than concatenating n copies) also works when n_recordings is 0.
    stimulations_adjusted_general_metadata = (
        general_metadata
        .loc[general_metadata.index.repeat(n_recordings)]
        .reset_index(drop=True)
    )
    recordings_overview = pd.concat([stimulations_adjusted_general_metadata, stimulation_paradigms], axis=1)
    recordings_overview['global_cell_id'] = str(global_cell_id).zfill(4)
    return recordings_overview


In [161]:
# Column names in display order: cell identifiers first, then the cell's
# metadata, then the per-recording stimulation fields and the file path.
columns = ['global_cell_id', 'date', 'session_cell_id', 'mouse_line', 'sex', 'brain_region', 'cell_type', 
           'stimulation_string', 'stimulation_frequency-Hz', 'stimulation_duration-ms', 'filepath_detected_events']

In [162]:
recordings_overview[columns]

In [163]:
recordings_overview

Unnamed: 0,global_cell_id,date,session_cell_id,mouse_line,sex,brain_region,cell_type,stimulation_string,stimulation_frequency-Hz,stimulation_duration-ms,filepath_detected_events
0,1,2021_12_09,C2,NET-cre:vGat-flp,Female,vlPAG,Unknown,20-Hz_for_100-ms,20.0,100.0,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...
1,1,2021_12_09,C2,NET-cre:vGat-flp,Female,vlPAG,Unknown,20-Hz_for_500-ms,20.0,500.0,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...
2,1,2021_12_09,C2,NET-cre:vGat-flp,Female,vlPAG,Unknown,4-Hz_for_1000-ms,4.0,1000.0,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...
3,1,2021_12_09,C2,NET-cre:vGat-flp,Female,vlPAG,Unknown,4-Hz_for_5000-ms,4.0,5000.0,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...
4,1,2021_12_09,C2,NET-cre:vGat-flp,Female,vlPAG,Unknown,baseline,,,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...


In [137]:
?pd.concat

[0;31mSignature:[0m
[0mpd[0m[0;34m.[0m[0mconcat[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mobjs[0m[0;34m:[0m [0;34m'Iterable[NDFrame] | Mapping[Hashable, NDFrame]'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0maxis[0m[0;34m=[0m[0;36m0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mjoin[0m[0;34m=[0m[0;34m'outer'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mignore_index[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mkeys[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlevels[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnames[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverify_integrity[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msort[0m[0;34m:[0m [0;34m'bool'[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcopy[0m[0;34m:[0m [0;34m'b

In [103]:
stimulation_paradigms

{'stimulation_string': ['20-Hz_for_100-ms',
  '20-Hz_for_500-ms',
  '4-Hz_for_1000-ms',
  '4-Hz_for_5000-ms',
  'baseline'],
 'stimulation_frequency': [20, 20, 4, 4, nan],
 'stimulation_duration': [100, 500, 1000, 5000, nan],
 'filepath_detected_events': [PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz100ms.csv'),
  PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz500ms.csv'),
  PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_4Hz1s.csv'),
  PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_4Hz5s.csv'),
  PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_Bsl.csv')]}

In [86]:
df_cell = pd.DataFrame(recording_info)

In [87]:
df_cell

Unnamed: 0,mouse_line,brain_region,cell_type,sex,stimulation_paradigms,date,session_cell_id
20Hz_for_100ms,NET-cre:vGat-flp,vlPAG,Unknown,Female,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...,2021_12_09,C2
20Hz_for_500ms,NET-cre:vGat-flp,vlPAG,Unknown,Female,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...,2021_12_09,C2
4Hz_for_1s,NET-cre:vGat-flp,vlPAG,Unknown,Female,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...,2021_12_09,C2
4Hz_for_5s,NET-cre:vGat-flp,vlPAG,Unknown,Female,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...,2021_12_09,C2
baseline,NET-cre:vGat-flp,vlPAG,Unknown,Female,/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-fl...,2021_12_09,C2


In [57]:
# Exploratory, script-style draft: reads the general metadata of one cell and
# maps each stimulation label to its detected-events CSV file.
# NOTE(review): hard-coded absolute local path; only valid on the author's machine.
cell_recording_dir = Path('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/')



# The workbook is expected inside the directory and named after it.
metadata_df = pd.read_excel(cell_recording_dir.joinpath(f'{cell_recording_dir.name}.xlsx'),
                            sheet_name = 'General information')

# NOTE(review): assigned here but not used anywhere else in this cell.
mix_and_matchable_categories = dict()

# Values are taken from the row labelled 0 of the 'General information' sheet.
general_metadata = {'mouse_line': metadata_df['Animal line'][0],
                    'brain_region': metadata_df['Region'][0],
                    'cell_type': metadata_df['Type'][0],
                    'sex': metadata_df['Sex'][0]}

# Keep every '.csv' file whose name does not contain 'datapoints'.
filepaths_stimulation_recordings = list()
for elem in cell_recording_dir.iterdir():
    if 'datapoints' not in elem.name:
        if elem.name.endswith('.csv'):
            filepaths_stimulation_recordings.append(elem)


# Maps stimulation label -> file path.
# NOTE(review): two files that resolve to the same label (e.g. several
# 'unknown' ones) overwrite each other, because the label is the dict key.
stimulation_paradigms = dict()
for filepath in filepaths_stimulation_recordings:
    filename = filepath.name
    filename = filename.replace('.csv', '')
    # First 11 characters, i.e. 'yyyy_mm_dd_' for names such as '2021_12_09_C2_4Hz1s'.
    yyyy_mm_dd_ = filename[:11]
    # str.replace removes every occurrence of the prefix, not just the leading one.
    filename = filename.replace(yyyy_mm_dd_, '')
    # Text up to the next underscore is taken as the cell id; raises ValueError if there is none.
    cell_id = filename[:filename.index('_')]
    filename = filename.replace(cell_id + '_', '')
    if 'Hz' in filename:
        # Frequency keeps its 'Hz' suffix here; the duration is whatever follows it.
        stimulation_frequency = filename[:filename.index('Hz') + 2]
        stimulation_duration = filename[filename.index('Hz') + 2 :]
        stimulation_paradigm = f'{stimulation_frequency}_for_{stimulation_duration}'
    elif 'Bsl' in filename:
        stimulation_paradigm = 'baseline'
    else:
        print(f'Warning: stimulation paradigm could not be identified for: {filepath.name}')
        stimulation_paradigm = 'unknown'
    stimulation_paradigms[stimulation_paradigm] = filepath

In [23]:
filepaths_stimulation_recordings

[PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz100ms.csv'),
 PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz500ms.csv'),
 PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_4Hz1s.csv'),
 PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_4Hz5s.csv'),
 PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_Bsl.csv')]

In [56]:
stimulation_paradigms

{'20Hz_for_100ms': PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz100ms.csv'),
 '20Hz_for_500ms': PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz500ms.csv'),
 '4Hz_for_1s': PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_4Hz1s.csv'),
 '4Hz_for_5s': PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_4Hz5s.csv'),
 'baseline': PosixPath('/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_Bsl.csv')}

In [36]:
?filename.replace

[0;31mSignature:[0m [0mfilename[0m[0;34m.[0m[0mreplace[0m[0;34m([0m[0mold[0m[0;34m,[0m [0mnew[0m[0;34m,[0m [0mcount[0m[0;34m=[0m[0;34m-[0m[0;36m1[0m[0;34m,[0m [0;34m/[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Return a copy with all occurrences of substring old replaced by new.

  count
    Maximum number of occurrences to replace.
    -1 (the default value) means replace all occurrences.

If the optional argument count is given, only the first count occurrences are
replaced.
[0;31mType:[0m      builtin_function_or_method


In [12]:
metadata_df['Animal line'][0]

'NET-cre:vGat-flp'

In [13]:
metadata_df

Unnamed: 0,Recording Folder,Animal line,Sex,DOS,DOB,Age (days),Time of sacrifice,DOI,Cell number,Type,Region,Stimulation (%),Internal used
0,2021_12_09,NET-cre:vGat-flp,Female,2021-12-09,2021-07-15,147,11:30:00,2021-11-11,2,Unknown,vlPAG,15,Kglu + 1.73ug bio


In [49]:
df = pd.read_excel(cell_recording_dir.as_posix() + '/' + cell_recording_dir.name + '.xlsx', sheet_name = 'General information')
df

Unnamed: 0,Recording Folder,Animal line,Sex,DOS,DOB,Age (days),Time of sacrifice,DOI,Cell number,Type,Region,Stimulation (%),Internal used
0,2021_12_09,NET-cre:vGat-flp,Female,2021-12-09,2021-07-15,147,11:30:00,2021-11-11,2,Unknown,vlPAG,15,Kglu + 1.73ug bio


In [59]:
df = pd.read_excel(cell_recording_dir.as_posix() + '/' + cell_recording_dir.name + '.xlsx', sheet_name = 'Recordings and cell properties', skiprows=2)
df.drop(['Unnamed: 0'], axis=1, inplace=True)
df.columns

Index(['Recording file', 'Time of protocol', 'Time since cutting', 'Vh (mV)',
       'Type of protocol', 'Timing of stim (s)', 'Leak (pA)', 'Cp Fast (pF)',
       'Series R (MΩ)', 'Pharmacology', 'Cap I (pA)', 'Tau (ms)', 'Rs (MΩ)',
       'Cell Cap (pF)', 'Iinf(nA)', 'Rm (MΩ)', 'Access change (%)',
       'Total time (s)', 'Time of counting (s) ', 'Nb of  sweeps',
       'Mean amp (pA)', 'Rise time 10-90 (ms)', 'Decay time 63% (ms)',
       'Half width (ms)'],
      dtype='object')

In [47]:
dfs['General information']

Unnamed: 0,Recording Folder,Animal line,Sex,DOS,DOB,Age (days),Time of sacrifice,DOI,Cell number,Type,Region,Stimulation (%),Internal used
0,2021_12_09,NET-cre:vGat-flp,Female,2021-12-09,2021-07-15,147,11:30:00,2021-11-11,2,Unknown,vlPAG,15,Kglu + 1.73ug bio


In [48]:
dfs['Recordings and cell properties']

Unnamed: 0,Notes:,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,535 to 590,Unnamed: 16,Unnamed: 17,650 - 20000,Unnamed: 19,Unnamed: 20,Peak 2 - 1 - 6,Bsln 2.5- win 4 - Skip 3,Unnamed: 23,650 - 20000.1
0,Notes:,,,,,,,Noted from the oscilloscope,,,...,Avg plateau,,,,,,Threshold 3,,,
1,,Recording file,Time of protocol,Time since cutting,Vh (mV),Type of protocol,Timing of stim (s),Leak (pA),Cp Fast (pF),Series R (MΩ),...,Iinf(nA),Rm (MΩ),Access change (%),Total time (s),Time of counting (s),Nb of sweeps,Mean amp (pA),Rise time 10-90 (ms),Decay time 63% (ms),Half width (ms)
2,,2021_12_09_008,14:46:00,03:16:00,-70,Bsl,,-2,7,15,...,5.93,4.467127,,272,34,8,8.59,0.82,1.85,3.66
3,,2021_12_09_009,14:52:00,03:22:00,-70,4 Hz,1,-2,6,15,...,6.58,7.206247,-29.481263,313,31.3,10,8.71,1.21,1.71,3.56
4,,2021_12_09_010,15:02:00,03:32:00,-70,4 Hz,5,3,6,15,...,6.71,7.273929,-29.788936,270,27,10,8.38,1.25,1.98,3.93
5,,2021_12_09_011,15:13:00,03:43:00,-70,20 Hz,0.1,-6,6,15,...,7.6,7.370798,-29.161282,321,32.1,10,8,1.3,2.17,4.12
6,,2021_12_09_012,15:24:00,03:54:00,-70,20 Hz,0.5,4,6,15,...,11.79,7.590642,-27.08141,222.6,31.8,7,7.75,1.36,2.22,4.32
7,,,,1899-12-29 12:30:00,,,,,,,...,,,,0,,,,,,
8,,,,,,,,,,,...,,,,0,,,,,,
9,,,,,,,,,,,...,,,,0,,,,,,


In [37]:
for elem in cell_recording_dir.iterdir():
    print(elem)

/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2.xlsx
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz100ms.csv
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz100ms_AVG.png
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz100ms_datapoints.csv
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz100ms_EVENTS.png
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz500ms.csv
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz500ms_AVG.png
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz500ms_datapoints.csv
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_20Hz500ms_EVENTS.pdf
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12_09_C2_4Hz1s.csv
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp/2021_12_09_C2/2021_12

In [35]:
pd.read_csv(filepath)

Unnamed: 0,EV_WaveN_RS1_A0,EV_ThreshT_RS1_A0,EV_ThreshY_RS1_A0,EV_PeakT_RS1_A0,EV_PeakY_RS1_A0,EV_BaseY_RS1_A0,EV_AmpY_RS1_A0
0,0,3441.7,-4.473760,3442.4,-10.65330,-1.331690,-9.32157
1,0,3534.8,-5.068700,3535.8,-9.64638,-1.932240,-7.71414
2,0,3575.0,-4.977810,3575.6,-10.43300,-1.432070,-9.00091
3,0,3607.5,-4.416650,3608.2,-9.70576,-1.292380,-8.41338
4,0,3753.6,-5.167740,3754.4,-17.78960,-1.402520,-16.38710
...,...,...,...,...,...,...,...
816,9,19229.1,-2.210830,19230.3,-3.21420,0.829616,-4.04381
817,9,19369.7,-0.066215,19370.2,-2.55783,3.129000,-5.68683
818,9,19485.4,-1.393950,19487.3,-10.27540,2.042480,-12.31790
819,9,19616.2,-5.867590,19616.8,-8.44465,-2.214410,-6.23025


In [4]:
path = Path('/mnt/c/Users/dsege/TEMP/Alexia/')

In [7]:
for elem in path.iterdir():
    print(elem)

/mnt/c/Users/dsege/TEMP/Alexia/analysis_with_dcl_patch
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp


In [12]:
for elem in path.glob(pattern='*'):
    print(elem)

/mnt/c/Users/dsege/TEMP/Alexia/analysis_with_dcl_patch
/mnt/c/Users/dsege/TEMP/Alexia/NET-cre_vGat-flp


In [13]:
path.parts

('/', 'mnt', 'c', 'Users', 'dsege', 'TEMP', 'Alexia')

In [20]:
database = Database(path)

In [19]:
import os
from pathlib import Path
from abc import ABC, abstractmethod

class Feature(ABC):
    """Abstract base class declaring the ``add_to_database`` hook.

    It cannot be instantiated directly; subclasses have to override
    ``add_to_database``.
    """

    @abstractmethod
    def add_to_database(self):
        """Hook to be overridden by subclasses; this base version does nothing."""
        return None
    

class CellRecording(Feature):
    """Concrete ``Feature`` subclass; its database hook is still a stub."""

    def add_to_database(self):
        """Placeholder: currently does nothing and returns None."""
        return None
    
    
class Subdirectories:
    """Holds the root directory and runs the (still unimplemented) subdirectory setup."""

    def __init__(self, root_dir: Path):
        """Store ``root_dir``, then run both setup steps in order."""
        self.root_dir = root_dir
        setup_steps = (self.create_missing_subdirectories,
                       self.assign_subdirectories_as_attributes)
        for run_step in setup_steps:
            run_step()

    def create_missing_subdirectories(self):
        """Placeholder, currently a no-op.

        Planned: check for each element in a list of subdirs whether it
        exists and create it if not.
        """
        return None

    def assign_subdirectories_as_attributes(self):
        """Placeholder, currently a no-op.

        Planned: use the list of subdirs and set the path to each one as an
        attribute.
        """
        return None


class Database:
    """Registry of all recorded cells that were added to it.

    The database is meant to hold the information about every recorded cell.
    This information excludes the raw data (only file paths to the raw data
    are kept) but includes general metadata such as cell type, stimulus type,
    brain region and pharmacological treatment. Some intermediate data could
    be added later (e.g. how the cell reacted upon stimulation, see
    dashboard). Overall, the database should allow selecting cells by
    mix-and-match criteria for further (statistical) analyses and plotting.

    All methods other than ``__init__`` are placeholders at this point.
    """

    def __init__(self, root_dir: Path):
        """Remember ``root_dir`` and create the ``Subdirectories`` helper for it."""
        self.root_dir = root_dir
        self.subdirectories = Subdirectories(root_dir=root_dir)

    def load_database_from_disk(self):
        """Placeholder, currently a no-op.

        Planned: option to load a previously created and saved database.
        """
        return None

    def save_database_to_disk(self):
        """Placeholder, currently a no-op.

        Planned: save all information to disk.
        """
        return None

    def add_new_feature(self, new_feature: Feature):
        """Placeholder, currently a no-op.

        Planned: by providing the path to a new directory, the user can add
        new recordings to the database; this should trigger the database to
        update itself (e.g. its categories dictionary).
        """
        return None


    

        
