In [5]:
# The first step is to activate the kernel (conda environment) for this notebook
# In VS Code, click "Select Kernel" in the top right
# Select Python Environments, then activate the Task2 kernel (you should now see "Task2 (Python 3.10.15)")

In [2]:
# Next you must make sure you can run Jupyter Notebooks
# Run this cell; you may be prompted to install necessary software. Please do so.

# Smoke test: if the kernel works, the text "hello world" is printed below this cell.
print("hello world")

hello world


In [3]:
# Import Modules
# You previously installed libraries into your conda environment. You must now import those libraries into this script.

import numpy as np
from matplotlib import pyplot as plt
import spikeinterface as si
from spikeinterface import widgets, exporters, postprocessing, qualitymetrics, sorters
import pickle
import os
import pandas as pd

In [4]:
# Get Directory information
# Next you must establish the directories used. This will allow Python to access the recording and sorting data.

# NOTE: this is an absolute path on one specific machine; change it to the location of the Task 2 folder on your device.
current_directory = "\\Users\\ivank\\OneDrive\\Desktop\\Task 2" # different for each device
# Name of the folder (inside current_directory) that holds the sorting output.
output_folder = "utah_organoids_output"

In [5]:
# Understanding how to navigate the output folder
# This folder is organized by organoid_id/drug_name/paramset_idx/sorter_name/
# In the scope of this Task, some of these organization folders are unnecessary.
# I also understand we aren't dealing with organoids, but this is how our sessions are organized, so please use this format.

organoid_id = "MB01" # Mouse Brain 1
drug_name = "Control" # All sessions are controlled in this data (no drugs used)
# Must be a sorter known to get_output_dir: "spykingcircus2" or "tridesclous2".
sorter_name = "spykingcircus2"

In [6]:
# Build the output directory path used to access the data (this only assembles the path string; no folders are created)

def get_output_dir(organoid_id, drug_name, sorter_name):
    """Assemble the path to one sorter's output folder.

    The path has the form
    ``current_directory/output_folder/organoid_id/drug_name/paramset_idx/sorter_name``,
    where ``current_directory`` and ``output_folder`` are the notebook-level
    settings and ``paramset_idx`` is looked up from ``sorter_name``.

    Args:
        organoid_id: Identifier of the recorded sample (e.g. ``"MB01"``).
        drug_name: Name of the condition folder (e.g. ``"Control"``).
        sorter_name: ``"spykingcircus2"`` or ``"tridesclous2"``.

    Returns:
        The path as a single string whose components are joined with ``"/"``.

    Raises:
        KeyError: If ``sorter_name`` is not one of the known sorters.
    """
    # Each paramset index refers to the default parameters of one spike sorter.
    # 26 and 30 are arbitrary numbers used to call upon those parameters.
    paramset_by_sorter = {"spykingcircus2": 30, "tridesclous2": 26}
    paramset = paramset_by_sorter[sorter_name]

    path_parts = (
        current_directory,
        output_folder,
        organoid_id,
        drug_name,
        f"{paramset}",
        sorter_name,
    )
    return "/".join(path_parts)

# Show the output path that results from the settings chosen in the cells above.
print(get_output_dir(organoid_id, drug_name, sorter_name))

\Users\ivank\OneDrive\Desktop\Task 2/utah_organoids_output/MB01/Control/30/spykingcircus2


In [8]:
# The data available for you to access is in npy and csv files. Functions are provided below to fetch this data given a directory.
# Combine the output dir with one of the directories listed below to access these files.

# Different sorters store different information in different formats. Here are dictionaries with the dirs for all accessible information.

# spykingcircus2
# Each value is a sub-directory to combine with the output dir; the trailing comment lists the files found there.
spykingcircus_dirs = {
    "spikes_dir": "spike_sorting/sorter_output/sorting", # spikes.npy
    "peaks_dir": "spike_sorting/sorter_output/motion", # peaks.npy , peak_locations.npy
    "motion_dir": "spike_sorting/sorter_output/motion/motion", # displacement_seg0.npy , spatial_bins_um.npy , temporal_bins_s_seg0.npy
    "sparsity_dir": "sorting_analyzer", # sparsity_mask.npy
}
# tridesclous2
# Same convention as above: sub-directory value, files listed in the trailing comment.
tridesclous_dirs = {
    "spikes_dir": "spike_sorting/sorter_output/sorting", # spikes.npy
    "peaks_dir": "spike_sorting/sorter_output", # all_peaks.npy, clustering_label.npy, noise_levels.npy, peaks.npy, spikes.npy
    "sparsity_dir": "spike_sorting/sorter_output/features", # peaks.npy, sparse_mask.npy, sparse_tsvd.npy, sparse_wfs.npy
    "pre-peeler_dir": "spike_sorting/sorter_output/sorting_pre_peeler" # spikes.npy
}

# We also ran a sorting analyzer (done by spikeinterface) to extract important information. This format is global across sorters.
def extensions_dir(extension: str): # both sorters
    """Return the sorting-analyzer sub-directory that holds ``extension``.

    The layout is the same for both sorters. The returned path is relative;
    combine it with the sorter's output directory to locate the files.

    Args:
        extension: Name of a sorting-analyzer extension. Must be one of the
            names in the ``extensions`` list below.

    Returns:
        The relative path ``"sorting_analyzer/extensions/<extension>"``.

    Raises:
        ValueError: If ``extension`` is not a known extension name.
    """
    # Known extensions; the trailing comment lists the files stored in each.
    extensions = [
        "amplitude_scalings", # amplitude_scalings.npy , collision_mask.npy
        "correlograms", # bins.npy , ccgs.npy
        "isi_histograms", # bins.npy , isi_histograms.npy
        "noise_levels", # noise_levels.npy
        "principal_components", # pca_projection.npy
        "quality_metrics", # metrics.csv
        "random_spikes", # random_spikes_indices.npy
        "spike_amplitudes", # amplitudes.npy
        "spike_locations", # spike_locations.npy
        "template_metrics", # metrics.csv
        "template_similarity", # similarity.npy
        "templates", # average.npy , std.npy
        "unit_locations", # unit_locations.npy
        "waveforms", # waveforms.npy
    ]

    # An exception instance must be raised: raising a plain string is itself a
    # TypeError in Python and would hide the intended message.
    if extension not in extensions:
        raise ValueError(
            f"Enter a Valid Extension: got {extension!r}, expected one of {extensions}"
        )

    return f"sorting_analyzer/extensions/{extension}"


In [9]:
# Fetch data functions
# Use these functions to access the needed data.
# You need to input a data path (made from the options above).
# Each function returns the file's contents. If the directory contains more than one matching file, the function returns a dictionary
# of all of them, keyed by file name. If you specify a file, it returns only that file's contents.

def fetch_npy(path_to_folder: str, file=None):
    """Load ``.npy`` data from a folder.

    Args:
        path_to_folder: Directory to look in.
        file: Optional name of one file inside ``path_to_folder``. When given,
            only that file is loaded, regardless of how many ``.npy`` files
            the folder holds.

    Returns:
        * The array stored in ``file`` when ``file`` is given.
        * The single array when the folder holds exactly one ``.npy`` file.
        * A dict mapping file name -> array when the folder holds several
          ``.npy`` files and no ``file`` was requested.

    Raises:
        FileNotFoundError: If the folder contains no ``.npy`` files, or (from
            ``np.load``) if the requested ``file`` does not exist.
    """
    # Sorted so that the dictionary order does not depend on os.listdir order.
    npy_files = sorted(
        name for name in os.listdir(path_to_folder) if name.endswith(".npy")
    )

    # An exception instance must be raised: raising a plain string is itself a
    # TypeError in Python and would hide the intended message.
    if not npy_files:
        raise FileNotFoundError(
            f"No npy files in the given directory: {path_to_folder}"
        )

    # An explicit request always wins, even when only one file is present.
    if file is not None:
        return np.load(os.path.join(path_to_folder, file))

    if len(npy_files) == 1:
        return np.load(os.path.join(path_to_folder, npy_files[0]))

    return {name: np.load(os.path.join(path_to_folder, name)) for name in npy_files}


def fetch_csv(path_to_folder: str, file=None):
    """Load ``.csv`` data from a folder as pandas DataFrames.

    Args:
        path_to_folder: Directory to look in.
        file: Optional name of one file inside ``path_to_folder``. When given,
            only that file is loaded, regardless of how many ``.csv`` files
            the folder holds.

    Returns:
        * The DataFrame read from ``file`` when ``file`` is given.
        * The single DataFrame when the folder holds exactly one ``.csv`` file.
        * A dict mapping file name -> DataFrame when the folder holds several
          ``.csv`` files and no ``file`` was requested.

    Raises:
        FileNotFoundError: If the folder contains no ``.csv`` files, or (from
            ``pd.read_csv``) if the requested ``file`` does not exist.
    """
    # Sorted so that the dictionary order does not depend on os.listdir order.
    csv_files = sorted(
        name for name in os.listdir(path_to_folder) if name.endswith(".csv")
    )

    # An exception instance must be raised: raising a plain string is itself a
    # TypeError in Python and would hide the intended message.
    if not csv_files:
        raise FileNotFoundError(
            f"No csv files in the given directory: {path_to_folder}"
        )

    # An explicit request always wins, even when only one file is present.
    if file is not None:
        return pd.read_csv(os.path.join(path_to_folder, file))

    # CSV files must be parsed with pandas; np.load only reads .npy/.npz/pickle data.
    if len(csv_files) == 1:
        return pd.read_csv(os.path.join(path_to_folder, csv_files[0]))

    return {name: pd.read_csv(os.path.join(path_to_folder, name)) for name in csv_files}



In [38]:
# You must now analyze the data provided. Good luck!