In [None]:
# Install necessary packages
# netCDF4 is the library used further down to open the .nc cell-data files.
# The leading "!" hands the line to the shell instead of the Python kernel.

!pip install netCDF4

Collecting netCDF4
  Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting cftime (from netCDF4)
  Downloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m47.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cftime, netCDF4
Successfully installed cftime-1.6.4.post1 netCDF4-1.7.2


In [None]:
import os
import netCDF4 as nc  # Importing netCDF4 library
import pandas as pd
import numpy as np
from netCDF4 import Dataset
import h5py

# Align cellular + behavior data

At this point in the pipeline, the behavior and miniscope timestamps should already be aligned and concatenated with the EZtrack output. Here we add the miniscope cell data to that file, so that every recorded frame (row) carries all project information (columns).

In [None]:
# Mount Google Drive so that the data folders under /content/drive are reachable.
# force_remount=True asks Colab to mount again even if Drive is already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Define where the behavior folder (post-alignment) and the cell data folder are stored
# NOTE: both paths contain two consecutive spaces in "Restraint  Ketamine";
# presumably this matches the folder name on Drive, so it is kept verbatim.

# Folder with the aligned behavior .csv files (one per animal/session)
behavior_folder = "/content/drive/MyDrive/Restraint  Ketamine/Stress Ket G2 (n=6) Feb 2024/eztrackwithalignment G2"

# Folder with the .nc cell-data files
cell_data_folder = "/content/drive/MyDrive/Restraint  Ketamine/Stress Ket G2 (n=6) Feb 2024/MiniAn/5358/Recombination/nc files"

## Double check file sizes match

In [None]:
# Parse "<animal>_<session>_..." style filenames into their two leading fields.
def extract_info(filename):
    """Return the animal ID and session label encoded in a filename.

    The directory and extension are dropped, the remaining name is split on
    '_', and the first two pieces are used: the first (with any 'minian' text
    removed) becomes the integer animal ID, the second is returned unchanged
    as the session label (e.g. 's1').

    Returns:
        tuple: (animal ID as int, session label as str).

    Raises:
        IndexError: if the name contains no '_' separator.
        ValueError: if the first piece is not an integer after removing 'minian'.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    tokens = stem.split('_')
    session_label = tokens[1]
    return int(tokens[0].replace('minian', '')), session_label


def check_variable_lengths(folder_path):
    """Report the sizes of the C, A and S variables of every .nc file in a folder.

    Only the variables' shapes are inspected, so the (potentially large)
    arrays themselves are never loaded into memory.

    Args:
        folder_path: Directory to scan; files not ending in '.nc' are ignored.

    Returns:
        list[tuple]: One ``(file_name, animal_id, session_number, C_length,
        A_length, S_length)`` tuple per readable file, in sorted filename
        order. ``C_length`` / ``S_length`` are the second dimension of C / S
        and ``A_length`` is the first dimension of A. Files that fail to parse
        or open are reported on stdout and left out of the list.
    """
    file_lengths = []
    # Sorted so the report order is stable (os.listdir order is arbitrary).
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.nc'):
            continue
        try:
            # Extract animal ID and session number from filename
            animal_id, session_number = extract_info(file_name)

            file_path = os.path.join(folder_path, file_name)
            # The context manager closes the file even if a variable is missing.
            with nc.Dataset(file_path) as dataset:
                C_length = dataset.variables['C'].shape[1]  # width of C (#frames)
                A_length = dataset.variables['A'].shape[0]  # length of A (# of cells)
                S_length = dataset.variables['S'].shape[1]  # width of S (#frames)

            file_lengths.append((file_name, animal_id, session_number, C_length, A_length, S_length))
        except Exception as e:
            # Best effort: keep scanning the remaining files.
            print(f"Error processing {file_name}: {e}")

    return file_lengths

# Collect the C / A / S sizes of every .nc file in the cell-data folder
variable_lengths = check_variable_lengths(cell_data_folder)

# One two-line report per file, each followed by an empty line
for record in variable_lengths:
    nc_name, subject, session, n_frames_c, n_cells_a, n_frames_s = record
    print(
        f"File: {nc_name}, Animal ID: {subject}, Session Number: {session}",
        f"Length of C: {n_frames_c}, Length of A: {n_cells_a}, Length of S: {n_frames_s}",
        "",
        sep="\n",
    )

File: minian5358_s1_minian_dataset_recombined.nc, Animal ID: 5358, Session Number: s1
Length of C: 9058, Length of A: 16, Length of S: 9058

File: minian5358_s4_minian_dataset_recombined.nc, Animal ID: 5358, Session Number: s4
Length of C: 9012, Length of A: 15, Length of S: 9012

File: minian5358_s6_minian_dataset_recombined.nc, Animal ID: 5358, Session Number: s6
Length of C: 8565, Length of A: 16, Length of S: 8565



In [None]:
# Directory containing the aligned behavior .csv files
folder_path = behavior_folder

# For each behavior file, show the largest miniscope frame it refers to and
# its table size, for comparison with the C / S lengths printed above.
behavior_csv_names = [name for name in os.listdir(folder_path) if name.endswith('.csv')]
for filename in behavior_csv_names:
    df = pd.read_csv(os.path.join(folder_path, filename))
    ordered_frames = df['Corresponding_Miniscope_Frame'].sort_values(ascending=True)
    final_value = ordered_frames.iloc[-1]
    n_rows, n_cols = df.shape
    print(f"Final value in {filename}: {final_value}, size is {n_rows}x{n_cols}")

Final value in 5358_1_eztrackwithalignment_NAB.csv: 7493, size is 9058x26
Final value in 5358_4_eztrackwithalignment_NAB.csv: 9011, size is 9012x26
Final value in 5358_6_eztrackwithalignment_NAB.csv: 8564, size is 8565x26


## Combine cell + behavior data into one csv file

In [None]:
# Parse "<animal>_<session>_..." style filenames into their two leading fields.
# NOTE: this redefinition replaces the earlier extract_info; unlike that one it
# strips the leading 's' from the session label.
def extract_info(filename):
    """Return the animal ID and bare session number encoded in a filename.

    The directory and extension are dropped and the remaining name is split on
    '_'. The first piece (with any 'minian' text removed) becomes the integer
    animal ID; the second piece, with leading 's' characters stripped, is the
    session number, so 'minian5358_s1_...' and '5358_1_...' both give
    (5358, '1').

    Returns:
        tuple: (animal ID as int, session number as str).

    Raises:
        IndexError: if the name contains no '_' separator.
        ValueError: if the first piece is not an integer after removing 'minian'.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    tokens = stem.split('_')
    session_label = tokens[1].lstrip('s')
    return int(tokens[0].replace('minian', '')), session_label

# Load one aligned behavior table.
def read_behavior_data(csv_file):
    """Read a behavior CSV with pandas' default settings and return the DataFrame."""
    return pd.read_csv(csv_file)

# Look up the .nc file that belongs to a given animal and session.
def find_matching_nc_file(animal_id, session_number, nc_files):
    """Return the first entry of ``nc_files`` recorded for this animal/session.

    Each candidate name is parsed with ``extract_info`` and compared against
    the requested animal ID and session number.

    Returns:
        The matching filename, or None when no candidate matches.
    """
    for candidate in nc_files:
        candidate_animal, candidate_session = extract_info(candidate)
        if not (animal_id == candidate_animal):
            continue
        if not (session_number == candidate_session):
            continue
        return candidate
    return None

# Load one activity variable from a NetCDF file.
def read_neural_activity(nc_file, cors):
    """Read the variable named ``cors`` in full and return it.

    Args:
        nc_file: Path of the NetCDF file to open read-only.
        cors: Name of the variable to load (this notebook passes 'C' or 'S').

    Returns:
        The complete contents of the variable; the file is closed before
        returning.
    """
    # The handle is deliberately not called ``nc`` so it cannot be confused
    # with the module alias of the same name.
    with Dataset(nc_file, 'r') as dataset:
        return dataset.variables[cors][:]

# Define folders
target_folder = "/content/drive/MyDrive/Restraint  Ketamine/Stress Ket G2 (n=6) Feb 2024/C + S "  # Specify the folder where you want to save the combined CSV files
os.makedirs(target_folder, exist_ok=True)  # to_csv does not create missing folders

# Get list of files in both folders. Only the expected file types are kept so
# that stray entries (sub-folders, notes, checkpoints) cannot break the
# filename parsing; sorting makes the processing order reproducible.
csv_files = sorted(name for name in os.listdir(behavior_folder) if name.endswith('.csv'))
nc_files = sorted(name for name in os.listdir(cell_data_folder) if name.endswith('.nc'))


def _align_neural_to_frames(neural_data, frame_indices):
    """Pick, for every behavior row, the neural sample of its miniscope frame.

    Args:
        neural_data: 2-D array indexed as [cell, frame] (plain or masked).
        frame_indices: One miniscope frame index per behavior row.

    Returns:
        numpy.ndarray of shape (n_rows, n_cells). Rows whose frame index is
        missing, non-numeric, non-integer, negative or past the end of the
        recording are filled with NaN, as are masked samples.
    """
    # Masked samples become NaN instead of leaking the file's fill value.
    data = np.ma.filled(np.ma.asarray(neural_data, dtype=float), np.nan)
    n_cells, n_frames = data.shape

    frames = pd.to_numeric(pd.Series(frame_indices), errors='coerce').to_numpy(dtype=float)
    # Replace NaN/inf by -1 so a single range test rejects them; a negative
    # index must never reach numpy, where it would wrap to the end of the array.
    frames = np.where(np.isfinite(frames), frames, -1.0)
    valid = (frames >= 0) & (frames < n_frames) & (frames == np.floor(frames))

    aligned = np.full((len(frames), n_cells), np.nan)
    aligned[valid] = data[:, frames[valid].astype(int)].T
    return aligned


# Assuming file names are structured similarly for matching
for csv_file in csv_files:
    try:
        animal_id, session_number = extract_info(csv_file)
    except (ValueError, IndexError) as err:
        print(f"Skipping {csv_file}: cannot parse animal ID / session number ({err}).")
        continue
    matching_nc_file = find_matching_nc_file(animal_id, session_number, nc_files)

    if matching_nc_file is None:
        print(f"No matching NC file found for {csv_file}.")
        continue

    # Read behavior data
    behavior_data = read_behavior_data(os.path.join(behavior_folder, csv_file)).reset_index(drop=True)
    frame_indices = behavior_data['Corresponding_Miniscope_Frame']
    nc_path = os.path.join(cell_data_folder, matching_nc_file)

    # Same procedure for both activity variables; each gets its own output file.
    for variable_name in ('S', 'C'):
        # Read neural activity data
        neural_data = read_neural_activity(nc_path, variable_name)

        # Ensure the alignment based on frame indices
        aligned_neural_df = pd.DataFrame(
            _align_neural_to_frames(neural_data, frame_indices),
            columns=[f'neuron_{i+1}' for i in range(neural_data.shape[0])],
        )

        # Combine behavior_data and aligned_neural_df into a single DataFrame
        combined_data = pd.concat([behavior_data, aligned_neural_df], axis=1)

        # Save combined data to the target folder
        target_csv_file = os.path.join(
            target_folder, f'{animal_id}_{session_number}_combined_{variable_name}+beh_NAB.csv'
        )
        combined_data.to_csv(target_csv_file, index=False)


## Rename neuron columns based on CellReg output

Before this step, CellReg has been run (by NAB). The entire cellreg folder was uploaded to Drive for each animal.

^^ **9/18/24 note: the NAB CellReg output does not have a cell score**

- The alignment file (.mat) should be renamed to include the animal ID. It is important to note that you will need to look up the session order in the log file and manually assign it to the corresponding .csv files that we just generated.


### Duplicate this for each animal. Update the:
- subject_id
- session_order
- cellreg file

Do not run more than once for each animal. If you accidentally run twice, re-run the previous step.

In [None]:
# Define the subject ID, session numbers, and the folder path
subject_id = ['5358']  # Replace with the  subject ID
# ^^^ use the cellreg log to get this information
folder_path = '/content/drive/MyDrive/Restraint  Ketamine/Stress Ket G2 (n=6) Feb 2024/C + S '  # folder path with cell+beh data
cellreg_path = '/content/drive/MyDrive/Restraint  Ketamine/Stress Ket G2 (n=6) Feb 2024/CellReg/5358'


def _rename_neuron_columns(csv_filename, registered_row):
    """Rename the neuron_* columns of one combined CSV to their registered IDs.

    Args:
        csv_filename: Combined cell+behavior CSV; it is overwritten in place.
        registered_row: One session's row of the CellReg cell_to_index_map.
            Entry ``k`` holds the 1-based index of the session-local cell that
            is registered cell ``k + 1``; 0 means the cell is absent from this
            session.

    Raises:
        ValueError: if the renaming would produce duplicate column names (for
            example when some cell columns do not appear in ``registered_row``).
            Nothing is written in that case.

    The operation is NOT idempotent: running it again on an already renamed
    file applies the mapping a second time and scrambles the labels.
    """
    if not os.path.exists(csv_filename):
        print(f"File {csv_filename} does not exist.")
        return

    df = pd.read_csv(csv_filename)

    # Identify existing cell columns; position i holds session-local cell i + 1
    cell_columns = [col for col in df.columns if col.startswith('neuron_')]
    new_header = df.columns.tolist()  # Start with the existing headers

    for index, raw_cell_index in enumerate(registered_row):
        cell_index = int(raw_cell_index)
        # cell_index is 1-based, so the last cell equals len(cell_columns) and
        # must be included; 0 marks "not present in this session".
        if 1 <= cell_index <= len(cell_columns):
            original_column = cell_columns[cell_index - 1]
            new_header[df.columns.get_loc(original_column)] = f"neuron_{index + 1}"

    # Refuse to overwrite the file with ambiguous labels.
    if len(set(new_header)) != len(new_header):
        raise ValueError(
            f"Renaming {csv_filename} would create duplicate column names; file left unchanged."
        )

    df.columns = new_header
    df.to_csv(csv_filename, index=False)


for subject in subject_id:
    cellreg_filename = os.path.join(cellreg_path, f'{subject}_cellRegistered.mat')
    # Copy the map into memory and close the .mat file right away.
    with h5py.File(cellreg_filename, 'r') as cellreg:
        cellsregistered = cellreg['cell_registered_struct']['cell_to_index_map'][:]

    if subject == '5358':
        session_order = [1, 4, 6]
    else:
        session_order = [1, 2, 4, 6]

    # Each row of the map is paired with one session number below, so check the
    # counts before any CSV is touched; a failure half-way through would leave
    # some files renamed and others not.
    if cellsregistered.shape[0] != len(session_order):
        raise ValueError(
            f"{cellreg_filename} holds {cellsregistered.shape[0]} sessions "
            f"but session_order lists {len(session_order)}."
        )

    # Loop through the cellsregistered matrix, one row per session
    for session_index, session_num in enumerate(session_order):
        registered_row = cellsregistered[session_index]
        for variable_name in ('S', 'C'):
            csv_filename = os.path.join(
                folder_path, f"{subject}_{session_num}_combined_{variable_name}+beh_NAB.csv"
            )
            _rename_neuron_columns(csv_filename, registered_row)