# Loop through all subfolders in the inputfolder

# Mecp2 Split

In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import re

In [3]:
# CSV Version

# Define paths
inputfolder_path = r'Z:\Yin (yy433)\BatchSuite2p'
input_folder = r'C:\Users\sandbox\Downloads\splitNPY'
CellTypeExcel_path = r'C:\Users\sandbox\Downloads\splitNPY\CellType_Priority2'

# Get all CellType CSV files
csv_files = [f for f in os.listdir(CellTypeExcel_path) if f.endswith('.csv')]

# Keep track of folder names encountered
folder_names_in_suite2p = set()
# List to store summary info
summary_data = []

# Traverse all subdirectories in the input folder
for root, dirs, files in os.walk(inputfolder_path):
    if 'suite2p' in root and 'plane0' in root:
        suite2pfolder = root  # The path to suite2p/plane0
        
        # Extract OPME folder name from the path
        folder_name = os.path.basename(os.path.dirname(os.path.dirname(suite2pfolder)))
        folder_names_in_suite2p.add(folder_name)
        # Extract DIV (age) from folder_name
        match = re.search(r'DIV(\d+)', folder_name)
        DIV = int(match.group(1)) if match else None

        # Match folder_name with any CSV file
        matched_csv = None
        for csv in csv_files:
            if folder_name in csv:  # Exact match check
                matched_csv = os.path.join(CellTypeExcel_path, csv)
                break  # Stop searching after finding the first match
        
        # If no match found, skip this suite2p folder
        if not matched_csv:
            continue

        print(f"Processing: {folder_name} using {matched_csv}")

        # Paths for .npy files
        iscell_path = os.path.join(suite2pfolder, 'iscell.npy')

        # Ensure iscell.npy exists before proceeding
        if not os.path.exists(iscell_path):
            print(f"Skipping {folder_name}, iscell.npy not found.")
            continue

        # Load iscell.npy
        iscell = np.load(iscell_path, allow_pickle=True)
        iscell_index = np.where(iscell[:, 0] == 1)[0]  # Get indices of active cells

        # Read "Mecp2_Positive" column from CSV
        print(matched_csv)
        print("Exists?", os.path.exists(matched_csv))

        try:
            # Read entire CSV first to check columns
            df_csv = pd.read_csv(matched_csv)
            print("Available columns:", df_csv.columns.tolist())

            if "Mecp2_Positive" not in df_csv.columns or "Mecp2_Negative" not in df_csv.columns:
                print(f"Skipping {folder_name}: Required columns not found.")
                continue  # Skip this folder and go to next

            # Only now safe to extract columns
            iscell_Mecp2Positive_index = df_csv["Mecp2_Positive"].dropna().values.flatten().astype(int)
            iscell_Mecp2Negative_index = df_csv["Mecp2_Negative"].dropna().values.flatten().astype(int)
        except Exception as e:
                    print(f"Skipping {folder_name}: Error reading CSV: {e}")
                    continue  # Skip this folder safely

        # Record the counts and their minimum
        Mecp2Positive_count = len(iscell_Mecp2Positive_index)
        Mecp2Negative_count = len(iscell_Mecp2Negative_index)

        summary_data.append({
            "Folder": folder_name,
            "DIV": DIV,
            "Mecp2Positive": Mecp2Positive_count,
            "Mecp2Negative": Mecp2Negative_count
        })


        # Create Mecp2 Positive & Negative masks
        iscell_Mecp2Positive = iscell.copy()
        iscell_Mecp2Positive[:, 0] = 0
        iscell_Mecp2Positive[iscell_Mecp2Positive_index, 0] = 1

        iscell_Mecp2Negative = iscell.copy()
        iscell_Mecp2Negative[:, 0] = 0
        iscell_Mecp2Negative[iscell_Mecp2Negative_index, 0] = 1

        # Define output folders
        output_base = os.path.join(input_folder, 'HETMecp2SplitNPY')
        Mecp2_Positive_folder = os.path.join(output_base, folder_name + '_Mecp2Positive')
        Mecp2_Negative_folder = os.path.join(output_base, folder_name + '_Mecp2Negative')

        # Create and clean existing folders
        for output_folder in [Mecp2_Positive_folder, Mecp2_Negative_folder]:
            if os.path.exists(output_folder):
                shutil.rmtree(output_folder)
            os.makedirs(os.path.join(output_folder, 'suite2p', 'plane0'), exist_ok=True)

        # Save iscell.npy in respective folders
        np.save(os.path.join(Mecp2_Positive_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_Mecp2Positive)
        np.save(os.path.join(Mecp2_Negative_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_Mecp2Negative)

        # List of files to copy
        files_to_copy = ['stat.npy', 'F.npy', 'Fneu.npy', 'spks.npy', 'ops.npy']

        for output_folder in [Mecp2_Positive_folder, Mecp2_Negative_folder]:
            for file_name in files_to_copy:
                source_file = os.path.join(suite2pfolder, file_name)
                destination_file = os.path.join(output_folder, 'suite2p', 'plane0', file_name)
                if os.path.exists(source_file):  # Check if the file exists before copying
                    shutil.copy(source_file, destination_file)

        print(f"Created Mecp2 Positive & Negative folders for {folder_name}\n")

# Check which CSVs don't match any folder_name
unmatched_csvs = []
for csv in csv_files:
    matched = False
    for folder_name in folder_names_in_suite2p:
        if folder_name in csv:
            matched = True
            break
    if not matched:
        unmatched_csvs.append(csv)

# Print unmatched CSV files
if unmatched_csvs:
    print("\nCSV files that do NOT match any folder_name:")
    for csv in unmatched_csvs:
        print(f" - {csv}")
else:
    print("\nAll CSV files matched a folder_name.")

# Save the summary CSV
summary_df = pd.DataFrame(summary_data)
summary_csv_path = os.path.join(input_folder, 'HETMecp2SplitNPY', 'HETMecp2Split_CellCounts_Summary.csv')
summary_df.to_csv(summary_csv_path, index=False)
print(f"\nSaved summary CSV to {summary_csv_path}")


Processing: OPME230825_2_20230915_P1_pup4A_Het_MOI50000_DIV21 using C:\Users\sandbox\Downloads\splitNPY\CellType_Priority2\OPME230825_2_20230915_P1_pup4A_Het_MOI50000_DIV21.csv
C:\Users\sandbox\Downloads\splitNPY\CellType_Priority2\OPME230825_2_20230915_P1_pup4A_Het_MOI50000_DIV21.csv
Exists? True
Available columns: ['NeuN_Positive', 'NeuN_Negative', 'Comments on NeuN', 'Mecp2_Positive', 'Mecp2_Negative', 'Comments on Mecp2', 'PV_Positive', 'PV_Negative', 'Comments on PV', 'SST_Positive', 'SST_Negative', 'Comments on SST', 'GAD_Positive', 'GAD_Negative', 'Comments on GAD']
Created Mecp2 Positive & Negative folders for OPME230825_2_20230915_P1_pup4A_Het_MOI50000_DIV21

Processing: OPME230825_2_20230923_P1_pup4A_Het_MOI50000_DIV29 using C:\Users\sandbox\Downloads\splitNPY\CellType_Priority2\OPME230825_2_20230923_P1_pup4A_Het_MOI50000_DIV29.csv
C:\Users\sandbox\Downloads\splitNPY\CellType_Priority2\OPME230825_2_20230923_P1_pup4A_Het_MOI50000_DIV29.csv
Exists? True
Available columns: ['Neu

In [4]:
# Automatically generate a input CSV file with metadata for MEANAP from recording folders
import os
import re
import pandas as pd

# === CONFIG ===
input_path = r'C:\Users\sandbox\Downloads\splitNPY\HETMecp2SplitNPY'  # CHANGE THIS
output_csv_path = os.path.join(input_path, 'Metadata_full_Suite2p - All_NoTTX&8m_Good&Okay_Chapter5_HetMecp2Split.csv')

# === Helper functions ===

def extract_div(folder_name):
    match = re.search(r'DIV(\d+)', folder_name)
    if match:
        div = int(match.group(1))
        if div in [13, 14, 15, 16, 17, 18]:
            return 14
        elif div in [20, 21, 22, 23, 24]:
            return 21
        elif div in [27, 28, 29, 30]:
            return 28
        elif div in [33, 34, 35, 36, 37, 38]:
            return 35
        elif div in [41, 42, 43, 44]:
            return 42
    return None


def extract_genotype(folder_name):
    name = folder_name.lower()
    base = 'Unknown'

    if 'het' in name:
        base = 'Mecp2_Het'
    elif 'ko' in name:
        base = 'Mecp2_KO'
    elif 'wt' in name:
        base = 'Wildtype'

    if 'mecp2positive' in name:
        return f'{base}_Mecp2Positive'
    elif 'mecp2negative' in name:
        return f'{base}_Mecp2Negative'
    else:
        return base

# === Main script ===

data = []

for folder in os.listdir(input_path):
    full_path = os.path.join(input_path, folder)
    if os.path.isdir(full_path):
        div_group = extract_div(folder)
        genotype = extract_genotype(folder)
        if div_group is not None:
            data.append({
                'Recording Filename': folder,
                'DIV group': div_group,
                'Genotype': genotype
            })

# Convert to DataFrame and save
df = pd.DataFrame(data)
df = df.sort_values(by=['Recording Filename', 'DIV group', 'Genotype' ])
df.to_csv(output_csv_path, index=False)

print(f"CSV saved to: {output_csv_path}")


CSV saved to: C:\Users\sandbox\Downloads\splitNPY\HETMecp2SplitNPY\Metadata_full_Suite2p - All_NoTTX&8m_Good&Okay_Chapter5_HetMecp2Split.csv


In [None]:
print("Max index in iscell_Mecp2Negative_index:", max(iscell_Mecp2Negative_index))
print("Size of iscell_Mecp2Negative:", len(iscell_Mecp2Negative))

In [3]:
# Excel Version
# Define paths
inputfolder_path = r'Z:\Yin (yy433)\BatchSuite2p'
input_folder = r'C:\Users\sandbox\Downloads\splitNPY'
CellTypeExcel_path = r'C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1'

# Get all CellType Excel files
excel_files = [f for f in os.listdir(CellTypeExcel_path) if f.endswith('.xlsx')]

# Traverse all subdirectories in the input folder
for root, dirs, files in os.walk(inputfolder_path):
    if 'suite2p' in root and 'plane0' in root:
        suite2pfolder = root  # The path to suite2p/plane0
        
        # Extract OPME folder name from the path
        folder_name = os.path.basename(os.path.dirname(os.path.dirname(suite2pfolder)))

        # Match folder_name with any Excel file
        matched_excel = None
        for excel in excel_files:
            if folder_name in excel:  # Exact match check
                matched_excel = os.path.join(CellTypeExcel_path, excel)
                break  # Stop searching after finding the first match
        
        # If no match found, skip this suite2p folder
        if not matched_excel:
            continue

        print(f"Processing: {folder_name} using {matched_excel}")

        # Paths for .npy files
        iscell_path = os.path.join(suite2pfolder, 'iscell.npy')

        # Ensure iscell.npy exists before proceeding
        if not os.path.exists(iscell_path):
            print(f"Skipping {folder_name}, iscell.npy not found.")
            continue

        # Load iscell.npy
        iscell = np.load(iscell_path, allow_pickle=True)
        iscell_index = np.where(iscell[:, 0] == 1)[0]  # Get indices of active cells

        # Read "Mecp2_Positive" column from Excel
        print(matched_excel)
        print("Exists?", os.path.exists(matched_excel))

        try:
            # Read entire Excel first to check columns
            df_excel = pd.read_excel(matched_excel, engine='openpyxl')
            print("Available columns:", df_excel.columns.tolist())

            if "Mecp2_Positive" not in df_excel.columns or "Mecp2_Negative" not in df_excel.columns:
                print(f"Skipping {folder_name}: Required columns not found.")
                continue  # Skip this folder and go to next

            # Only now safe to extract columns
            iscell_Mecp2Positive_index = df_excel["Mecp2_Positive"].dropna().values.flatten().astype(int)
            iscell_Mecp2Negative_index = df_excel["Mecp2_Negative"].dropna().values.flatten().astype(int)
        except Exception as e:
                    print(f"Skipping {folder_name}: Error reading Excel: {e}")
                    continue  # Skip this folder safely

        # iscell_Mecp2Positive_index = pd.read_excel(matched_excel, usecols=["Mecp2_Positive"])
        # iscell_Mecp2Positive_index = iscell_Mecp2Positive_index.dropna().values.flatten().astype(int)

        # iscell_Mecp2Negative_index = pd.read_excel(matched_excel, usecols=["Mecp2_Negative"])
        # iscell_Mecp2Negative_index = iscell_Mecp2Negative_index.dropna().values.flatten().astype(int)

        # Create Mecp2 Positive & Negative masks
        iscell_Mecp2Positive = iscell.copy()
        iscell_Mecp2Positive[:, 0] = 0
        iscell_Mecp2Positive[iscell_Mecp2Positive_index, 0] = 1

        iscell_Mecp2Negative = iscell.copy()
        iscell_Mecp2Negative[:, 0] = 0
        iscell_Mecp2Negative[iscell_Mecp2Negative_index, 0] = 1

        # Define output folders
        output_base = os.path.join(input_folder, 'HETMecp2SplitNPY')
        Mecp2_Positive_folder = os.path.join(output_base, folder_name + '_Mecp2Positive')
        Mecp2_Negative_folder = os.path.join(output_base, folder_name + '_Mecp2Negative')

        # Create and clean existing folders
        for output_folder in [Mecp2_Positive_folder, Mecp2_Negative_folder]:
            if os.path.exists(output_folder):
                shutil.rmtree(output_folder)
            os.makedirs(os.path.join(output_folder, 'suite2p', 'plane0'), exist_ok=True)

        # Save iscell.npy in respective folders
        np.save(os.path.join(Mecp2_Positive_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_Mecp2Positive)
        np.save(os.path.join(Mecp2_Negative_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_Mecp2Negative)

        # List of files to copy
        files_to_copy = ['stat.npy', 'F.npy', 'Fneu.npy', 'spks.npy', 'ops.npy']

        for output_folder in [Mecp2_Positive_folder, Mecp2_Negative_folder]:
            for file_name in files_to_copy:
                source_file = os.path.join(suite2pfolder, file_name)
                destination_file = os.path.join(output_folder, 'suite2p', 'plane0', file_name)
                if os.path.exists(source_file):  # Check if the file exists before copying
                    shutil.copy(source_file, destination_file)

        print(f"Created Mecp2 Positive & Negative folders for {folder_name}\n")


Processing: 20230528_1_OPME230514_P1_pup1A_Het_MOI50000_DIV14 using C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\20230528_1_OPME230514_P1_pup1A_Het_MOI50000_DIV14.xlsx
C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\20230528_1_OPME230514_P1_pup1A_Het_MOI50000_DIV14.xlsx
Exists? True
Available columns: ['NeuN_Positive', 'NeuN_Negative', 'Comments on NeuN', 'Mecp2_Positive', 'Mecp2_Negative', 'Comments on Mecp2', 'PV_Positive', 'PV_Negative', 'Comments on PV', 'SST_Positive', 'SST_Negative', 'Comments on SST', 'GAD_Positive', 'GAD_Negative', 'Comments on GAD']
Created Mecp2 Positive & Negative folders for 20230528_1_OPME230514_P1_pup1A_Het_MOI50000_DIV14

Processing: 20230618_1_OPME230514_P1_pup1A_Het_MOI50000_DIV35 using C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\20230618_1_OPME230514_P1_pup1A_Het_MOI50000_DIV35.xlsx
C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\20230618_1_OPME230514_P1_pup1A_Het_MOI50000_DIV35.xlsx
Exists? True
Available columns: [

  warn(msg)


Processing: OPME230825_12_20230908_P1_pup5D_Het_MOI50000_DIV14 using C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\OPME230825_12_20230908_P1_pup5D_Het_MOI50000_DIV14.xlsx
C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\OPME230825_12_20230908_P1_pup5D_Het_MOI50000_DIV14.xlsx
Exists? True
Skipping OPME230825_12_20230908_P1_pup5D_Het_MOI50000_DIV14: Error reading Excel: Worksheet index 0 is invalid, 0 worksheets found
Processing: OPME230825_12_20230915_P1_pup5D_Het_MOI50000_DIV21 using C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\OPME230825_12_20230915_P1_pup5D_Het_MOI50000_DIV21.xlsx
C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\OPME230825_12_20230915_P1_pup5D_Het_MOI50000_DIV21.xlsx
Exists? True
Skipping OPME230825_12_20230915_P1_pup5D_Het_MOI50000_DIV21: Error reading Excel: Worksheet index 0 is invalid, 0 worksheets found
Processing: OPME230825_12_20230923_P1_pup5D_Het_MOI50000_DIV29 using C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1\OPME23

IndexError: index 220 is out of bounds for axis 0 with size 220

# NeuN Split

In [None]:
# Define paths
inputfolder_path = r'Z:\Yin (yy433)\BatchSuite2p'
input_folder = r'C:\Users\sandbox\Downloads\splitNPY'
CellTypeExcel_path = r'C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1'

# Get all CellType Excel files
excel_files = [f for f in os.listdir(CellTypeExcel_path) if f.endswith('.xlsx')]

# Traverse all subdirectories in the input folder
for root, dirs, files in os.walk(inputfolder_path):
    if 'suite2p' in root and 'plane0' in root:
        suite2pfolder = root  # The path to suite2p/plane0
        
        # Extract OPME folder name from the path
        folder_name = os.path.basename(os.path.dirname(os.path.dirname(suite2pfolder)))

        # Match folder_name with any Excel file
        matched_excel = None
        for excel in excel_files:
            if folder_name in excel:  # Exact match check
                matched_excel = os.path.join(CellTypeExcel_path, excel)
                break  # Stop searching after finding the first match
        
        # If no match found, skip this suite2p folder
        if not matched_excel:
            continue

        print(f"Processing: {folder_name} using {matched_excel}")

        # Paths for .npy files
        iscell_path = os.path.join(suite2pfolder, 'iscell.npy')

        # Ensure iscell.npy exists before proceeding
        if not os.path.exists(iscell_path):
            print(f"Skipping {folder_name}, iscell.npy not found.")
            continue

        # Load iscell.npy
        iscell = np.load(iscell_path, allow_pickle=True)
        iscell_index = np.where(iscell[:, 0] == 1)[0]  # Get indices of active cells

        # Read "NeuN_Positive" column from Excel
        iscell_NeuNPositive_index = pd.read_excel(matched_excel, usecols=["NeuN_Positive"])
        iscell_NeuNPositive_index = iscell_NeuNPositive_index.dropna().values.flatten().astype(int)

        iscell_NeuNNegative_index = pd.read_excel(matched_excel, usecols=["NeuN_Negative"])
        iscell_NeuNNegative_index = iscell_NeuNNegative_index.dropna().values.flatten().astype(int)

        # Create NeuN Positive & Negative masks
        iscell_NeuNPositive = iscell.copy()
        iscell_NeuNPositive[:, 0] = 0
        iscell_NeuNPositive[iscell_NeuNPositive_index, 0] = 1

        iscell_NeuNNegative = iscell.copy()
        iscell_NeuNNegative[:, 0] = 0
        iscell_NeuNNegative[iscell_NeuNNegative_index, 0] = 1

        # Define output folders
        output_base = os.path.join(input_folder, 'NeuNSplitNPY')
        NeuN_Positive_folder = os.path.join(output_base, folder_name + '_NeuNPositive')
        NeuN_Negative_folder = os.path.join(output_base, folder_name + '_NeuNNegative')

        # Create and clean existing folders
        for output_folder in [NeuN_Positive_folder, NeuN_Negative_folder]:
            if os.path.exists(output_folder):
                shutil.rmtree(output_folder)
            os.makedirs(os.path.join(output_folder, 'suite2p', 'plane0'), exist_ok=True)

        # Save iscell.npy in respective folders
        np.save(os.path.join(NeuN_Positive_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_NeuNPositive)
        np.save(os.path.join(NeuN_Negative_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_NeuNNegative)

        # List of files to copy
        files_to_copy = ['stat.npy', 'F.npy', 'Fneu.npy', 'spks.npy', 'ops.npy']

        for output_folder in [NeuN_Positive_folder, NeuN_Negative_folder]:
            for file_name in files_to_copy:
                source_file = os.path.join(suite2pfolder, file_name)
                destination_file = os.path.join(output_folder, 'suite2p', 'plane0', file_name)
                if os.path.exists(source_file):  # Check if the file exists before copying
                    shutil.copy(source_file, destination_file)

        print(f"Created NeuN Positive & Negative folders for {folder_name}\n")


Processing: OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14.xlsx
Created NeuN Positive & Negative folders for OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14

Processing: OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21.xlsx
Created NeuN Positive & Negative folders for OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21

Processing: OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28.xlsx
Created NeuN Positive & Negative folders for OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28

Processing: OPME231206_11_20240110_P1_pup2E_Het_MOI50000_DIV35 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20240110_P1_pup2E_Het_MOI50000_DIV35.xlsx
Created NeuN Positive & Negat

# PV Split

In [None]:
# Define paths
inputfolder_path = r'Z:\Yin (yy433)\BatchSuite2p'
input_folder = r'C:\Users\sandbox\Downloads\splitNPY'
CellTypeExcel_path = r'C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1'

# Get all CellType Excel files
excel_files = [f for f in os.listdir(CellTypeExcel_path) if f.endswith('.xlsx')]

# Traverse all subdirectories in the input folder
for root, dirs, files in os.walk(inputfolder_path):
    if 'suite2p' in root and 'plane0' in root:
        suite2pfolder = root  # The path to suite2p/plane0
        
        # Extract OPME folder name from the path
        folder_name = os.path.basename(os.path.dirname(os.path.dirname(suite2pfolder)))

        # Match folder_name with any Excel file
        matched_excel = None
        for excel in excel_files:
            if folder_name in excel:  # Exact match check
                matched_excel = os.path.join(CellTypeExcel_path, excel)
                break  # Stop searching after finding the first match
        
        # If no match found, skip this suite2p folder
        if not matched_excel:
            continue

        print(f"Processing: {folder_name} using {matched_excel}")

        # Paths for .npy files
        iscell_path = os.path.join(suite2pfolder, 'iscell.npy')

        # Ensure iscell.npy exists before proceeding
        if not os.path.exists(iscell_path):
            print(f"Skipping {folder_name}, iscell.npy not found.")
            continue

        # Load iscell.npy
        iscell = np.load(iscell_path, allow_pickle=True)
        iscell_index = np.where(iscell[:, 0] == 1)[0]  # Get indices of active cells

        # Read "PV_Positive" column from Excel
        iscell_PVPositive_index = pd.read_excel(matched_excel, usecols=["PV_Positive"])
        iscell_PVPositive_index = iscell_PVPositive_index.dropna().values.flatten().astype(int)

        iscell_PVNegative_index = pd.read_excel(matched_excel, usecols=["PV_Negative"])
        iscell_PVNegative_index = iscell_PVNegative_index.dropna().values.flatten().astype(int)

        # Create PV Positive & Negative masks
        iscell_PVPositive = iscell.copy()
        iscell_PVPositive[:, 0] = 0
        iscell_PVPositive[iscell_PVPositive_index, 0] = 1

        iscell_PVNegative = iscell.copy()
        iscell_PVNegative[:, 0] = 0
        iscell_PVNegative[iscell_PVNegative_index, 0] = 1

        # Define output folders
        output_base = os.path.join(input_folder, 'PVSplitNPY')
        PV_Positive_folder = os.path.join(output_base, folder_name + '_PVPositive')
        PV_Negative_folder = os.path.join(output_base, folder_name + '_PVNegative')

        # Create and clean existing folders
        for output_folder in [PV_Positive_folder, PV_Negative_folder]:
            if os.path.exists(output_folder):
                shutil.rmtree(output_folder)
            os.makedirs(os.path.join(output_folder, 'suite2p', 'plane0'), exist_ok=True)

        # Save iscell.npy in respective folders
        np.save(os.path.join(PV_Positive_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_PVPositive)
        np.save(os.path.join(PV_Negative_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_PVNegative)

        # List of files to copy
        files_to_copy = ['stat.npy', 'F.npy', 'Fneu.npy', 'spks.npy', 'ops.npy']

        for output_folder in [PV_Positive_folder, PV_Negative_folder]:
            for file_name in files_to_copy:
                source_file = os.path.join(suite2pfolder, file_name)
                destination_file = os.path.join(output_folder, 'suite2p', 'plane0', file_name)
                if os.path.exists(source_file):  # Check if the file exists before copying
                    shutil.copy(source_file, destination_file)

        print(f"Created PV Positive & Negative folders for {folder_name}\n")


Processing: OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14.xlsx
Created PV Positive & Negative folders for OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14

Processing: OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21.xlsx
Created PV Positive & Negative folders for OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21

Processing: OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28.xlsx
Created PV Positive & Negative folders for OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28

Processing: OPME231206_11_20240110_P1_pup2E_Het_MOI50000_DIV35 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20240110_P1_pup2E_Het_MOI50000_DIV35.xlsx
Created PV Positive & Negative fold

# GAD Split

In [None]:
# Define paths
inputfolder_path = r'Z:\Yin (yy433)\BatchSuite2p'
input_folder = r'C:\Users\sandbox\Downloads\splitNPY'
CellTypeExcel_path = r'C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1'

# Get all CellType Excel files
excel_files = [f for f in os.listdir(CellTypeExcel_path) if f.endswith('.xlsx')]

# Traverse all subdirectories in the input folder
for root, dirs, files in os.walk(inputfolder_path):
    if 'suite2p' in root and 'plane0' in root:
        suite2pfolder = root  # The path to suite2p/plane0
        
        # Extract OPME folder name from the path
        folder_name = os.path.basename(os.path.dirname(os.path.dirname(suite2pfolder)))

        # Match folder_name with any Excel file
        matched_excel = None
        for excel in excel_files:
            if folder_name in excel:  # Exact match check
                matched_excel = os.path.join(CellTypeExcel_path, excel)
                break  # Stop searching after finding the first match
        
        # If no match found, skip this suite2p folder
        if not matched_excel:
            continue

        print(f"Processing: {folder_name} using {matched_excel}")

        # Paths for .npy files
        iscell_path = os.path.join(suite2pfolder, 'iscell.npy')

        # Ensure iscell.npy exists before proceeding
        if not os.path.exists(iscell_path):
            print(f"Skipping {folder_name}, iscell.npy not found.")
            continue

        # Load iscell.npy
        iscell = np.load(iscell_path, allow_pickle=True)
        iscell_index = np.where(iscell[:, 0] == 1)[0]  # Get indices of active cells

        # Read "GAD_Positive" column from Excel
        iscell_GADPositive_index = pd.read_excel(matched_excel, usecols=["GAD_Positive"])
        iscell_GADPositive_index = iscell_GADPositive_index.dropna().values.flatten().astype(int)

        iscell_GADNegative_index = pd.read_excel(matched_excel, usecols=["GAD_Negative"])
        iscell_GADNegative_index = iscell_GADNegative_index.dropna().values.flatten().astype(int)

        # Create GAD Positive & Negative masks
        iscell_GADPositive = iscell.copy()
        iscell_GADPositive[:, 0] = 0
        iscell_GADPositive[iscell_GADPositive_index, 0] = 1

        iscell_GADNegative = iscell.copy()
        iscell_GADNegative[:, 0] = 0
        iscell_GADNegative[iscell_GADNegative_index, 0] = 1

        # Define output folders
        output_base = os.path.join(input_folder, 'GADSplitNPY')
        GAD_Positive_folder = os.path.join(output_base, folder_name + '_GADPositive')
        GAD_Negative_folder = os.path.join(output_base, folder_name + '_GADNegative')

        # Create and clean existing folders
        for output_folder in [GAD_Positive_folder, GAD_Negative_folder]:
            if os.path.exists(output_folder):
                shutil.rmtree(output_folder)
            os.makedirs(os.path.join(output_folder, 'suite2p', 'plane0'), exist_ok=True)

        # Save iscell.npy in respective folders
        np.save(os.path.join(GAD_Positive_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_GADPositive)
        np.save(os.path.join(GAD_Negative_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_GADNegative)

        # List of files to copy
        files_to_copy = ['stat.npy', 'F.npy', 'Fneu.npy', 'spks.npy', 'ops.npy']

        for output_folder in [GAD_Positive_folder, GAD_Negative_folder]:
            for file_name in files_to_copy:
                source_file = os.path.join(suite2pfolder, file_name)
                destination_file = os.path.join(output_folder, 'suite2p', 'plane0', file_name)
                if os.path.exists(source_file):  # Check if the file exists before copying
                    shutil.copy(source_file, destination_file)

        print(f"Created GAD Positive & Negative folders for {folder_name}\n")


Processing: OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14.xlsx
Created GAD Positive & Negative folders for OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14

Processing: OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21.xlsx
Created GAD Positive & Negative folders for OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21

Processing: OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28.xlsx
Created GAD Positive & Negative folders for OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28

Processing: OPME231206_11_20240110_P1_pup2E_Het_MOI50000_DIV35 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20240110_P1_pup2E_Het_MOI50000_DIV35.xlsx
Created GAD Positive & Negative 

# SST Split

In [None]:
# Define paths
inputfolder_path = r'Z:\Yin (yy433)\BatchSuite2p'
input_folder = r'C:\Users\sandbox\Downloads\splitNPY'
CellTypeExcel_path = r'C:\Users\sandbox\Downloads\splitNPY\CellType_Priority1'

# Get all CellType Excel files
excel_files = [f for f in os.listdir(CellTypeExcel_path) if f.endswith('.xlsx')]

# Traverse all subdirectories in the input folder
for root, dirs, files in os.walk(inputfolder_path):
    if 'suite2p' in root and 'plane0' in root:
        suite2pfolder = root  # The path to suite2p/plane0
        
        # Extract OPME folder name from the path
        folder_name = os.path.basename(os.path.dirname(os.path.dirname(suite2pfolder)))

        # Match folder_name with any Excel file
        matched_excel = None
        for excel in excel_files:
            if folder_name in excel:  # Exact match check
                matched_excel = os.path.join(CellTypeExcel_path, excel)
                break  # Stop searching after finding the first match
        
        # If no match found, skip this suite2p folder
        if not matched_excel:
            continue

        print(f"Processing: {folder_name} using {matched_excel}")

        # Paths for .npy files
        iscell_path = os.path.join(suite2pfolder, 'iscell.npy')

        # Ensure iscell.npy exists before proceeding
        if not os.path.exists(iscell_path):
            print(f"Skipping {folder_name}, iscell.npy not found.")
            continue

        # Load iscell.npy
        iscell = np.load(iscell_path, allow_pickle=True)
        iscell_index = np.where(iscell[:, 0] == 1)[0]  # Get indices of active cells

        # Read "SST_Positive" column from Excel
        iscell_SSTPositive_index = pd.read_excel(matched_excel, usecols=["SST_Positive"])
        iscell_SSTPositive_index = iscell_SSTPositive_index.dropna().values.flatten().astype(int)

        iscell_SSTNegative_index = pd.read_excel(matched_excel, usecols=["SST_Negative"])
        iscell_SSTNegative_index = iscell_SSTNegative_index.dropna().values.flatten().astype(int)

        # Create SST Positive & Negative masks
        iscell_SSTPositive = iscell.copy()
        iscell_SSTPositive[:, 0] = 0
        iscell_SSTPositive[iscell_SSTPositive_index, 0] = 1

        iscell_SSTNegative = iscell.copy()
        iscell_SSTNegative[:, 0] = 0
        iscell_SSTNegative[iscell_SSTNegative_index, 0] = 1

        # Define output folders
        output_base = os.path.join(input_folder, 'SSTSplitNPY')
        SST_Positive_folder = os.path.join(output_base, folder_name + '_SSTPositive')
        SST_Negative_folder = os.path.join(output_base, folder_name + '_SSTNegative')

        # Create and clean existing folders
        for output_folder in [SST_Positive_folder, SST_Negative_folder]:
            if os.path.exists(output_folder):
                shutil.rmtree(output_folder)
            os.makedirs(os.path.join(output_folder, 'suite2p', 'plane0'), exist_ok=True)

        # Save iscell.npy in respective folders
        np.save(os.path.join(SST_Positive_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_SSTPositive)
        np.save(os.path.join(SST_Negative_folder, 'suite2p', 'plane0', "iscell.npy"), iscell_SSTNegative)

        # List of files to copy
        files_to_copy = ['stat.npy', 'F.npy', 'Fneu.npy', 'spks.npy', 'ops.npy']

        for output_folder in [SST_Positive_folder, SST_Negative_folder]:
            for file_name in files_to_copy:
                source_file = os.path.join(suite2pfolder, file_name)
                destination_file = os.path.join(output_folder, 'suite2p', 'plane0', file_name)
                if os.path.exists(source_file):  # Check if the file exists before copying
                    shutil.copy(source_file, destination_file)

        print(f"Created SST Positive & Negative folders for {folder_name}\n")


Processing: OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14.xlsx
Created SST Positive & Negative folders for OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14

Processing: OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21.xlsx
Created SST Positive & Negative folders for OPME231206_11_20231227_P1_pup2E_Het_MOI50000_DIV21

Processing: OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28.xlsx
Created SST Positive & Negative folders for OPME231206_11_20240103_P1_pup2E_Het_MOI50000_DIV28

Processing: OPME231206_11_20240110_P1_pup2E_Het_MOI50000_DIV35 using C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20240110_P1_pup2E_Het_MOI50000_DIV35.xlsx
Created SST Positive & Negative 

# Progression codes:

In [None]:
import pandas as pd
import numpy as np
import shutil
import os

In [None]:
iscell = np.load(r"Z:\Yin (yy433)\BatchSuite2p\OPME230825_1_20230915_P1_pup4A_Het_MOI50000_DIV21\suite2p\plane0\iscell.npy")
F = np.load(r"Z:\Yin (yy433)\BatchSuite2p\OPME230825_1_20230915_P1_pup4A_Het_MOI50000_DIV21\suite2p\plane0\F.npy")
stat = np.load(r"Z:\Yin (yy433)\BatchSuite2p\OPME230825_1_20230915_P1_pup4A_Het_MOI50000_DIV21\suite2p\plane0\stat.npy", allow_pickle=True)
ops = np.load(r"Z:\Yin (yy433)\BatchSuite2p\OPME230825_1_20230915_P1_pup4A_Het_MOI50000_DIV21\suite2p\plane0\ops.npy", allow_pickle=True)
# iscell = np.load(r"Z:\Yin (yy433)\BatchSuite2p\OPME230825_1_20230915_P1_pup4A_Het_MOI50000_DIV21\suite2p\plane0\Fneu.npy")
# iscell = np.load(r"Z:\Yin (yy433)\BatchSuite2p\OPME230825_1_20230915_P1_pup4A_Het_MOI50000_DIV21\suite2p\plane0\spks.npy")


In [None]:
iscell_index = np.where(iscell[:, 0] == 1)[0]

In [None]:
# Read only the 'Mecp2_Positive' column from the Excel file
iscell_Mecp2Positive_index = pd.read_excel(
    r"D:\Cambridge University Dropbox\Susanna B. Mierau\Suite2p&IF_Image_Analysis\PutativeCellType_OPME230825_1_20230915_P1_pup4A_Het_MOI50000_DIV21_PositiveOnly.xlsx",
    usecols=["Mecp2_Positive"]
)

# Drop missing values (NaN) from the column
iscell_Mecp2Positive_index = iscell_Mecp2Positive_index.dropna()

# Convert to numpy.ndarray and flatten to 1D
iscell_Mecp2Positive_index = iscell_Mecp2Positive_index.values.flatten().astype(int)

# Filter rows in iscell_index that are NOT in iscell_Mecp2Positive_index
iscell_Mecp2Negative_index = iscell_index[~np.isin(iscell_index, iscell_Mecp2Positive_index)]

print(iscell_Mecp2Positive_index)
print(iscell_Mecp2Negative_index)

In [None]:
# Save rows of iscell where the indices are in iscell_Mecp2Positive_index, the first column is 1, and the first column is 0 otherwise
iscell_Mecp2Positive = iscell

# Update the first column of iscell
iscell_Mecp2Positive[:, 0] = 0  # Set all rows in the first column to 0
iscell_Mecp2Positive[iscell_Mecp2Positive_index, 0] = 1  # Set the first column to 1 for rows in valid_indices

# Save the result to a file
np.save("iscell.npy", iscell_Mecp2Positive)

In [None]:
# Define the main input folder
inputfolder_path = r'C:\Users\sandbox\Downloads\BatchSuite2pTest'
CellTypeExcel_path = r'C:\Users\sandbox\Downloads\splitNPY'

# Traverse all subdirectories in the input folder
for root, dirs, files in os.walk(inputfolder_path):
    # Look for the specific 'suite2p/plane0' structure in each subdirectory
    if 'suite2p' in root and 'plane0' in root:
        suite2pfolder = root

        # Paths for stat.npy and iscell.npy
        iscell_path = os.path.join(suite2pfolder, 'iscell.npy')
        # stat_path = os.path.join(suite2pfolder, 'stat.npy')
        # F_path = os.path.join(suite2pfolder, 'F.npy')
        # Fneu_path = os.path.join(suite2pfolder, 'Fneu.npy')
        # spks_path = os.path.join(suite2pfolder, 'spks.npy')
        # ops_path = os.path.join(suite2pfolder, 'ops.npy')

        # Check if the required files exist
        # if os.path.exists(stat_path) and os.path.exists(iscell_path):
        if os.path.exists(iscell_path):
    
            # Load the ops dictionary & .npy files
            iscell = np.load(iscell_path, allow_pickle=True)
            iscell_index = np.where(iscell[:, 0] == 1)[0]





            # Mecp2 Positive
            # Read only the 'Mecp2_Positive' column from the Excel file
            iscell_Mecp2Positive_index = pd.read_excel(
                r"C:\Users\sandbox\Downloads\splitNPY\CellType_OPME231206_11_20231220_P1_pup2E_Het_MOI50000_DIV14.xlsx",
                usecols=["Mecp2_Positive"]
            )

            # Drop missing values (NaN) from the column
            iscell_Mecp2Positive_index = iscell_Mecp2Positive_index.dropna()
            # Convert to numpy.ndarray and flatten to 1D
            iscell_Mecp2Positive_index = iscell_Mecp2Positive_index.values.flatten().astype(int)

            # Save rows of iscell where the indices are in iscell_Mecp2Positive_index, the first column is 1, and the first column is 0 otherwise
            iscell_Mecp2Positive = iscell

            # Update the first column of iscell
            iscell_Mecp2Positive[:, 0] = 0  # Set all rows in the first column to 0
            iscell_Mecp2Positive[iscell_Mecp2Positive_index, 0] = 1  # Set the first column to 1 for rows in valid_indices

            # Create output folder for Mecp2Positive
            # Extract the folder name from the input folder path
            folder_name = os.path.basename(os.path.dirname(os.path.dirname(suite2pfolder)))
            # Create a new folder name by appending '_Mecp2Positive'
            Mecp2Positive_folder_name = folder_name + '_Mecp2Positive'
            # Define the path for the new folder
            Mecp2Positive_folder_path = os.path.join(inputfolder_path, 'HETSplitNPY', Mecp2Positive_folder_name)
            # Check if the folder exists and remove it if it does
            if os.path.exists(Mecp2Positive_folder_path):
                shutil.rmtree(Mecp2Positive_folder_path)
            # Create the new folder
            os.makedirs(Mecp2Positive_folder_path, exist_ok=True)

            # Replicate the subfolder structure (suite2p/plane0) in the new folder
            Mecp2Positive_suite2p_path = os.path.join(Mecp2Positive_folder_path, 'suite2p', 'plane0')
            os.makedirs(Mecp2Positive_suite2p_path, exist_ok=True)

            # Save the result to the Mecp2Positive subfolder
            np.save(os.path.join(Mecp2Positive_suite2p_path, "iscell.npy"), iscell_Mecp2Positive)


            # List of files to copy
            files_to_copy = ['stat.npy', 'F.npy', 'Fneu.npy', 'spks.npy', 'ops.npy']

            # Copy each file from suite2pfolder to Mecp2Positive_suite2p_path
            for file_name in files_to_copy:
                source_file = os.path.join(suite2pfolder, file_name)
                destination_file = os.path.join(Mecp2Positive_suite2p_path, file_name)
                if os.path.exists(source_file):  # Check if the file exists before copying
                    shutil.copy(source_file, destination_file)





            # Mecp2 Negative
            # Filter rows in iscell_index that are NOT in iscell_Mecp2Positive_index
            iscell_Mecp2Negative_index = iscell_index[~np.isin(iscell_index, iscell_Mecp2Positive_index)]

            # Save rows of iscell where the indices are in iscell_Mecp2Positive_index, the first column is 1, and the first column is 0 otherwise
            iscell_Mecp2Negative = iscell

            # Update the first column of iscell
            iscell_Mecp2Negative[:, 0] = 0  # Set all rows in the first column to 0
            iscell_Mecp2Negative[iscell_Mecp2Negative_index, 0] = 1  # Set the first column to 1 for rows in valid_indices

            # Create output folder for Mecp2Negative
            # Create a new folder name by appending '_Mecp2Negative'
            Mecp2Negative_folder_name = folder_name + '_Mecp2Negative'
            # Define the path for the new folder
            Mecp2Negative_folder_path = os.path.join(inputfolder_path, 'HETSplitNPY', Mecp2Negative_folder_name)
            # Check if the folder exists and remove it if it does
            if os.path.exists(Mecp2Negative_folder_path):
                shutil.rmtree(Mecp2Negative_folder_path)
            # Create the new folder
            os.makedirs(Mecp2Negative_folder_path, exist_ok=True)

            # Replicate the subfolder structure (suite2p/plane0) in the new folder
            Mecp2Negative_suite2p_path = os.path.join(Mecp2Negative_folder_path, 'suite2p', 'plane0')
            os.makedirs(Mecp2Negative_suite2p_path, exist_ok=True)

            # Save the result to the Mecp2Negative subfolder
            np.save(os.path.join(Mecp2Negative_suite2p_path, "iscell.npy"), iscell_Mecp2Negative)

            # List of files to copy
            files_to_copy = ['stat.npy', 'F.npy', 'Fneu.npy', 'spks.npy', 'ops.npy']

            # Copy each file from suite2pfolder to Mecp2Negative_suite2p_path
            for file_name in files_to_copy:
                source_file = os.path.join(suite2pfolder, file_name)
                destination_file = os.path.join(Mecp2Negative_suite2p_path, file_name)
                if os.path.exists(source_file):  # Check if the file exists before copying
                    shutil.copy(source_file, destination_file)