## Select folders for analysis

In [1]:
# Starting folder handed to the folder-selection helper below.
initial_folder = 'D:/imaging/jm/'

from select_multiple import prompt_user_to_select_folder
from select_actual_folder import select_and_update_paths

# Option A (currently disabled): select the folders with
# prompt_user_to_select_folder.
#selected_folders = None
#selected_folders = prompt_user_to_select_folder(initial_folder)
# Option B (currently active): select the paths one by one. To switch back to
# option A, uncomment the two lines above and comment out the two lines below.
from select_one_by_one import prompt_user_to_select_one_by_one
selected_folders = prompt_user_to_select_one_by_one(initial_folder)

data paths:
D:/imaging/jm/jm038


In [2]:
# Judging by the output below, this lists the sub-folders found under the
# selected folders, asks which one to add, and returns the updated paths.
# `updated_paths` is the list every following cell iterates over.
updated_paths = select_and_update_paths(selected_folders)

Dossiers uniques trouvés dans les derniers dossiers indiqués :
1. 2024-05-01
2. 2024-05-02
3. 2024-05-04
4. 2024-05-03
5. 2024-05-06
6. 2024-05-05
7. 2024-04-30


Sélectionnez le numéro du dossier que vous souhaitez ajouter :  1


Dossier sélectionné : 2024-05-01
Chemins mis à jour :
D:/imaging/jm\jm038\2024-05-01


In [14]:
import os

# Files that must ALL be present for a folder to be usable. F.npy is part of
# the requirement because a later cell loads every entry of F_paths; checking
# only stat.npy/iscell.npy would let a missing F.npy slip through to np.load.
required_files = ('stat.npy', 'iscell.npy', 'F.npy')

# One entry per folder that contains every required file
stat_paths = []
iscell_paths = []
F_paths = []
# Folders with at least one missing file, or that could not be inspected
error_paths = []

for path in updated_paths:
    try:
        stat_file, iscell_file, F_file = (
            os.path.join(path, file_name) for file_name in required_files
        )

        if all(os.path.exists(f) for f in (stat_file, iscell_file, F_file)):
            # The three lists are always appended together so they stay
            # index-aligned with each other.
            stat_paths.append(stat_file)
            F_paths.append(F_file)
            iscell_paths.append(iscell_file)
        else:
            # At least one file is missing: report the folder as an error
            print(f"Error: Missing file(s) in '{path}'.")
            error_paths.append(path)

    except Exception as e:
        print(f"An error occurred while processing {path}: {e}")
        error_paths.append(path)

# Summary of what was found and what is missing
print("Stat Paths:")
for path in stat_paths:
    print(path)

print("\nIscell Paths:")
for path in iscell_paths:
    print(path)

print("\nError Paths:")
for path in error_paths:
    print(path)


Stat Paths:
D:/imaging/jm\jm038\2024-05-01\stat.npy

Iscell Paths:
D:/imaging/jm\jm038\2024-05-01\iscell.npy

Error Paths:


In [15]:
import numpy as np

# Load the arrays of every validated folder.
# Iterating over the validated path lists (instead of indexing them with the
# position of a folder in `updated_paths`) keeps stat/F/iscell from the same
# folder together even when some folders were rejected by the previous cell.
# Only the arrays of the LAST folder remain bound after the loop.
# NOTE: allow_pickle=True can execute code embedded in the file; only load
# files you trust. It is needed here because `stat` holds Python objects.
for stat_file, F_file, iscell_file in zip(stat_paths, F_paths, iscell_paths):
    stat = np.load(stat_file, allow_pickle=True)
    F = np.load(F_file, allow_pickle=True)
    iscell = np.load(iscell_file, allow_pickle=True)
    # Number of rows whose first iscell column equals 1
    ncells = np.sum(iscell[:, 0] == 1)

In [16]:
# Keep the rows of F whose second iscell column is strictly positive and
# convert them to float.
# NOTE(review): `ncells` is counted with iscell[:, 0] == 1, whereas this mask
# uses column 1 (> 0) — confirm which column is intended here.
roi_mask = iscell[:, 1] > 0
DF = F[roi_mask, :].astype(float)

## Do some analysis on data

In [None]:
from contextlib import contextmanager

import scipy.io  # loadmat is used below and is not imported by any earlier cell

# Per-session result lists. Each of the lists appended to below receives
# exactly ONE entry per path in `updated_paths` (None when the session could
# not be processed), so index i always refers to updated_paths[i].
ncell_list = []
num_sces_list = []
sce_n_cells_threshold_list = []
sce_frequencies = []
avg_active_cell_list = []
ratio_list = []
avg_frequency_of_activity_list = []
avg_isis_list = []

num_clusters_list = []
all_cells_per_cluster = []
avg_active_cells_not_in_SCEs_list = []  # declared but not filled by this cell

cell_sizes_mean_list = []
cell_sizes_std_list = []
avg_spike_frequency_list = []  # declared but not filled by this cell

# Lists that must stay row-aligned with `updated_paths`
_per_session_lists = (
    ncell_list, num_sces_list, sce_n_cells_threshold_list, sce_frequencies,
    avg_isis_list, avg_active_cell_list, avg_frequency_of_activity_list,
    ratio_list, cell_sizes_mean_list, cell_sizes_std_list,
    num_clusters_list, all_cells_per_cluster,
)

# Constants used to convert ROI areas from pixels to square microns
largeur_champ_micro = 750   # field-of-view width, in microns
resolution_pixels = 512     # number of pixels across that width
taille_pixel_micro = largeur_champ_micro / resolution_pixels
taille_pixel_micro_carré = taille_pixel_micro ** 2


@contextmanager
def _step(description):
    """Re-raise any error from the enclosed step as 'description: original message'.

    The original exception is chained as __cause__ so the main loop can still
    tell a missing file apart from other failures.
    """
    try:
        yield
    except Exception as e:
        raise Exception(f"{description}: {str(e)}") from e


# NOTE: `sampling_rate` (samples per second) is not defined in this cell; it
# must already exist in the kernel, otherwise every session is reported as failed.

# Main loop over the selected paths
for path in updated_paths:

    try:
        # Load SCE and clustering data
        with _step("Error loading .mat files"):
            data_SCEs = scipy.io.loadmat(os.path.join(path, 'results.mat'))
            data_clustering = scipy.io.loadmat(os.path.join(path, 'results_clustering.mat'))

        # Number of cells (rows of F) and number of samples (columns of F)
        with _step("Error processing F matrix"):
            F = data_SCEs['F']
            NCell, Nz = F.shape

        # Threshold for SCEs detection
        with _step("Error processing sce_n_cells_threshold"):
            sce_n_cells_threshold = data_SCEs['sce_n_cells_threshold'][0][0]

        # Number of SCEs
        with _step("Error processing TRace and SCE metrics"):
            TRace = data_SCEs['TRace'].flatten()  # 1D vector, one entry per SCE
            num_sces = len(TRace)

        # SCE frequency in events per minute
        with _step("Error calculating SCE frequency"):
            nb_seconds = Nz / sampling_rate
            sce_frequency_seconds = num_sces / nb_seconds
            sce_frequency_minutes = sce_frequency_seconds * 60

        # Mean time between consecutive SCEs, in minutes
        with _step("Error calculating Interspike Intervals (ISIs)"):
            if num_sces > 1:  # at least two events are needed for an interval
                # Cast to float so unsigned integer data cannot wrap around in diff.
                isis = np.diff(TRace.astype(float))
                # samples -> seconds (divide by the sampling rate) -> minutes.
                # Dividing by the recording duration, as was done before,
                # does not yield a time.
                isis_minutes = isis / sampling_rate / 60
                mean_isis = np.mean(isis_minutes)
            else:
                mean_isis = None

        # Average number of active cells in SCEs
        with _step("Error calculating average number of active cells"):
            Race = data_SCEs['Race']
            if Race.shape[1] >= num_sces:  # one column per SCE is required
                avg_active_cell_SCEs = np.mean([Race[:, i].sum() for i in range(num_sces)])
            else:
                # Explicit None: never reuse a value left over from a previous session
                avg_active_cell_SCEs = None

        # Frequency of activity
        # NOTE(review): this is the mean number of active cells per Raster
        # column divided by the recording duration — confirm this is the
        # intended definition for the "(Hz)" column.
        with _step("Error calculating frequency of activity"):
            Raster = data_SCEs['Raster']
            _, num_columns = Raster.shape
            avg_activity = np.mean([Raster[:, i].sum() for i in range(num_columns)])
            avg_frequency_of_activity_seconds = avg_activity / nb_seconds

        # Ratio of active cells in SCEs/outside SCEs
        with _step("Error calculating ratio of active cells in SCEs/outside SCEs"):
            # Every Raster column index that is not listed in TRace
            # NOTE(review): assumes TRace holds 0-based Raster column indices — TODO confirm
            inds = np.arange(num_columns)
            indices_not_SCEs = inds[~np.isin(inds, TRace)]

            if len(indices_not_SCEs) > 0 and avg_active_cell_SCEs is not None:
                avg_active_cells_not_in_SCEs = np.mean([Raster[:, i].sum() for i in indices_not_SCEs])
                ratio = avg_active_cell_SCEs / avg_active_cells_not_in_SCEs
            else:
                ratio = np.nan

        # Cell sizes, in square microns
        with _step("Error calculating cell sizes"):
            # Build the file names from `path` itself so they can never be
            # taken from a different session.
            # NOTE: allow_pickle=True can execute code embedded in the file;
            # only load trusted files.
            stat = np.load(os.path.join(path, 'stat.npy'), allow_pickle=True)
            iscell = np.load(os.path.join(path, 'iscell.npy'), allow_pickle=True)
            is_cell_mask = iscell[:, 0] == 1
            ncells = np.sum(is_cell_mask)
            # Use the ROIs flagged as cells, not simply the first `ncells` ROIs.
            # NOTE(review): assumes stat and iscell have one row per ROI in the same order.
            cell_sizes_list = [roi['npix'] * taille_pixel_micro_carré for roi in stat[is_cell_mask]]

            if cell_sizes_list:
                cell_sizes_array = np.array(cell_sizes_list)
                mean_cell_size = np.mean(cell_sizes_array)
                std_cell_size = np.std(cell_sizes_array)
            else:
                mean_cell_size = None
                std_cell_size = None

        # Number of clusters and number of cells per cluster
        with _step("Error processing clustering information"):
            NClOK = data_clustering['NClOK'].item()  # Assuming it's a single value
            if NClOK >= 1:
                clusterMatrix = data_clustering['clusterMatrix']
                cluster_labels = clusterMatrix[:, 1]
                cluster_ids = np.unique(cluster_labels)
                num_clusters = cluster_ids.size
                cells_per_cluster = [
                    (cluster_id, np.sum(cluster_labels == cluster_id))
                    for cluster_id in cluster_ids
                ]
            else:
                num_clusters = 0
                cells_per_cluster = None

    except Exception as e:
        # The step wrapper chains the original error as __cause__
        root_error = e.__cause__ if e.__cause__ is not None else e
        if isinstance(root_error, FileNotFoundError):
            print(f"File not found: {root_error}")
        else:
            print(f"Unexpected error in path {path}: {e}")
        # Placeholder row: keeps every list aligned with updated_paths
        for _results in _per_session_lists:
            _results.append(None)
        continue

    else:
        # Everything succeeded: commit the whole row at once, so a failure
        # half-way through can never leave the lists with different lengths.
        ncell_list.append(NCell)
        num_sces_list.append(num_sces)
        sce_n_cells_threshold_list.append(sce_n_cells_threshold)
        sce_frequencies.append(sce_frequency_minutes)
        avg_isis_list.append(mean_isis)
        avg_active_cell_list.append(avg_active_cell_SCEs)
        avg_frequency_of_activity_list.append(avg_frequency_of_activity_seconds)
        ratio_list.append(ratio)
        cell_sizes_mean_list.append(mean_cell_size)
        cell_sizes_std_list.append(std_cell_size)
        num_clusters_list.append(num_clusters)
        all_cells_per_cluster.append(cells_per_cluster)

In [None]:
def _last_two_components(raw_path):
    """Return the last two '/'-separated components of a path.

    Backslashes are treated as separators too. A path with fewer than two
    components is returned whole (after separator normalisation).
    """
    components = raw_path.replace('\\', '/').split('/')
    # Slicing with [-2:] yields the whole list when it has fewer than two items
    return '/'.join(components[-2:])


# Short "parent/folder" label for every selected path
last_two_parts = [_last_two_components(selected_path) for selected_path in updated_paths]

In [None]:
import pandas as pd

# Build the summary table: one row per (session, cluster). Session-level
# values are written on the first row of a session only; the extra cluster
# rows leave those columns blank.

# Every per-session list must have one entry per selected path, otherwise
# rows would silently mix values from different sessions.
_session_lists = {
    'last_two_parts': last_two_parts,
    'ncell_list': ncell_list,
    'num_sces_list': num_sces_list,
    'sce_n_cells_threshold_list': sce_n_cells_threshold_list,
    'sce_frequencies': sce_frequencies,
    'avg_active_cell_list': avg_active_cell_list,
    'ratio_list': ratio_list,
    'avg_frequency_of_activity_list': avg_frequency_of_activity_list,
    'avg_isis_list': avg_isis_list,
    'num_clusters_list': num_clusters_list,
    'all_cells_per_cluster': all_cells_per_cluster,
    'cell_sizes_mean_list': cell_sizes_mean_list,
    'cell_sizes_std_list': cell_sizes_std_list,
}
_wrong_lengths = {
    list_name: len(values)
    for list_name, values in _session_lists.items()
    if len(values) != len(updated_paths)
}
if _wrong_lengths:
    raise ValueError(
        f"Expected {len(updated_paths)} entries per result list, got {_wrong_lengths}"
    )

# List to collect rows
rows = []

for i in range(len(updated_paths)):
    # Session-level columns, in the order they appear in the sheet
    session_fields = {
        'Folder': last_two_parts[i],
        'Number of cells': ncell_list[i],
        'Number of SCEs': num_sces_list[i],
        'Threshold of cell numbers for SCEs detection': sce_n_cells_threshold_list[i],
        'SCE frequency (minutes)': sce_frequencies[i],
        'Average number of cells in SCES': avg_active_cell_list[i],
        'Ratio of number of cells in SCES/outside SCES': ratio_list[i],
        'Averaged frequency of cell activity (Hz)': avg_frequency_of_activity_list[i],
        'Averaged cell sizes (microns squared)': cell_sizes_mean_list[i],
        'Std of cell sizes (microns squared)': cell_sizes_std_list[i],
        'Averaged time intervals between consecutive SCEs (minutes)': avg_isis_list[i],
    }

    # A session without clusters (None or empty) still gets one row, with
    # empty cluster columns. This no longer depends on num_clusters_list[i],
    # which may be 0 or None for such sessions.
    cells_per_cluster = all_cells_per_cluster[i] or [(None, None)]

    for j, (cluster_id, n_cells_in_cluster) in enumerate(cells_per_cluster):
        if j == 0:
            row = dict(session_fields)
        else:
            # Additional clusters: blank out the session-level columns
            row = dict.fromkeys(session_fields, '')
        row['Cluster number'] = cluster_id
        row['Number of cells per cluster'] = n_cells_in_cluster
        rows.append(row)

# Convert list of rows to DataFrame
df_final = pd.DataFrame(rows)

In [None]:
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.utils.dataframe import dataframe_to_rows

# The sheet is named after the last component of data_path.
# NOTE(review): `data_path` and `PathSave` are not defined in the cells shown
# above — they must already exist in the kernel; confirm where they are set.
folder_name = os.path.basename(data_path)

# Workbook that collects one sheet per analysed folder
output_file = os.path.join(PathSave, 'results_summary.xlsx')

# Start from the sheets already stored in the workbook, if there is one
sheets = {}
if os.path.exists(output_file):
    with pd.ExcelFile(output_file, engine='openpyxl') as workbook:
        sheets = {name: workbook.parse(name) for name in workbook.sheet_names}

# Add this run's sheet, replacing a previous sheet of the same name
sheets[folder_name] = df_final

# (Re)write the workbook with every sheet, including the new one
with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
    for name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=name, index=False)

print(f"Data exported successfully to {output_file} with sheet named '{folder_name}'")