In [1]:
import pandas as pd
import numpy as np
import glob
import re


In [2]:
columns_name=['Project_ID', 'Image_Name', 'Distance_to_midline',
       'Analyze', 'Exclude', 'Comment', 'hemisphere(L/R)',
       'Qupath_project_path', 'dataframe_path', 'ML_type(GT/FP)']

In [3]:
main_df = pd.read_csv('/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Production/20231102/datasets/Exported_Features/Dataframes/metadata.csv')

In [4]:
def get_qupath_project_by_image_name(_dataframe, image_name : str) ->str:
    return list(_dataframe[_dataframe['Image_Name'].str.contains(image_name)]['Project_ID'])

In [5]:
get_qupath_project_by_image_name(main_df, 'SLD_0000560.vsi-20x_01')

['ProjectQuPath_1443460_RH_Nissl_4']

In [6]:
def get_dataframe_path_form_project_id(image_ids,
                                       for_pred_path='/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Production/20231102/datasets/Exported_Features/Dataframes/For_prediction/01122024',
                                       gt_path = '/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Production/20231102/datasets/Exported_Features/Dataframes/Ground_Truth/'):
    
    dataframe_pathes = []
    ml_types = []
    for image_id in image_ids:
        pattern = for_pred_path + '/Feature*' + image_id + '*'
        result =  glob.glob(pattern)
        if len(result) == 1:
            dataframe_pathes.append(result[0])
            ml_types.append('FP')
        else:
            pattern = gt_path + '/Feature*' + image_id + '*'
            result = glob.glob(pattern)
            if len(result) == 1:
                dataframe_pathes.append(result[0])
                ml_types.append('GT')
            else:
                dataframe_pathes.append(None)
                ml_types.append(None)
    return dataframe_pathes, ml_types

In [7]:
def get_hemisphere_from_projet_id(project_id):
    if 'RH' in project_id:
        return 'right'
    elif 'LH' in project_id:
        return  'left'
    elif 'GroundTruth' in project_id:
        return 'N/A'
    else:
        raise ValueError

In [8]:
def project_id_from_path(path):
    last_slash_pos = path.rfind('/')+1
    qpproj_pos = path.find('qpproj')
    return path[last_slash_pos:qpproj_pos-1]
    

In [9]:
# S1HL

#meta_root_pattern = '/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Analysis/Nissl_*/*/*_Metadata_information.csv'
#meta_list = glob.glob(meta_root_pattern, recursive=True)
#meta_list.append('/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/LBR_Algorithm/Cellpose/QuPath_LayerBoundaries_GroundTruth_20231222/QuPath_LayerBoundaries_GroundTruth_20231222.qpproj_Metadata_information.csv')

In [12]:
# MPtA

meta_root_pattern = '/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Analysis/Nissl_*/*_MPtA/*_Metadata_information.csv'
meta_list = glob.glob(meta_root_pattern, recursive=True)
print(meta_list)
#meta_list.append('/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/LBR_Algorithm/Cellpose/QuPath_LayerBoundaries_GroundTruth_20231222/QuPath_LayerBoundaries_GroundTruth_20231222.qpproj_Metadata_information.csv')

['/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Analysis/Nissl_4/ProjectQuPath_1443459_RH_Nissl_4_MPtA/ProjectQuPath_1443459_RH_Nissl_4_WithMPtA.qpproj_Metadata_information.csv', '/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Analysis/Nissl_6/ProjectQuPath_1449922_RH_Nissl_6_MPtA/ProjectQuPath_1449922_RH_Nissl_6_WithMPtA.qpproj_Metadata_information.csv']


In [10]:
main_df = pd.DataFrame()


for meta_file_path in meta_list:
    project_pathes=[]
    project_ids=[]
    image_names = []
    distances = []
    analyses=[]
    exludes=[]
    comments=[]
    hemispheres=[]
    qupath_project_pathes=[]
    ml_types=[]
    print(f'meta_file_path {meta_file_path} \r', end='')
    project_path = meta_file_path[:-25]
    project_id = project_id_from_path(meta_file_path)
    project_meta_df = pd.read_csv(meta_file_path, index_col=0)
    nb_images = len(project_meta_df)
    
    project_ids.extend([project_id] * nb_images)
    image_names = list( project_meta_df['Image Name'])
    distances = list( project_meta_df['Distance to midline'])
    analyses= list( project_meta_df['Analyze'])
    exludes= list( project_meta_df['Exclude'])
    comments= list( project_meta_df['Comment'])
    hemispheres.extend([get_hemisphere_from_projet_id(project_id)] * nb_images)
    qupath_project_pathes.extend([project_path] * nb_images)
    dataframe_path, ml_types = get_dataframe_path_form_project_id(image_names)
    data = np.array([project_ids, image_names, distances, analyses, exludes,
                  comments, hemispheres, qupath_project_pathes,
                  dataframe_path, ml_types]).T

    df = pd.DataFrame(data = data,
                      columns=columns_name)

    
    main_df = pd.concat([main_df, df])
    
print('\nDone')
    
main_df.to_csv('/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Production/20231102/datasets/Exported_Features/Dataframes/metadata_MPtA.csv')

meta_file_path /gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Analysis/Nissl_4/ProjectQuPath_1443459_RH_Nissl_4_MPtA/ProjectQuPath_1443459_RH_Nissl_4_WithMPtA.qpproj_Metadata_information.csv meta_file_path /gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Analysis/Nissl_6/ProjectQuPath_1449922_RH_Nissl_6_MPtA/ProjectQuPath_1449922_RH_Nissl_6_WithMPtA.qpproj_Metadata_information.csv 
Done


In [36]:
#metadata

In [10]:
meta = pd.read_csv('/gpfs/bbp.cscs.ch/project/proj53/LayerBoundariesProject/Production/20231102/datasets/Exported_Features/Dataframes/metadata.csv')

In [11]:
meta

Unnamed: 0.1,Unnamed: 0,Project_ID,Image_Name,Distance_to_midline,Analyze,Exclude,Comment,hemisphere(L/R),Qupath_project_path,dataframe_path,ML_type(GT/FP)
0,0,ProjectQuPath_01413827_RH_Nissl_1,SLD_0000386.vsi-20x_01,0,False,True,0,right,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,,
1,1,ProjectQuPath_01413827_RH_Nissl_1,SLD_0000386.vsi-20x_02,0,False,True,0,right,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,,
2,2,ProjectQuPath_01413827_RH_Nissl_1,SLD_0000386.vsi-20x_03,0,False,True,0,right,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,,
3,3,ProjectQuPath_01413827_RH_Nissl_1,SLD_0000386.vsi-20x_04,0,False,True,0,right,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,,
4,4,ProjectQuPath_01413827_RH_Nissl_1,SLD_0000386.vsi-20x_05,0,False,True,0,right,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,,
...,...,...,...,...,...,...,...,...,...,...,...
2213,33,QuPath_LayerBoundaries_GroundTruth_20231222,SLD_0000749.vsi-20x_05,2.90mm,,False,0,,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,GT
2214,34,QuPath_LayerBoundaries_GroundTruth_20231222,SLD_0000682.vsi-20x_03,3.05mm,,False,0,,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,GT
2215,35,QuPath_LayerBoundaries_GroundTruth_20231222,SLD_0000431.vsi-20x_03,around 2.90mm,,False,0,,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,GT
2216,36,QuPath_LayerBoundaries_GroundTruth_20231222,SLD_0000463.vsi-20x_01,around 2.40mm,,False,,,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,GT


In [36]:
projets = np.unique(meta.Project_ID)

In [47]:
total = 0

for projet in projets:
    
    projet_df = meta[meta.Project_ID == projet]
    analyse_flag = meta.Analyze == True
    #print(analyse_flag)
    nb_images = len(projet_df[analyse_flag])
    total+=nb_images
    print(f'Projet {projet} {nb_images}')
print('Nb images:', total)
    

Projet ProjectQuPath_01413827_LH_Nissl_2 22
Projet ProjectQuPath_01413827_RH_Nissl_1 9
Projet ProjectQuPath_01413828_RH_Nissl_2 16
Projet ProjectQuPath_01413829_RH_Nissl_2 23
Projet ProjectQuPath_1413828_LH_Nissl_3 22
Projet ProjectQuPath_1443459_LH_Nissl_5 0
Projet ProjectQuPath_1443459_RH_Nissl_4 22
Projet ProjectQuPath_1443460_LH_Nissl_5 17
Projet ProjectQuPath_1443460_RH_Nissl_4 21
Projet ProjectQuPath_1449920_LH_Nissl_6 26
Projet ProjectQuPath_1449920_RH_Nissl_6 0
Projet ProjectQuPath_1449920_RH_Nissl_6_EXCLUDE 0
Projet ProjectQuPath_1449921_LH_Nissl_6 16
Projet ProjectQuPath_1449921_RH_Nissl_6 24
Projet ProjectQuPath_1449922_LH_Nissl_6 23
Projet ProjectQuPath_1449922_RH_Nissl_6 22
Projet QuPath_LayerBoundaries_GroundTruth_20231222 0
Nb images: 263


  


In [67]:
with open('/gpfs/bbp.cscs.ch/project/proj53/scratch/foo', 'r') as fp:
    data = fp.read()
    images = data.replace('Features_', '').replace('.csv', '').split("\n")
image_in_gpfs = set(images)


In [66]:
projet_df = meta[meta.Analyze == True]
image_in_df = set(projet_df.Image_Name.to_list())

In [68]:
image_in_gpfs - image_in_df

{''}

In [69]:
image_in_df - image_in_gpfs

{'SLD_0000402.vsi-20x_01'}

In [70]:
meta[meta.Image_Name == 'SLD_0000402.vsi-20x_01']

Unnamed: 0.1,Unnamed: 0,Project_ID,Image_Name,Distance_to_midline,Analyze,Exclude,Comment,hemisphere(L/R),Qupath_project_path,dataframe_path,ML_type(GT/FP)
97,97,ProjectQuPath_01413827_RH_Nissl_1,SLD_0000402.vsi-20x_01,around 3.18-3.40mm,True,False,0,right,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,GT
2193,13,QuPath_LayerBoundaries_GroundTruth_20231222,SLD_0000402.vsi-20x_01,3.18-3.40mm,,False,0,,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,/gpfs/bbp.cscs.ch/project/proj53/LayerBoundari...,GT


In [8]:
filelist = glob.glob('/gpfs/bbp.cscs.ch/project/proj53/CellDensitiesProject/Production/2024/Density/02022024/Per_percentage/*.csv')
len(filelist)

218

In [39]:
from collections import defaultdict
counts = defaultdict(int)
for file in filelist:

    image_id = file.split('/')[-1][:-4]
    project = get_qupath_project_by_image_name(main_df, image_id)[0]
    counts[project]+=1
    

In [40]:
counts

defaultdict(int,
            {'ProjectQuPath_1449922_LH_Nissl_6': 18,
             'ProjectQuPath_1443460_RH_Nissl_4': 17,
             'ProjectQuPath_1449921_LH_Nissl_6': 14,
             'ProjectQuPath_1443459_RH_Nissl_4': 17,
             'ProjectQuPath_1449922_RH_Nissl_6': 19,
             'ProjectQuPath_01413827_RH_Nissl_1': 7,
             'ProjectQuPath_1443460_LH_Nissl_5': 16,
             'ProjectQuPath_01413828_RH_Nissl_2': 12,
             'ProjectQuPath_1413828_LH_Nissl_3': 19,
             'ProjectQuPath_1449921_RH_Nissl_6': 19,
             'ProjectQuPath_01413827_LH_Nissl_2': 18,
             'ProjectQuPath_01413829_RH_Nissl_2': 20,
             'ProjectQuPath_1449920_LH_Nissl_6': 22})

In [45]:
indice = np.argsort(list(counts.keys()))

In [47]:
list(counts.values())[indice]

TypeError: only integer scalar arrays can be converted to a scalar index

In [59]:
for index in indice:
    print(np.array(list(counts.keys()))[index],np.array(list(counts.values()))[index] )

ProjectQuPath_01413827_LH_Nissl_2 18
ProjectQuPath_01413827_RH_Nissl_1 7
ProjectQuPath_01413828_RH_Nissl_2 12
ProjectQuPath_01413829_RH_Nissl_2 20
ProjectQuPath_1413828_LH_Nissl_3 19
ProjectQuPath_1443459_RH_Nissl_4 17
ProjectQuPath_1443460_LH_Nissl_5 16
ProjectQuPath_1443460_RH_Nissl_4 17
ProjectQuPath_1449920_LH_Nissl_6 22
ProjectQuPath_1449921_LH_Nissl_6 14
ProjectQuPath_1449921_RH_Nissl_6 19
ProjectQuPath_1449922_LH_Nissl_6 18
ProjectQuPath_1449922_RH_Nissl_6 19
