In [1]:
import getpass
import os

import omero.clients
import pandas as pd
from tqdm import tqdm

# Connection settings come from the environment, with an interactive prompt as
# a fallback, so that no credentials are stored in the notebook and this cell
# runs on a fresh kernel (USERNAME and PASSWORD were previously never defined).
# OMERO_HOST / OMERO_USER / OMERO_PASSWORD are this notebook's own convention.
HOST = os.environ.get('OMERO_HOST', 'localhost')
USERNAME = os.environ.get('OMERO_USER') or input('OMERO username: ')
PASSWORD = os.environ.get('OMERO_PASSWORD') or getpass.getpass('OMERO password: ')

client = omero.client(HOST)
session = client.createSession(USERNAME, PASSWORD)
# Ping the server every 60 seconds so the session stays alive while the slow
# per-dataset queries further down are running.
client.enableKeepAlive(60)
# Query service used for every HQL projection in this notebook.
qs = session.getQueryService()

def batchgen(arr, size):
    """Yield consecutive slices of ``arr``, each holding at most ``size`` items.

    The last slice is shorter when ``len(arr)`` is not a multiple of ``size``,
    and an empty ``arr`` yields nothing.  ``arr`` must support ``len()`` and
    slicing; each batch has whatever type slicing ``arr`` produces.
    """
    yield from (arr[start:start + size] for start in range(0, len(arr), size))

In [2]:
# Fetch one (project name, dataset name, dataset id) row for every dataset
# linked to a project whose name starts with 'idr0071'.
project_dataset_links = qs.projection("""
SELECT pdl.parent.name, pdl.child.name, pdl.child.id
FROM ProjectDatasetLink pdl
WHERE pdl.parent.name LIKE 'idr0071%'
""", None)
# unwrap() turns the returned OMERO rtype wrappers into plain Python values.
datasets = omero.rtypes.unwrap(project_dataset_links)
print(f'Datasets: [{len(datasets)}]')

Datasets: [512]


In [3]:
# Run one image query per dataset; `rs` ends up holding one list of rows per
# dataset, in the same order as `datasets`.  Each row carries: the first
# character of the dataset name, the dataset name from its third character on,
# the client path of a file used by the image's fileset, the image name and
# the five pixel dimensions.  Only the dataset id from each triple is needed.
# The recorded run took about three minutes for 512 datasets.
rs = []
for _project_name, _dataset_name, did in tqdm(datasets):
    image_rows = qs.projection(f"""
SELECT
    SUBSTRING(dlink.parent.name, 1, 1),
    SUBSTRING(dlink.parent.name, 3),
    uf.clientPath,
    image.name as image_name,
    pix.sizeX, pix.sizeY, pix.sizeZ, pix.sizeC, pix.sizeT
FROM Image image
LEFT OUTER JOIN image.pixels pix
JOIN image.fileset fs
JOIN fs.usedFiles uf
JOIN image.datasetLinks dlink
WHERE dlink.parent.id = {did}
""", None)
    rs.append(omero.rtypes.unwrap(image_rows))


100%|██████████| 512/512 [03:03<00:00,  2.79it/s]


In [4]:
# Merge the per-dataset result lists into a single flat list of rows.
flattened = []
for per_dataset in rs:
    flattened.extend(per_dataset)

# Column names follow the order of the SELECT list in the image query.
df = pd.DataFrame(
    flattened,
    columns=('Experiment', 'Dataset', 'File', 'Image', 'X', 'Y', 'Z', 'C', 'T'),
)
df

Unnamed: 0,Experiment,Dataset,File,Image,X,Y,Z,C,T
0,B,phenotype_aligned B3 process,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_B3_Tile-10.phenotype_aligned,1024,1024,1,2,1
1,B,phenotype_aligned B3 process,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_B3_Tile-102.phenotype_aligned,1024,1024,1,2,1
2,B,phenotype_aligned B3 process,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_B3_Tile-104.phenotype_aligned,1024,1024,1,2,1
3,B,phenotype_aligned B3 process,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_B3_Tile-103.phenotype_aligned,1024,1024,1,2,1
4,B,phenotype_aligned B3 process,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_B3_Tile-116.phenotype_aligned,1024,1024,1,2,1
...,...,...,...,...,...,...,...,...,...
205901,D,c0-DAPI-p65ab phenotype A1 raw,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_c0-DAPI-p65ab_A1_Tile-9.phenotype,1024,1024,1,1,2
205902,D,c0-DAPI-p65ab phenotype A1 raw,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_c0-DAPI-p65ab_A1_Tile-91.phenotype,1024,1024,1,1,2
205903,D,c0-DAPI-p65ab phenotype A1 raw,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_c0-DAPI-p65ab_A1_Tile-96.phenotype,1024,1024,1,1,2
205904,D,c0-DAPI-p65ab phenotype A1 raw,uod/idr/filesets/idr0071-feldman-crisprko/2019...,10X_c0-DAPI-p65ab_A1_Tile-93.phenotype,1024,1024,1,1,2


## Count images per dataset that have `C != 5` _and_ (`Z > 1` _or_ `T > 1`), suggesting the dimensions have been swapped.

In [5]:
# Flag images whose channel count is not 5 while Z or T has more than one
# plane.  (For a per-dimension breakdown, group on each of X, Y, Z, C and T
# in turn together with Experiment and Dataset.)
not_five_channels = df['C'].ne(5)
extra_z_or_t = df['Z'].gt(1) | df['T'].gt(1)
cond = not_five_channels & extra_z_or_t

# Number of flagged images per (C, Experiment, Dataset), shown in full.
g = df[cond].groupby(['C', 'Experiment', 'Dataset'])['Image']
with pd.option_context('display.max_rows', None):
    display(g.count().to_frame())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Image
C,Experiment,Dataset,Unnamed: 3_level_1
1,B,c10-SBS-10 sbs A1 raw,533
1,B,c10-SBS-10 sbs A2 raw,533
1,B,c10-SBS-10 sbs A3 raw,533
1,B,c10-SBS-10 sbs B1 raw,533
1,B,c10-SBS-10 sbs B2 raw,533
1,B,c10-SBS-10 sbs B3 raw,532
1,B,c2-SBS-2 sbs A1 raw,533
1,B,c2-SBS-2 sbs A2 raw,533
1,B,c2-SBS-2 sbs A3 raw,533
1,B,c2-SBS-2 sbs B1 raw,533


## Invert the previous selection, i.e. these are the images we think are fine

In [6]:
# Complement of the flagged set: number of images per (C, Experiment, Dataset)
# that did not match the suspicious-dimension condition.
unflagged = df[~cond]
g = unflagged.groupby(['C', 'Experiment', 'Dataset'])['Image']
with pd.option_context('display.max_rows', None):
    display(g.count().to_frame())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Image
C,Experiment,Dataset,Unnamed: 3_level_1
1,A,cells A1 process,533
1,A,cells A2 process,533
1,A,cells A3 process,533
1,A,cells B1 process,533
1,A,cells B2 process,533
1,A,cells B3 process,533
1,A,nuclei A1 process,533
1,A,nuclei A2 process,533
1,A,nuclei A3 process,533
1,A,nuclei B1 process,533
