In [None]:
# Short alias for the `snakemake` object. That name is not defined in this
# notebook; presumably it is injected by the Snakemake notebook runner -- TODO confirm.
sm = snakemake

In [None]:
import spherpro.bro as spb
import spherpro.db as db
from sqlalchemy import func

import os

## Aim: Quick overview of the dataset
- Number of blocks
- Number of slides
- Number of sites
- Number of sphere sections
- Number of cell sections

In [None]:
class Config:
    """Namespace for the paths this notebook takes from ``sm``."""

    # Read from sm.input: path passed to spb.get_bro further down.
    fn_config = sm.input.fn_config
    # Read from sm.output: folder that is created with os.makedirs further down.
    fol_plts = sm.output.fol_plots


# Alias for the class itself (it is never instantiated); read values as C.<name>.
C = Config

In [None]:
# Create the output folder (including parents); exist_ok=True keeps re-runs from failing.
os.makedirs(C.fol_plts, exist_ok=True)

In [None]:
# Build the `bro` object from the config file path; get_stats below issues
# all of its queries through bro.session.
bro = spb.get_bro(C.fn_config)

Get all the measurement metadata


# Get basic statistics

In [None]:
def get_stats(bro):
    """Collect summary statistics of the dataset behind ``bro``.

    All values are obtained through ``bro.session.query``:

    * plain counts (``nslides``, ``nsites``, ``images``, ...) are the result
      of ``.count()``;
    * ``*_per_block`` entries are the row lists returned by ``.all()`` for
      queries grouped by sample block name (``valid_cells_per_block`` is
      additionally grouped by object type);
    * ``avg_*`` entries are the ``.all()`` result of one
      ``(min, avg, max)`` aggregate taken over a per-group count subquery.

    Args:
        bro: Object exposing ``session.query`` (in this notebook, the value
            returned by ``spb.get_bro``).

    Returns:
        dict: Statistic name -> query result as described above.
    """
    new_query = bro.session.query
    block_name = db.sampleblocks.sampleblock_name

    def chain_joins(q, *targets):
        """Apply ``q.join(target)`` for each target, in the order given."""
        for target in targets:
            q = q.join(target)
        return q

    def min_avg_max(col):
        """Return the rows of a single (min, avg, max) aggregate over ``col``."""
        return new_query(func.min(col), func.avg(col), func.max(col)).all()

    def per_block(counted, *targets):
        """Count ``counted`` per sample block name after joining ``targets``."""
        q = chain_joins(new_query(block_name, func.count(counted)), *targets)
        return q.group_by(block_name).all()

    # Per-group counts, each exposed as the `count` column of a subquery so
    # that min/avg/max can be taken over the groups afterwards.

    # Joined (images x valid_images) rows per condition_id.
    rows_per_well = (
        chain_joins(
            new_query(func.count(db.conditions.condition_id).label('count')),
            db.images, db.valid_images)
        .group_by(db.conditions.condition_id)
        .subquery().c.count
    )

    # Distinct condition_ids per condition_name.
    wells_per_condition = (
        chain_joins(
            new_query(
                func.count(db.conditions.condition_id.distinct()).label('count')),
            db.images, db.valid_images)
        .group_by(db.conditions.condition_name)
        .subquery().c.count
    )

    # Distinct object_ids with object_type == 'cell' per condition_name.
    cells_per_condition = (
        chain_joins(
            new_query(func.count(db.objects.object_id.distinct()).label('count')),
            db.images, db.valid_images, db.conditions, db.valid_objects)
        .filter(db.objects.object_type == 'cell')
        .group_by(db.conditions.condition_name)
        .subquery().c.count
    )

    # Filled entry by entry; key order and query execution order are the
    # order of the assignments below.
    stats = {}

    stats['nslides'] = new_query(db.slides).count()
    stats['nsites'] = new_query(db.sites).count()
    stats['images'] = new_query(db.images).count()
    stats['valid_images'] = new_query(db.valid_images).count()

    # Despite the key name, only objects of type 'cell' are counted here.
    valid_cells = chain_joins(new_query(db.valid_objects), db.objects)
    valid_cells = valid_cells.filter(db.objects.object_type == 'cell')
    stats['valid_objects'] = chain_joins(
        valid_cells, db.images, db.valid_images).count()

    stats['total_wells'] = new_query(db.conditions).count()
    stats['measured_wells'] = (
        chain_joins(
            new_query(db.conditions.condition_id), db.images, db.valid_images)
        .distinct()
        .count()
    )
    stats['measured_wells_per_block'] = per_block(
        db.conditions.condition_id.distinct(),
        db.images, db.sampleblocks, db.valid_images)
    stats['avg_images_well'] = min_avg_max(rows_per_well)

    stats['total_conditions'] = new_query(
        db.conditions.condition_name.distinct()).count()
    stats['measured_conditions'] = chain_joins(
        new_query(db.conditions.condition_name.distinct()),
        db.images, db.valid_images).count()
    stats['avg_well_cond'] = min_avg_max(wells_per_condition)
    stats['avg_obj_ct'] = min_avg_max(cells_per_condition)

    stats['sites_per_block'] = per_block(
        db.sites.site_id,
        db.slides, db.slideacs, db.sites)
    stats['slides_per_block'] = per_block(
        db.slides.slide_id,
        db.slides)
    stats['images_per_block'] = per_block(
        db.images.image_id,
        db.slides, db.slideacs, db.sites, db.acquisitions, db.images)
    stats['valid_images_per_block'] = per_block(
        db.images.image_id,
        db.slides, db.slideacs, db.sites, db.acquisitions, db.images,
        db.valid_images)

    # Same join path as above, extended to objects and grouped by object
    # type as well, so each row is (block name, object type, count).
    objects_by_block_and_type = chain_joins(
        new_query(
            block_name,
            db.objects.object_type,
            func.count(db.objects.object_id)),
        db.slides, db.slideacs, db.sites, db.acquisitions, db.images,
        db.valid_images, db.objects, db.valid_objects)
    stats['valid_cells_per_block'] = (
        objects_by_block_and_type
        .group_by(block_name, db.objects.object_type)
        .all()
    )

    return stats

Compute and display the basic statistics

In [None]:
# Compute the statistics dict; the bare `g` as the cell's last expression
# makes the notebook display it.
g = get_stats(bro)
g