# Problem:
I have a task that gets performed for certain imaging channels. There is a table `Channel()` that holds all the imaging channels and a table `Task()` that holds the channels for which the task has been started. A row is inserted into `Task()` when the task is started, so `Task()` has a column called `task_performed` to keep track of whether the task has been performed to completion. When the task is completed, this column's value is flipped from 0 (the initial value) to 1.

The imaging channels belong to different samples, and for each sample I want to keep track of whether the task has been started, or completed, for all of its channels, some of its channels, or none of its channels.

In [3]:
import os
import datajoint as dj

In [4]:
# Configure the DataJoint connection. The user name and password are read from
# the DJ_DB_USER / DJ_DB_PASS environment variables rather than written here.
dj.config['database.host'] = 'datajoint00.pni.princeton.edu'
dj.config['database.user'] = os.getenv('DJ_DB_USER')
dj.config['database.password'] = os.getenv('DJ_DB_PASS')
dj.conn()

Connecting ahoag@datajoint00.pni.princeton.edu:3306


DataJoint connection (connected) ahoag@datajoint00.pni.princeton.edu:3306

In [23]:
# Define a minimal working test schema. The tables declared below with the
# @schema decorator are attached to the 'ahoag_test' schema.
schema = dj.Schema('ahoag_test')
@schema
class Task(dj.Manual):
    """Channels for which the task has at least been started.

    A row is inserted for a (sample_name, channel_name) pair when the task
    starts. task_performed is 0 at that point and is set to 1 once the task
    has been performed to completion.
    """
    # NOTE(review): the definition has no foreign key to Channel, so nothing
    # here ties a Task row to an existing Channel row -- confirm this is intended.
    definition = """
    sample_name: varchar(32)
    channel_name: varchar(32)
    ---
    task_performed: boolean 
    """
@schema
class Channel(dj.Manual):
    """All imaging channels, identified by (sample_name, channel_name)."""
    # Both attributes are listed above the '---' separator and nothing is
    # listed below it, so the table has no attributes besides these two.
    definition = """
    sample_name: varchar(32)
    channel_name: varchar(32) 
    ---
    """

In [24]:
# Test data: sample1 has three imaged channels, but only two of them get a row
# in Task() -- channel1 (task started, not completed) and channel2 (task completed).
channel_insert_list = [
    {'sample_name': 'sample1', 'channel_name': channel}
    for channel in ('channel1', 'channel2', 'channel3')
]
task_insert_list = [
    {'sample_name': 'sample1', 'channel_name': channel, 'task_performed': performed}
    for channel, performed in (('channel1', 0), ('channel2', 1))
]
# skip_duplicates=True is passed to both inserts so this cell can be re-run
# against tables that already contain these rows.
Channel().insert(channel_insert_list, skip_duplicates=True)
Task().insert(task_insert_list, skip_duplicates=True)

In [25]:
# Display the Channel table: there is 1 sample (sample1) with 3 channels.
Channel()

sample_name,channel_name
sample1,channel1
sample1,channel2
sample1,channel3


In [15]:
# Display the Task table: two of the three channels have task entries.
#   channel1 - task started but not completed (task_performed = 0)
#   channel2 - task completed (task_performed = 1)
#   channel3 - not in this table because the task has not been started for it yet
Task()

sample_name,channel_name,task_performed
sample1,channel1,0
sample1,channel2,1


In [29]:
# Aggregate Channel() by sample_name into a single entry per sample;
# n_channels_imaged is the number of Channel rows for that sample.
imaged_aggr_contents = dj.U('sample_name').aggr(Channel(),n_channels_imaged='count(*)')
imaged_aggr_contents

sample_name,n_channels_imaged  calculated attribute
sample1,3


In [30]:
# Join the per-sample channel counts with Task() so that every Task row also
# carries n_channels_imaged for its sample. This is used below to work out for
# how many of the 3 imaged channels the task has been started or performed.
# NOTE(review): only channels present in Task() show up in the joined result
# (channel3 is missing), so a sample with no Task rows at all would presumably
# be absent here -- confirm whether the "no channels started" case needs handling.
combined_contents = imaged_aggr_contents * Task()
combined_contents

sample_name,channel_name,n_channels_imaged  calculated attribute,task_performed
sample1,channel1,3,0
sample1,channel2,3,1


In [33]:
# Collapse the joined rows back to one entry per sample. For this sample we want
# to know whether all channels had the task performed, and also whether there
# are >0 channels for which the task has at least been started.
#   n_channels_imaged         - channel count carried over from the Channel() aggregation
#   n_channels_task_performed - sum of task_performed over the sample's Task rows
#   n_channels_in_task_table  - number of Task rows for the sample
result = dj.U('sample_name').aggr(combined_contents,
   sample_name='sample_name',
   n_channels_imaged='n_channels_imaged',
   n_channels_task_performed='sum(task_performed)',
   n_channels_in_task_table='count(*)',
  )
result

sample_name,n_channels_imaged  calculated attribute,n_channels_task_performed  calculated attribute,n_channels_in_task_table  calculated attribute
sample1,3,1,2


In [None]:
# This looks correct: of the 3 total channels, 2 are in the Task table because the task has at least
# been started for them, and 1 of those 2 has had the task performed to completion.

In [36]:
# Get samples for which all channels have had the task performed.
# There should be none: sample1 has 3 channels imaged but only 1 with the task performed.
result & 'n_channels_task_performed=n_channels_imaged'

sample_name,n_channels_imaged  calculated attribute,n_channels_task_performed  calculated attribute,n_channels_in_task_table  calculated attribute
,,,


In [37]:
# Get samples for which not all channels are done with the task, but for which
# at least one channel has been started (i.e. has a row in the Task table).
result & 'n_channels_task_performed!=n_channels_imaged' & 'n_channels_in_task_table > 0'

sample_name,n_channels_imaged  calculated attribute,n_channels_task_performed  calculated attribute,n_channels_in_task_table  calculated attribute
sample1,3,1,2


In [41]:
# So far this looks fine. However, if we do a proj and then restrict on one of
# the attributes computed by that proj, there is an unexpected result.
# The proj keeps the existing columns and adds two computed flags:
#   all_channels_task_complete - n_channels_task_performed = n_channels_imaged
#   some_channels_started      - n_channels_in_task_table > 0
result_proj = result.proj('sample_name','n_channels_imaged','n_channels_task_performed',
           'n_channels_in_task_table',
            all_channels_task_complete = 'n_channels_task_performed=n_channels_imaged',
            some_channels_started='n_channels_in_task_table>0')
result_proj

sample_name,n_channels_imaged  calculated attribute,n_channels_task_performed  calculated attribute,n_channels_in_task_table  calculated attribute,all_channels_task_complete  calculated attribute,some_channels_started  calculated attribute
sample1,3,1,2,0,1


In [43]:
# Works fine: sample1 has all_channels_task_complete = 0 and is returned.
result_proj & 'all_channels_task_complete=0'

sample_name,n_channels_imaged  calculated attribute,n_channels_task_performed  calculated attribute,n_channels_in_task_table  calculated attribute,all_channels_task_complete  calculated attribute,some_channels_started  calculated attribute
sample1,3,1,2,0,1


In [47]:
# Does NOT work: result_proj shows some_channels_started = 1 for sample1,
# yet this restriction returns no rows.
result_proj & 'some_channels_started=1'

sample_name,n_channels_imaged  calculated attribute,n_channels_task_performed  calculated attribute,n_channels_in_task_table  calculated attribute,all_channels_task_complete  calculated attribute,some_channels_started  calculated attribute
,,,,,
