In [None]:
# `snakemake` is not defined anywhere in this notebook; presumably it is injected
# by Snakemake's notebook integration at run time — TODO confirm.
sm = snakemake

In [None]:
import pandas as pd

import spherpro.bro as spb
import spherpro.db as db

import numpy as np
import pathlib

In [None]:
from src.variables import Vars

### Aim: Find markers whose levels differ significantly between cells with and without overexpression.

First it is investigated for the infected cells themselves. Approach:
- Fit a model to calculate:

    log(y_marker) ~ dist-to-rim + site (a random effect)
    
    
- Is `is_overexpressing` a significant covariate?
- Calculate the residuals after applying this model, visualize the z-score 


#### Data requirements

- Data:
    - dat_meas:
        - imageid+objectid ~ measurement_id
    - dat_d2rim:
        - objectid ~ d2rim
    - dat_pos:
        - objectid ~ is_FLAG+is_GFP
    
    
- Metadata:
    - dat_condition:
        - conditionid ~ gene+is_FLAG+is_GFP
    - dat_image:
        - imageid ~ site + conditionid
           

In [None]:
# Config file path taken from the Snakemake inputs; handed to spb.get_bro below.
fn_config =  sm.input.fn_config

In [None]:
# Create the spherpro helper object from the config file. Its session, query
# (`doquery`, `data`) and filter (`filters.objectfilterlib`) helpers are used below.
bro = spb.get_bro(fn_config)

In [None]:
# Short alias for the shared name definitions (column names, suffixes) in src.variables.Vars.
V = Vars

In [None]:
class Config:
    """Notebook-wide constants: input path, filter names and fit-condition classes."""

    # --- inputs ---------------------------------------------------------------
    # CSV with the construct annotation, taken from the Snakemake inputs.
    fn_constructs = pathlib.Path(sm.input.fn_constructs)

    # --- naming ---------------------------------------------------------------
    REF_COND = 'ctrl'
    SUFFIX_NB = '-NB'

    # --- object filters read from the database ---------------------------------
    FIL_GFPPOS = 'is-gfppos'
    FIL_FLAGPOS = 'is-flagpos'
    # Variants of the two filters above, named with the project-wide suffix.
    FIL_GFPPOSNB = FIL_GFPPOS + V.SUFFIX_FILNB
    FIL_FLAGPOSNB = FIL_FLAGPOS + V.SUFFIX_FILNB

    # Filter name -> column name. The key order determines the order in which the
    # filters are joined onto the object query further down.
    DIC_FILS = {
        FIL_GFPPOS: V.COL_GFPPOS,
        FIL_FLAGPOS: V.COL_FLAGPOS,
        FIL_FLAGPOSNB: V.COL_FLAGPOSNB,
        FIL_GFPPOSNB: V.COL_GFPPOSNB,
    }

    # --- object filters written by this notebook --------------------------------
    FIL_LM = 'modelfitcond_v1'
    FIL_LM_ONEFLAG = 'modelfitcondflag_v1'
    FIL_LM_ALLCTRL = 'modelfitcondallctrl_v1'
    # Fit-condition classes; the list position is the integer code stored as filter value.
    FIL_LM_CLASSES = ['doubt', 'ctrl', 'oexp-NB', 'oexp']


C = Config

In [None]:

def _filter_subquery(filter_name):
    """Return a subquery over the object_filters rows of the filter called `filter_name`."""
    return (
        bro.session.query(db.object_filters)
        .join(db.object_filter_names)
        .filter(db.object_filter_names.object_filter_name == filter_name)
        .subquery()
    )


# Not referenced again in the visible cells; kept so the notebook namespace is unchanged.
fil_gfppos = _filter_subquery(C.FIL_GFPPOS)

# Base query: object metadata restricted to 'cell' objects, extended with the
# condition and plate id of the image each object belongs to.
q_obj = bro.data.get_objectmeta_query().filter(db.objects.object_type == 'cell')
q_obj = q_obj.join(db.conditions, db.images.condition_id == db.conditions.condition_id)
q_obj = q_obj.add_columns(db.conditions.condition_id, db.conditions.plate_id)

# Join every configured filter onto the objects; each one contributes its
# filter value as a column labelled with the filter name.
for fil_name in C.DIC_FILS:
    fil = _filter_subquery(fil_name)
    q_obj = q_obj.join(fil, fil.c.object_id == db.objects.object_id)
    q_obj = q_obj.add_columns(fil.c.filter_value.label(fil_name))

In [None]:
%%time
# Run the assembled object query; the result is merged with the condition and
# construct tables further down.
dat_obj = bro.doquery(q_obj)

In [None]:
# Display the query result for a visual check.
dat_obj

In [None]:
# Load the complete conditions table.
dat_cond = bro.doquery(bro.session.query(db.conditions))

In [None]:
# Read the construct annotation CSV configured in C.fn_constructs.
dat_constructs = pd.read_csv(C.fn_constructs)

In [None]:
# Attach condition id and name to the constructs. No `on=` is given, so pandas
# inner-joins on the columns both frames share — presumably the condition name;
# verify against the CSV header.
dat_constructs = dat_constructs.merge(dat_cond[[V.COL_CONDID, V.COL_CONDNAME]])

In [None]:
def get_fitcond(dat):
    """Classify every object into a fit-condition class (GFP and FLAG constructs).

    The classes are the entries of ``C.FIL_LM_CLASSES``. Rules are applied in
    the order below and a later rule overrides an earlier one:

    1. every object starts as the reference class ``C.FIL_LM_CLASSES[1]``;
    2. ``C.FIL_LM_CLASSES[2]`` if the construct is GFP-tagged and
       ``C.FIL_GFPPOSNB`` > 0, or FLAG-tagged and ``C.FIL_FLAGPOSNB`` > 0;
    3. ``C.FIL_LM_CLASSES[3]`` if the construct is GFP-tagged and
       ``C.FIL_GFPPOS`` == 2, or FLAG-tagged and ``C.FIL_FLAGPOS`` == 2;
    4. ``C.FIL_LM_CLASSES[0]`` if the construct is GFP-tagged and
       ``C.FIL_GFPPOS`` == 1, or FLAG-tagged and ``C.FIL_FLAGPOS`` == 1.

    Parameters
    ----------
    dat : pandas.DataFrame
        Needs the columns ``V.COL_OBJID``, ``V.COL_TAGGFP``, ``V.COL_TAGFLAG``
        and the four filter columns named above. It is left unmodified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the index of ``dat`` and the columns
        ``[V.COL_FITCONDITIONNAME, V.COL_OBJID]``; the first one is categorical
        with categories ``C.FIL_LM_CLASSES``.
    """
    has_gfp = dat[V.COL_TAGGFP] == 1
    has_flag = dat[V.COL_TAGFLAG] == 1

    # (class, mask) pairs in order of increasing priority.
    rules = [
        (C.FIL_LM_CLASSES[2],
         (has_gfp & (dat[C.FIL_GFPPOSNB] > 0)) | (has_flag & (dat[C.FIL_FLAGPOSNB] > 0))),
        (C.FIL_LM_CLASSES[3],
         (has_gfp & (dat[C.FIL_GFPPOS] == 2)) | (has_flag & (dat[C.FIL_FLAGPOS] == 2))),
        (C.FIL_LM_CLASSES[0],
         (has_gfp & (dat[C.FIL_GFPPOS] == 1)) | (has_flag & (dat[C.FIL_FLAGPOS] == 1))),
    ]

    # Labels live in their own Series so the caller's frame is never written to.
    labels = pd.Series(C.FIL_LM_CLASSES[1], index=dat.index, dtype=object)
    for label, mask in rules:
        labels.loc[mask] = label

    # Build an independent result frame; callers add further columns to it.
    out = dat[[V.COL_OBJID]].copy()
    out.insert(0, V.COL_FITCONDITIONNAME,
               pd.Categorical(labels, categories=C.FIL_LM_CLASSES))
    return out

In [None]:
# Combine the condition, construct and per-object tables (merged on their shared
# columns) and classify every object.
dat_fitcond = get_fitcond(dat_cond.merge(dat_constructs).merge(dat_obj))
# The filter value is the integer category code, i.e. the position of the class
# in C.FIL_LM_CLASSES.
dat_fitcond[db.object_filters.filter_value.key] = dat_fitcond[V.COL_FITCONDITIONNAME].cat.codes

In [None]:
# Category code 0 is the first entry of C.FIL_LM_CLASSES ('doubt'); show those objects.
fil = dat_fitcond[V.COL_FITCONDITIONNAME].cat.codes == 0
dat_fitcond.loc[fil]

In [None]:
# Write the classification out under the filter name C.FIL_LM ('modelfitcond_v1').
bro.filters.objectfilterlib.write_filter_to_db(dat_fitcond, C.FIL_LM)

Same as above, but in this filter all cells from FLAG-tagged constructs are labelled 'ctrl'; only GFP-tagged constructs are classified.

In [None]:
def get_fitcond(dat):
    """Classify every object into a fit-condition class (GFP constructs only).

    Only GFP-tagged constructs are classified; all other objects, including
    those from FLAG-tagged constructs, keep the reference class. The classes
    are the entries of ``C.FIL_LM_CLASSES``. Rules are applied in the order
    below and a later rule overrides an earlier one:

    1. every object starts as the reference class ``C.FIL_LM_CLASSES[1]``;
    2. ``C.FIL_LM_CLASSES[2]`` if GFP-tagged and ``C.FIL_GFPPOSNB`` > 0;
    3. ``C.FIL_LM_CLASSES[3]`` if GFP-tagged and ``C.FIL_GFPPOS`` == 2;
    4. ``C.FIL_LM_CLASSES[0]`` if GFP-tagged and ``C.FIL_GFPPOS`` == 1.

    Parameters
    ----------
    dat : pandas.DataFrame
        Needs the columns ``V.COL_OBJID``, ``V.COL_TAGGFP``, ``C.FIL_GFPPOS``
        and ``C.FIL_GFPPOSNB``. It is left unmodified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the index of ``dat`` and the columns
        ``[V.COL_FITCONDITIONNAME, V.COL_OBJID]``; the first one is categorical
        with categories ``C.FIL_LM_CLASSES``.
    """
    has_gfp = dat[V.COL_TAGGFP] == 1

    # (class, mask) pairs in order of increasing priority.
    rules = [
        (C.FIL_LM_CLASSES[2], has_gfp & (dat[C.FIL_GFPPOSNB] > 0)),
        (C.FIL_LM_CLASSES[3], has_gfp & (dat[C.FIL_GFPPOS] == 2)),
        (C.FIL_LM_CLASSES[0], has_gfp & (dat[C.FIL_GFPPOS] == 1)),
    ]

    # Labels live in their own Series so the caller's frame is never written to.
    labels = pd.Series(C.FIL_LM_CLASSES[1], index=dat.index, dtype=object)
    for label, mask in rules:
        labels.loc[mask] = label

    # Build an independent result frame; callers add further columns to it.
    out = dat[[V.COL_OBJID]].copy()
    out.insert(0, V.COL_FITCONDITIONNAME,
               pd.Categorical(labels, categories=C.FIL_LM_CLASSES))
    return out

In [None]:
# Combine the condition, construct and per-object tables (merged on their shared
# columns) and classify every object.
dat_fitcond = get_fitcond(dat_cond.merge(dat_constructs).merge(dat_obj))
# The filter value is the integer category code, i.e. the position of the class
# in C.FIL_LM_CLASSES.
dat_fitcond[db.object_filters.filter_value.key] = dat_fitcond[V.COL_FITCONDITIONNAME].cat.codes

In [None]:
# Category code 0 is the first entry of C.FIL_LM_CLASSES ('doubt'); show those objects.
fil = dat_fitcond[V.COL_FITCONDITIONNAME].cat.codes == 0
dat_fitcond.loc[fil]

In [None]:
# Write the classification out under the filter name C.FIL_LM_ONEFLAG ('modelfitcondflag_v1').
bro.filters.objectfilterlib.write_filter_to_db(dat_fitcond, C.FIL_LM_ONEFLAG)

Same as above, but in this filter all cells are labelled 'ctrl'.

In [None]:
def get_fitcond(dat):
    """Label every object with the reference class ``C.FIL_LM_CLASSES[1]``.

    Parameters
    ----------
    dat : pandas.DataFrame
        Needs the column ``V.COL_OBJID``. It is left unmodified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the index of ``dat`` and the columns
        ``[V.COL_FITCONDITIONNAME, V.COL_OBJID]``; the first one is categorical
        with categories ``C.FIL_LM_CLASSES`` and holds the reference class in
        every row.
    """
    # Build an independent result frame; the caller's frame is never written to.
    out = dat[[V.COL_OBJID]].copy()
    labels = pd.Categorical([C.FIL_LM_CLASSES[1]] * len(out),
                            categories=C.FIL_LM_CLASSES)
    out.insert(0, V.COL_FITCONDITIONNAME, labels)
    return out

In [None]:
# Combine the condition, construct and per-object tables (merged on their shared
# columns) and label every object.
dat_fitcond = get_fitcond(dat_cond.merge(dat_constructs).merge(dat_obj))
# The filter value is the integer category code, i.e. the position of the class
# in C.FIL_LM_CLASSES.
dat_fitcond[db.object_filters.filter_value.key] = dat_fitcond[V.COL_FITCONDITIONNAME].cat.codes

In [None]:
# Category code 0 is 'doubt'. The get_fitcond defined just above assigns
# C.FIL_LM_CLASSES[1] ('ctrl') to every object, so this selection is expected to be empty.
fil = dat_fitcond[V.COL_FITCONDITIONNAME].cat.codes == 0
dat_fitcond.loc[fil]

In [None]:
# Write the classification out under the filter name C.FIL_LM_ALLCTRL ('modelfitcondallctrl_v1').
bro.filters.objectfilterlib.write_filter_to_db(dat_fitcond, C.FIL_LM_ALLCTRL)