In [3]:
# Update the status of bad images: flag their smFISH counts in the database.

In [4]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from scipy.stats import gamma
from scipy.optimize import curve_fit
from scipy import special

from configparser import ConfigParser, ExtendedInterpolation

import utils as utils

from tqdm import tqdm_notebook as tqdm

In [5]:
# Let wide tables render up to 50 columns before pandas truncates them.
pd.options.display.max_columns = 50

In [11]:
# Parse config.ini from the current working directory. ExtendedInterpolation
# allows values to reference other options with the ${section:option} syntax.
config = ConfigParser(interpolation=ExtendedInterpolation())

# ConfigParser.read() silently skips files it cannot open and returns only the
# names it actually parsed, so verify the result here instead of failing later
# with a much less obvious "no section" error.
config_path = 'config.ini'
parsed_files = config.read(config_path)
if not parsed_files:
    raise FileNotFoundError('Could not read configuration file: ' + config_path)

# Display the list of parsed files as the cell output.
parsed_files

['config.ini']

In [12]:
# Data root folder plus the input and output database file names, all taken
# from the [all] section of the parsed config.
FOLDER, DB_FILENAME, DB_NEW_FILENAME = (
    config.get("all", option)
    for option in ("FOLDER", "DB_FILENAME", "DB_NEW_FILENAME")
)

# Channel tags (presumably the file-name prefixes of per-channel files --
# TODO confirm) and the database columns holding the smFISH counts.
CHANNELS = ["C0-", "C1-", "C2-"]
SMFISH_COLUMNS = ["#c0_smfish", "#c1_smfish", "#c2_smfish"]

In [9]:
# Experiment identifiers; they show up as file-name prefixes in the database
# (e.g. 'N2_885', 'RNAi_set1_625').
#
# CHANNELS and SMFISH_COLUMNS are deliberately NOT redefined in this cell.
# The five-channel variants that used to live here were executed before the
# configuration cell in the recorded session and named '#c3_smfish' /
# '#c4_smfish', which are not columns of the database table. Re-running the
# notebook top to bottom would have silently replaced the three-channel
# definitions with them.
EXPERIMENTS = ['N2', 'SEA-12', 'MK4', 'CB428', 'RNAi']

In [14]:
# Load the smFISH database (a CSV under FOLDER/smFISH-database) that lists the
# images to process. No index column is set, so rows keep the default integer
# index.
df_path = os.path.join(FOLDER, "smFISH-database", DB_FILENAME)
read_options = {
    "sep": ",",
    "na_values": [""],  # empty fields are read as NaN
}
df = pd.read_csv(df_path, **read_options)


In [15]:
# Keep only the known database columns, in this fixed order. Selecting a
# column that is missing from the CSV raises a KeyError.
selected_columns = [
    # smFISH counts (raw and adjusted) per channel
    '#c0_smfish', '#c0_smfish_adj',
    '#c1_smfish', '#c1_smfish_adj',
    '#c2_smfish', '#c2_smfish_adj',
    # image-level counts and channel indices
    '#channels', '#nuclei', '#nucs_predicted',
    'DAPI channel', 'GFP channel',
    # per-channel descriptors
    'c0', 'c0_lambda', 'c0_type',
    'c1', 'c1_lambda', 'c1_type',
    'c2', 'c2_lambda', 'c2_type',
    'c3', 'c3_lambda',
    'c4', 'c4_lambda',
    # crop geometry and file names
    'crop_offset_x', 'crop_offset_y',
    'cropped_image_file', 'cropped_mask_file',
    'ellipse', 'filename',
    # flags
    'is_dapi_stack', 'is_male', 'is_male_batch',
    'is_too_bleached', 'is_valid_final', 'is_z_cropped',
    # remaining metadata
    'num_z_planes', 'original filename',
    'signal', 'status', 'tx', 'tx_desc', 'unique_id',
]
df = df[selected_columns]

In [16]:
# Preview the first five rows to confirm the load and column selection.
df.head(n=5)

Unnamed: 0,#c0_smfish,#c0_smfish_adj,#c1_smfish,#c1_smfish_adj,#c2_smfish,#c2_smfish_adj,#channels,#nuclei,#nucs_predicted,DAPI channel,GFP channel,c0,c0_lambda,c0_type,c1,c1_lambda,c1_type,c2,c2_lambda,c2_type,c3,c3_lambda,c4,c4_lambda,crop_offset_x,crop_offset_y,cropped_image_file,cropped_mask_file,ellipse,filename,is_dapi_stack,is_male,is_male_batch,is_too_bleached,is_valid_final,is_z_cropped,num_z_planes,original filename,signal,status,tx,tx_desc,unique_id
0,1706,1735.83,-1,-1.0,1336,1408.86,5,-1.0,11.0,4.0,3.0,Cy5,670.0,dpy23.ex,GoldFISH,566.0,dpy23.int,mCherry,610.0,mdh1.ex,GFP,507.0,DAPI,461.0,367.0,123.0,RNAi_set1_625_cropped_3229.tif,RNAi_set1_625_cropped_3229.mask.tif,R_485x470_523x506_591x506_648x466_676x387_659x...,RNAi_set1_625,1.0,-1.0,0.0,-1.0,1.0,-1.0,91.0,180815_n2_rnai.set1_dpy23.ex_dpy23.int_mdh1.ex...,-1.0,1.0,-1.0,,-1
1,102,537.07,884,838.14,-1,-1.0,5,-1.0,1.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,271.0,44.0,N2_885_cropped_3912.tif,N2_885_cropped_3912.mask.tif,E_-0.44319727940142284_0.18643130312400863_-0....,N2_885,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_005 series_19,-1.0,1.0,-1.0,,-1
2,1184,1296.55,261,323.36,-1,-1.0,5,-1.0,1.0,4.0,3.0,Cy5,670.0,dpy23.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,dpy23.int,GFP,507.0,DAPI,461.0,372.0,450.0,N2_1646_cropped_3983.tif,N2_1646_cropped_3983.mask.tif,R_457x581_412x626_417x704_478x764_529x772_601x...,N2_1646,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180214_n2_dpy23.ex_dpy23.int_mdh1.ex_001 series1,-1.0,1.0,-1.0,,-1
3,1440,1681.77,17,3289.43,-1,-1.0,5,-1.0,11.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,330.0,313.0,N2_1645_cropped_3982.tif,N2_1645_cropped_3982.mask.tif,E_-0.4658776016654055_-0.2404532809755947_-0.7...,N2_1645,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series41,-1.0,1.0,-1.0,,-1
4,47,92.8,1372,1740.47,-1,-1.0,5,-1.0,11.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,349.0,437.0,N2_1644_cropped_3981.tif,N2_1644_cropped_3981.mask.tif,E_-0.4269997130563113_0.2566874426238051_-0.74...,N2_1644,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series40,-1.0,1.0,-1.0,,-1


In [19]:
# The update itself: images listed in the 'bad' folder get an smFISH count of -3.

In [20]:
# Collect the names of all CSV files anywhere below FOLDER/bad. Only the bare
# file names are kept (not their directories), because the names themselves
# are parsed later on.
# The suffix test uses endswith() so that files merely containing '.csv'
# (e.g. 'x.csv.bak') are not picked up.
bad_dir = os.path.join(FOLDER, 'bad')
dataset = [
    name
    for _root, _dirs, names in os.walk(bad_dir)
    for name in names
    if name.endswith('.csv')
]

In [22]:
# Independent (deep) copy of the database frame; this copy is the one that is
# later type-normalised and written to the new database file.
dff = df.copy(deep=True)

In [23]:
# Flag every (image, channel) pair that has a CSV in the 'bad' folder by
# setting its smFISH count to -3.
#
# The flags are written to `dff`, because `dff` is the frame that is saved at
# the end of the notebook. Writing them to `df` (as before) left the saved
# file without any of the updates, since `dff` was copied from `df` earlier.
#
# File names are parsed as '<3-char channel prefix><cropped image stem>.csv':
# the first two characters, lower-cased, select the '#cN_smfish' column and the
# part after the prefix names the cropped image, e.g. a file called
# 'C0-N2_885_cropped_3912.csv' maps to column '#c0_smfish' and image
# 'N2_885_cropped_3912.tif'.
for marker_file in tqdm(dataset):
    smfish_column = '#' + marker_file[:2].lower() + '_smfish'
    image_file = marker_file[3:-4] + '.tif'

    if smfish_column not in dff.columns:
        # Assigning to an unknown column would silently create a new one.
        print('skipping', marker_file, '- no such column:', smfish_column)
        continue

    # A boolean mask with .loc copes with zero, one or several matching rows;
    # .at is documented for a single scalar row label only.
    is_bad_image = dff['cropped_image_file'] == image_file
    print(smfish_column, dff.index[is_bad_image])

    dff.loc[is_bad_image, smfish_column] = -3

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




In [24]:
# Preview the frame that is going to be saved (`dff`) rather than the raw `df`;
# head() keeps the output short instead of rendering the whole table.
dff.head(10)

Unnamed: 0,#c0_smfish,#c0_smfish_adj,#c1_smfish,#c1_smfish_adj,#c2_smfish,#c2_smfish_adj,#channels,#nuclei,#nucs_predicted,DAPI channel,GFP channel,c0,c0_lambda,c0_type,c1,c1_lambda,c1_type,c2,c2_lambda,c2_type,c3,c3_lambda,c4,c4_lambda,crop_offset_x,crop_offset_y,cropped_image_file,cropped_mask_file,ellipse,filename,is_dapi_stack,is_male,is_male_batch,is_too_bleached,is_valid_final,is_z_cropped,num_z_planes,original filename,signal,status,tx,tx_desc,unique_id
0,1706,1735.83,-1,-1.00,1336,1408.86,5,-1.0,11.0,4.0,3.0,Cy5,670.0,dpy23.ex,GoldFISH,566.0,dpy23.int,mCherry,610.0,mdh1.ex,GFP,507.0,DAPI,461.0,367.0,123.0,RNAi_set1_625_cropped_3229.tif,RNAi_set1_625_cropped_3229.mask.tif,R_485x470_523x506_591x506_648x466_676x387_659x...,RNAi_set1_625,1.0,-1.0,0.0,-1.0,1.0,-1.0,91.0,180815_n2_rnai.set1_dpy23.ex_dpy23.int_mdh1.ex...,-1.0,1.0,-1.0,,-1
1,102,537.07,884,838.14,-1,-1.00,5,-1.0,1.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,271.0,44.0,N2_885_cropped_3912.tif,N2_885_cropped_3912.mask.tif,E_-0.44319727940142284_0.18643130312400863_-0....,N2_885,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_005 series_19,-1.0,1.0,-1.0,,-1
2,1184,1296.55,261,323.36,-1,-1.00,5,-1.0,1.0,4.0,3.0,Cy5,670.0,dpy23.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,dpy23.int,GFP,507.0,DAPI,461.0,372.0,450.0,N2_1646_cropped_3983.tif,N2_1646_cropped_3983.mask.tif,R_457x581_412x626_417x704_478x764_529x772_601x...,N2_1646,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180214_n2_dpy23.ex_dpy23.int_mdh1.ex_001 series1,-1.0,1.0,-1.0,,-1
3,1440,1681.77,17,3289.43,-1,-1.00,5,-1.0,11.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,330.0,313.0,N2_1645_cropped_3982.tif,N2_1645_cropped_3982.mask.tif,E_-0.4658776016654055_-0.2404532809755947_-0.7...,N2_1645,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series41,-1.0,1.0,-1.0,,-1
4,47,92.80,1372,1740.47,-1,-1.00,5,-1.0,11.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,349.0,437.0,N2_1644_cropped_3981.tif,N2_1644_cropped_3981.mask.tif,E_-0.4269997130563113_0.2566874426238051_-0.74...,N2_1644,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series40,-1.0,1.0,-1.0,,-1
5,38,61.99,146,151.40,-1,-1.00,5,-1.0,3.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,570.0,195.0,N2_1643_cropped_3980.tif,N2_1643_cropped_3980.mask.tif,R_640x373_617x460_610x530_640x602_703x614_767x...,N2_1643,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series39,-1.0,1.0,-1.0,,-1
6,-1,-1.00,-1,-1.00,-1,-1.00,5,-1.0,1.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,285.0,356.0,N2_1643_cropped_3978.tif,N2_1643_cropped_3978.mask.tif,E_-0.851799589448422_-0.17386590452664857_-0.3...,N2_1643,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series39,-1.0,1.0,-1.0,,-1
7,141,151.76,3025,3972.97,-1,-1.00,5,-1.0,1.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,398.0,314.0,N2_1642_cropped_3977.tif,N2_1642_cropped_3977.mask.tif,E_0.7635698480055423_-0.24064825421703068_0.43...,N2_1642,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series38,-1.0,1.0,-1.0,,-1
8,1482,1670.29,385,396.35,-1,-1.00,5,-1.0,11.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,370.0,458.0,N2_1641_cropped_3976.tif,N2_1641_cropped_3976.mask.tif,E_-0.3349090029751302_-0.1430839406164421_-0.8...,N2_1641,1.0,-1.0,0.0,-1.0,1.0,1.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series37,-1.0,1.0,-1.0,,-1
9,126,216.00,289,545.59,-1,-1.00,5,-1.0,1.0,4.0,3.0,Cy5,670.0,sdc2.ex,mCherry,610.0,mdh1.ex,GoldFISH,566.0,sdc2.int,GFP,507.0,DAPI,461.0,335.0,475.0,N2_1640_cropped_3975.tif,N2_1640_cropped_3975.mask.tif,E_-0.9348175476052261_-0.05309834067203547_-0....,N2_1640,1.0,-1.0,0.0,-1.0,1.0,0.0,81.0,180219_n2_sdc2.ex_sdc2.int_mdh1.ex_006 series36,-1.0,1.0,-1.0,,-1


In [32]:
# Columns stored as 32-bit floats in the output file. '#channels' is left at
# its current dtype and 'unique_id' is cast to an integer type separately.
float32_columns = (
    '#nuclei', '#nucs_predicted',
    'DAPI channel', 'GFP channel',
    'c4_lambda',
    'crop_offset_x', 'crop_offset_y',
    'is_dapi_stack', 'is_male', 'is_male_batch',
    'is_too_bleached', 'is_valid_final', 'is_z_cropped',
    'num_z_planes',
    'signal', 'status', 'tx', 'tx_desc',
)

# One astype call with a column -> dtype mapping converts all of them at once;
# columns not named in the mapping are carried over unchanged.
dff = dff.astype({name: np.float32 for name in float32_columns})

In [33]:
# 'unique_id' is the only column stored as a 32-bit integer.
dff['unique_id'] = dff['unique_id'].astype(np.int32)

In [34]:
# Write the processed table to the new database file under
# FOLDER/smFISH-database. The row index is omitted and the columns are written
# in the fixed order listed below.
output_path = os.path.join(FOLDER, "smFISH-database", DB_NEW_FILENAME)
output_columns = [
    '#c0_smfish', '#c0_smfish_adj',
    '#c1_smfish', '#c1_smfish_adj',
    '#c2_smfish', '#c2_smfish_adj',
    '#channels', '#nuclei', '#nucs_predicted',
    'DAPI channel', 'GFP channel',
    'c0', 'c0_lambda', 'c0_type',
    'c1', 'c1_lambda', 'c1_type',
    'c2', 'c2_lambda', 'c2_type',
    'c3', 'c3_lambda',
    'c4', 'c4_lambda',
    'crop_offset_x', 'crop_offset_y',
    'cropped_image_file', 'cropped_mask_file',
    'ellipse', 'filename',
    'is_dapi_stack', 'is_male', 'is_male_batch',
    'is_too_bleached', 'is_valid_final', 'is_z_cropped',
    'num_z_planes', 'original filename',
    'signal', 'status', 'tx', 'tx_desc', 'unique_id',
]
dff.to_csv(output_path, index=False, columns=output_columns)