In [1]:
import os
import glob
import pickle
import numpy as np
import pandas as pd
import datasetdatabase as dsdb

# Sets the DSDB_PROCESS_LIMIT environment variable for this kernel process.
# Presumably this caps how many worker processes datasetdatabase may use —
# TODO confirm against the dsdb documentation.
# NOTE(review): this assignment runs after `import datasetdatabase`; if dsdb
# reads the variable at import time the value set here is ignored — confirm.
os.environ["DSDB_PROCESS_LIMIT"] = "16"

# MEM, DNA and FOV Datasets

Connecting to dsdb

In [2]:
# Build the database handle used by every purge/upload/preview cell below.
# NOTE(review): the config path is absolute and site-specific, and its name
# suggests a production database — confirm the target before running the
# purge cell.
prod = dsdb.DatasetDatabase(config="/allen/aics/assay-dev/Analysis/QCB_database/prod_config.json")

Loading metadata

In [3]:
# Read the pickled metadata table; the upload cell further down takes its
# id column from this frame.
# NOTE: unpickling can execute arbitrary code, so only load trusted files.
with open("../../data-raw/QCB_cell_meta.pkl", mode="rb") as meta_file:
    df_meta = pickle.load(meta_file)

# Show the first rows as a quick sanity check of what was loaded.
df_meta.head()

Unnamed: 0,cell_id,cell_ver,czi_filename,idx_in_stack,mitosis,mode,roi,src_csv,src_csv_row,str_ver,structure_name
0,c17a53d0-7ff6-4106-ab53-d2be3240f168,0.1.0,20161209_C01_002.czi,3,0,n,"[19, 195, 332, 513, 65, 295]",//allen/aics/assay-dev/StashSpreadsheets/assay...,7,0.1.0,FBL
1,77f11f0d-66d9-49c5-b2b8-be40777780ed,0.1.0,20161209_C01_002.czi,4,0,n,"[19, 187, 264, 453, 226, 444]",//allen/aics/assay-dev/StashSpreadsheets/assay...,8,0.1.0,FBL
2,427284a7-02b2-440b-a1b0-28c0b8f12379,0.1.0,20161209_C01_002.czi,5,0,n,"[19, 190, 73, 322, 422, 613]",//allen/aics/assay-dev/StashSpreadsheets/assay...,9,0.1.0,FBL
3,0241af5b-4133-4525-9d14-9a65f8a93f6f,0.1.0,20161209_C01_002.czi,7,0,n,"[19, 195, 355, 594, 68, 397]",//allen/aics/assay-dev/StashSpreadsheets/assay...,11,0.1.0,FBL
4,a705b03f-7af4-4fba-9e8f-8b20b085a1ab,0.1.0,20161209_C01_003.czi,1,0,n,"[8, 189, 78, 297, 228, 481]",//allen/aics/assay-dev/StashSpreadsheets/assay...,15,0.1.0,FBL


Upload existing local pkl files

In [11]:
# Ingestion manifest for the drug feature tables. Each entry carries:
#   file        - pickle filename, joined onto "../../data-raw/" by the upload cell
#   name        - dataset name used for purge / upload / preview
#   description - free-text description stored with the dataset
#   na_allowed  - when False, the upload cell asserts the table has no nulls
#   meta_id     - metadata column appended to the features (None to skip)
DSToIngest = [
    dict(
        file="QCB_DRUG_MEM_feature.pkl",
        name="QCB_DRUG_MEM_feature",
        description="Cell membrane features extracted from QCB DRUG data",
        na_allowed=True,
        meta_id="cell_id",
    ),
    dict(
        file="QCB_DRUG_DNA_feature.pkl",
        name="QCB_DRUG_DNA_feature",
        description="Nucleus features extracted from QCB DRUG data",
        na_allowed=True,
        meta_id="cell_id",
    ),
]

Purge datasets first

In [12]:
# Best-effort removal of any previously stored copy of each dataset listed in
# DSToIngest, so the upload cell that follows does not collide with it.
# NOTE(review): `_purge_dataset` is a private dsdb method and `prod` points at
# the database opened above — confirm that deleting these datasets is intended.
for data in DSToIngest:

    print("Purging: ",data["file"])

    try:
        prod._purge_dataset(name=data["name"])
    except Exception as err:
        # Stay best-effort (presumably the dataset may simply not exist yet),
        # but report the reason instead of hiding it. Catching Exception rather
        # than using a bare except lets KeyboardInterrupt/SystemExit propagate.
        print("  purge skipped for", data["name"], "->", repr(err))

Purging:  QCB_DRUG_MEM_feature.pkl
Purging:  QCB_DRUG_DNA_feature.pkl


In [13]:
# For every entry in DSToIngest: load the pickled feature table, optionally
# append the metadata id column from df_meta, wrap it in a dsdb.Dataset and
# upload it to `prod`.
for data in DSToIngest:

    print("running:",data["file"])

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(os.path.join("../../data-raw/",data["file"]), "rb") as fp:
        df = pickle.load(fp)

    if data["na_allowed"] is False:
        # Fail loudly, naming the offending file, when nulls are not permitted.
        assert not df.isnull().values.any(), (
            data["file"] + " contains missing values but na_allowed is False")

    if data["meta_id"] is not None:
        # pd.concat(axis=1) aligns on index labels with an outer join: the
        # features get a fresh 0..n-1 index here, while df_meta keeps its own.
        # If the two tables differ in length the result is padded with NaN
        # rather than raising, so surface that case explicitly.
        # NOTE(review): this also assumes df_meta rows are in the same order as
        # the feature rows — confirm.
        if len(df) != len(df_meta):
            print("WARNING:", data["file"], "has", len(df), "rows but df_meta has",
                  len(df_meta), "- concat will introduce NaN-padded rows")
        df = pd.concat([df.reset_index(drop=True), df_meta[data["meta_id"]]], axis=1)

    ds = dsdb.Dataset(df, name=data["name"], description=data["description"])

    ds.upload_to(prod)

running: QCB_DRUG_MEM_feature.pkl
Storing algorithm parameters.
Input dataset already exists in database. 1
Tearing down object...
running: QCB_DRUG_DNA_feature.pkl
Storing algorithm parameters.
Input dataset already exists in database. 1
Tearing down object...


Checking uploaded datasets

In [14]:
# Print a preview of each dataset named in DSToIngest as a post-upload check.
for data in DSToIngest:

    try:
        print(prod.preview(name=data["name"]))
    except Exception as err:
        # Keep going so every dataset is checked, but show why this one failed.
        # Catching Exception (not a bare except) lets KeyboardInterrupt through.
        print(data["name"]+" fail", repr(err))

{'info': {'id': 58, 'name': 'QCB_DRUG_MEM_feature', 'description': 'Cell membrane features extracted from QCB DRUG data', 'introspector': 'datasetdatabase.introspect.dataframe.DataFrameIntrospector', 'created': datetime.datetime(2018, 10, 9, 20, 42, 27, 788234)}, 'shape': (1519, 46), 'keys': ['mem_volume', 'mem_surface_area', 'mem_1st_axis_x', 'mem_1st_axis_y', 'mem_1st_axis_z', 'mem_2nd_axis_x', 'mem_2nd_axis_y', 'mem_2nd_axis_z', 'mem_3rd_axis_x', 'mem_3rd_axis_y', 'mem_3rd_axis_z', 'mem_1st_axis_length', 'mem_2nd_axis_length', 'mem_3rd_axis_length', 'mem_1st_eigenvalue', 'mem_2nd_eigenvalue', 'mem_3rd_eigenvalue', 'mem_meridional_eccentricity', 'mem_equator_eccentricity', 'mem_sphericity', 'mem_lowest_z', 'mem_highest_z', 'mem_x_centroid', 'mem_y_centroid', 'mem_z_centroid', 'mem_intensity_mean', 'mem_intensity_median', 'mem_intensity_sum', 'mem_intensity_mode', 'mem_intensity_max', 'mem_intensity_std', 'mem_intensity_entropy', 'mem_haralick_ang2nd_moment', 'mem_haralick_contrast', 

# Structures Datasets

Uploading existing structures pkl files

In [7]:
# Ingestion manifest for the per-structure feature tables; same keys as the
# manifest used earlier in this notebook (file, name, description, na_allowed,
# meta_id). This rebinding replaces the earlier DSToIngest list.
DSToIngest = [
    # NOTE(review): the file is spelled "LMAP1" while the dataset name is
    # "LAMP1" — confirm the on-disk filename really carries the transposition.
    dict(
        file="QCB_LMAP1_feature.pkl",
        name="QCB_LAMP1_feature",
        description="Features extracted from QCB images of LAMP1-tagged lysosome.",
        na_allowed=True,
        meta_id="cell_id",
    ),
    dict(
        file="QCB_SEC61B_feature.pkl",
        name="QCB_SEC61B_feature",
        description="Features extracted from QCB images of SEC61B-tagged ER.",
        na_allowed=True,
        meta_id="cell_id",
    ),
    dict(
        file="QCB_TUBA1B_feature.pkl",
        name="QCB_TUBA1B_feature",
        description="Features extracted from QCB images of TUBA1B-tagged microtubules.",
        na_allowed=True,
        meta_id="cell_id",
    ),
    # NOTE(review): unlike the other entries this filename has no ".pkl"
    # extension, and the file says "ZO1" while the dataset name says "TJP1" —
    # confirm both against the files on disk.
    # NOTE(review): TJP1 (ZO-1) is usually described as a tight-junction
    # protein; the description says "adherens junctions" — confirm wording.
    dict(
        file="QCB_ZO1_feature",
        name="QCB_TJP1_feature",
        description="Features extracted from QCB images of TJP1-tagged adherens junctions.",
        na_allowed=True,
        meta_id="cell_id",
    ),
]

In [8]:
# For every entry in DSToIngest: load the pickled structure feature table,
# expose its index as a "cell_id" column, wrap it in a dsdb.Dataset and upload
# it to `prod`.
# NOTE(review): unlike the drug upload cell, this loop never reads
# data["meta_id"] or df_meta; the id column name is hard-coded below.
for data in DSToIngest:

    print("running:",data["file"])

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(os.path.join("../../data-raw/",data["file"]), "rb") as fp:
        df = pickle.load(fp)

    if data["na_allowed"] is False:
        # Fail loudly, naming the offending file, when nulls are not permitted.
        assert not df.isnull().values.any(), (
            data["file"] + " contains missing values but na_allowed is False")

    # Move the existing index into a regular column, then relabel that column
    # "cell_id"; index=str additionally converts the new row labels to strings.
    # NOTE(review): the rename only takes effect if the pickled frame's index is
    # unnamed (so reset_index calls the new column "index") — confirm.
    df = df.reset_index(drop=False)
    df = df.rename(index=str, columns={"index":"cell_id"})

    ds = dsdb.Dataset(df, name=data["name"], description=data["description"])

    ds.upload_to(prod)

running: QCB_ZO1_feature
Storing algorithm parameters.
Input dataset already exists in database. 1
Tearing down object...


In [7]:
# Display the md5 attribute of `ds`.
# NOTE(review): `ds` is whatever Dataset an upload loop last assigned in this
# kernel, so this cell depends on leftover loop state — confirm which dataset
# the value refers to.
ds.md5

'c062b067037fe289f7b8e4787542c38d'