In [None]:
import os
from pprint import pprint
from importlib import import_module

from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import joinedload, subqueryload, Load, load_only
from sqlalchemy.orm.exc import NoResultFound
from sqlalchemy.dialects import postgresql

from dataservice.extensions import db
from dataservice.utils import iterate_pairwise
from dataservice import create_app
from dataservice.api.investigator.models import Investigator
from dataservice.api.study.models import Study
from dataservice.api.participant.models import Participant
from dataservice.api.biospecimen.models import Biospecimen
from dataservice.api.family.models import Family
from dataservice.api.family_relationship.models import FamilyRelationship
from dataservice.api.diagnosis.models import Diagnosis
from dataservice.api.outcome.models import Outcome
from dataservice.api.phenotype.models import Phenotype
from dataservice.api.genomic_file.models import GenomicFile
from dataservice.api.sequencing_experiment.models import SequencingExperiment
from dataservice.api.workflow.models import Workflow, WorkflowGenomicFile
from dataservice.api.study_file.models import StudyFile

from dataservice.util.data_import.utils import to_camel_case
from dataservice.util.data_import.etl.defaults import DEFAULT_ENTITY_TYPES

study_id = 'SD_SJZFK2VV'

class BaseLoader(object):
    """
    Notebook helper that creates a dataservice Flask app and pushes an
    application context so the models can be queried through `db`.
    """

    def __init__(self, config_name=None):
        """
        :param config_name: configuration name handed to create_app;
            'development' is used when it is None or empty
        """
        if not config_name:
            config_name = 'development'
        self.setup(config_name)
        # Starts empty; nothing in this class fills it
        self.entity_id_map = {}

    def setup(self, config_name):
        """
        Create the app and push an application context

        No tables are created here: the db.create_all() call is disabled.

        :param config_name: configuration name handed to create_app
        """
        self.app = create_app(config_name)
        # Echo the SQL statements emitted by SQLAlchemy
        self.app.config['SQLALCHEMY_ECHO'] = True
        self.app_context = self.app.app_context()
        self.app_context.push()
        # db.create_all() is commented out, so the schema must already exist

    def teardown(self):
        """
        Clean up

        Closes the session and then drops ALL tables known to `db`, not just
        the rows of a single study.
        """
        db.session.close()
        db.drop_all()

    def drop_all(self, study_external_id):
        """
        Delete a study and the investigator it references

        Whether entities related to the study are removed as well depends on
        the cascade rules of the models (not visible here - TODO confirm).

        :param study_external_id: external_id of the study to delete
        """
        try:
            study = Study.query.filter_by(external_id=study_external_id).one()
        except NoResultFound:
            print("Study {} not found. Aborting drop all for this dataset"
                  .format(study_external_id))
            return

        # Save investigator id before the study is deleted
        investigator_id = study.investigator_id

        # Delete study
        db.session.delete(study)

        # Delete investigator; the lookup returns None when the row is gone,
        # and session.delete(None) would raise
        if investigator_id:
            investigator = Investigator.query.get(investigator_id)
            if investigator is not None:
                db.session.delete(investigator)

        try:
            db.session.commit()
        except Exception:
            # Leave the session usable for the next notebook cell
            db.session.rollback()
            raise

In [None]:
# No config name given, so BaseLoader falls back to 'development'; this
# creates the app and pushes an application context used by the cells below
loader = BaseLoader()

### Get Entities by Study

In [None]:
# Participants in the study, with diagnoses, biospecimens, phenotypes and
# outcomes eager-loaded via joins (only kf_id requested on each)
q = (Participant.query
     .options(joinedload(Participant.diagnoses)
              .load_only('kf_id'))
     .options(joinedload(Participant.biospecimens)
              .load_only('kf_id'))
     .options(joinedload(Participant.phenotypes)
              .load_only('kf_id'))
     .options(joinedload(Participant.outcomes)
              .load_only('kf_id'))
     # Restrict to the study, as the other per-study queries do; without
     # this the count covers participants of every study
     .filter(Participant.study_id == study_id))
print(q.statement.compile(dialect=postgresql.dialect()))
q.count()

In [None]:
# Families reached through participants that belong to the study
q = Family.query.options(load_only('kf_id'))
q = q.join(Family.participants)
q = q.options(Load(Participant).load_only('kf_id', 'study_id'))
q = q.filter(Participant.study_id == study_id)
# One row per family even when several of its participants match
q = q.distinct(Family.kf_id).order_by(Family.kf_id)

In [None]:
# Show the PostgreSQL SQL for the current query `q`, then count its rows
print(
    q.statement.compile(
        dialect=postgresql.dialect()
    )
)
q.count()

In [None]:
# Family relationships whose participant belongs to the study
q = FamilyRelationship.query.options(load_only('kf_id'))
q = q.join(FamilyRelationship.participant)
q = q.options(Load(Participant).load_only('kf_id', 'study_id'))
q = q.filter(Participant.study_id == study_id)
print(q.statement.compile(
    dialect=postgresql.dialect()))
q.count()

In [None]:
# Study files that point directly at the study
q = StudyFile.query.options(load_only('kf_id'))
q = q.filter(StudyFile.study_id == study_id)
print(q.statement.compile(
    dialect=postgresql.dialect()))
q.count()

In [None]:
# Investigators joined to their studies, kept when the study's kf_id matches
q = Investigator.query.options(load_only('kf_id'))
q = q.join(Investigator.studies)
q = q.options(Load(Study).load_only('kf_id'))
q = q.filter(Study.kf_id == study_id)
print(q.statement.compile(
    dialect=postgresql.dialect()))
q.count()

In [None]:
# Genomic files -> biospecimen -> participant, kept when the participant
# belongs to the study
q = GenomicFile.query.options(load_only('kf_id'))
q = q.join(GenomicFile.biospecimen)
q = q.join(Biospecimen.participant)
q = q.options(Load(Participant).load_only("kf_id", "study_id"))
q = q.filter(Participant.study_id == study_id)
print(q.statement.compile(
    dialect=postgresql.dialect()))
q.count()

In [None]:
# Biospecimens whose participant belongs to the study
q = (Biospecimen.query
     .options(load_only('kf_id'))
     # Join from the queried entity (Biospecimen -> Participant), the same
     # relationship the genomic file and sequencing experiment queries use
     .join(Biospecimen.participant)
     .options(Load(Participant).load_only('kf_id', 'study_id'))
     .filter(Participant.study_id == study_id))
print(q.statement.compile(dialect=postgresql.dialect()))
q.count()

In [None]:
# Sequencing experiments -> genomic files -> biospecimen -> participant,
# kept when the participant belongs to the study
q = (SequencingExperiment.query
     .options(load_only('kf_id'))
     .join(SequencingExperiment.genomic_files)
     .join(GenomicFile.biospecimen)
     .join(Biospecimen.participant)
     .options(Load(Participant).load_only("kf_id", "study_id"))
     .filter(Participant.study_id == study_id)
     # The join yields one row per matching genomic file; de-duplicate so
     # count() reports experiments rather than experiment/file pairs
     .distinct())
print(q.statement.compile(dialect=postgresql.dialect()))
q.count()

In [None]:
# Switch to an app built from the 'testing' configuration and push its
# application context so `db` operations below run against it
app = create_app('testing')
app_context = app.app_context()
app_context.push()
# WARNING: drops every table known to `db` before recreating the schema.
# NOTE(review): make sure the 'testing' config points at a disposable
# database - confirm before running
db.drop_all()
db.create_all()

In [None]:
# Seed one study with a single participant and one of each related entity.
# The external id is a plain literal: the original called .format(i) on it
# with an undefined `i`, which raised NameError in a fresh kernel
s = Study(external_id='Study_1')
db.session.add(s)
# Commit first so the study's kf_id can be read when building the participant
db.session.commit()

data = {
    'external_id': "test",
    'is_proband': True,
    'consent_type': 'GRU-IRB',
    'race': 'asian',
    'ethnicity': 'not hispanic',
    'gender': 'male'
}

p = Participant(**data, study_id=s.kf_id)
samp = Biospecimen(analyte_type='an analyte')
se_kwargs = {
    'external_id': 'se1',
    'experiment_strategy': 'WGS',
    'center': 'Baylor',
    'is_paired_end': True,
    'platform': 'Illumina'
}
seq_exp = SequencingExperiment(**se_kwargs)

# Genomic file linked to both the biospecimen and the sequencing experiment
gf = GenomicFile()
gf.biospecimen = samp
gf.sequencing_experiment = seq_exp

# Attach one of each child entity to the participant
p.biospecimens = [samp]
diag = Diagnosis()
p.diagnoses = [diag]
outcome = Outcome()
p.outcomes = [outcome]
phen = Phenotype()
p.phenotypes = [phen]

# Only the participant is added explicitly; the related objects are
# presumably saved through relationship cascades - TODO confirm
db.session.add(p)
db.session.commit()