In [1]:
import pandas as pd
from google.cloud import bigquery # SQL table interface on Arcus

# Initialize the client service
client = bigquery.Client()

# Set the table names
lookup_table = "lab.proc_ord_projects"
project_table = "lab.nf1_scit_walkthrough"

In [None]:
# Get the nf1 table from Ayan's query
q_fn = "../queries/nf1.txt"

with open(q_fn, 'r') as f:
    q = f.read()

df_nf1 = client.query(q).to_dataframe()

print(df_nf1.shape)

In [None]:
df_nf1.head(10)

In [None]:
insert_q = "INSERT INTO "+lookup_table+" (proc_ord_id, pat_id, project) VALUES "
count = 1
q = insert_q
for idx, row in df_nf1.iterrows(): 
    proc_ord_id = row['proc_ord_id']
    pat_id = row['pat_id']
    alt_pat_id = row['nf1_subject_id']
    project = 'NF1'
    q += '("'+proc_ord_id+'", "'+pat_id+'", "'+project+'"), '
    if count % 1000 == 0:
        q = q[:-2]+";"

        # Add the rows to the table structure
        print(q)
        job = client.query(q)
        job.result()
        q = insert_q
        
    count += 1
    
q = q[:-2]+";"

# Add the rows to the table structure
job = client.query(q)
job.result()

In [None]:
# Time to check
q = 'select count(distinct proc_ord_id) from '+lookup_table+' where project = "NF1"'
df = client.query(q).to_dataframe()
print(df)

## Include SLIP 2's in Non-SLIP Cohorts (WIP)
For most cohorts, a 2 in SLIP will also serve as a 2 in other cohorts. Therefore, we should periodically update our non-SLIP cohorts to include SLIPs 2s

In [3]:
# First, get the proc_ord_ids associated with a specific project
# Second get all grades for reports in that project where the grade criteria was SLIP and the grade was 2
# Third, exclude duplicate entries
# Fourth, change the grading criteria and insert into specified project
def add_reports_for_nonslip(project_id="NF1", dry_run = False):
    client = bigquery.Client()
    grader_table_name = "lab.grader_table_with_metadata_project_independent"

    # Get column names
    q_get_cols = "select * from "+grader_table_name+" limit 1;"
    df_get_cols = client.query(q_get_cols).to_dataframe()
    cols_str = " ("+", ".join(list(df_get_cols))+") "
    
    q_insert = '''insert into '''+grader_table_name+cols_str+'''
    with CTE as (
      select
        proc_ord_id
      from
        lab.proc_ord_projects
      where
        project like "''' + project_id + '''"
    )
    select
      grades.proc_ord_id,
      grader_name,
      grade,
      grade_category,
      pat_id,
      age_in_days,
      proc_ord_year,
      proc_name,
      report_origin_table,
      grade_date,
      "nonSLIP: ''' + project_id + '''" as grade_criteria
    from
      ''' + grader_table_name + ''' grades
      join CTE on grades.proc_ord_id = CTE.proc_ord_id
    where
      grade_criteria = "SLIP"
      and grade = 2
    except distinct
        select * FROM ''' + grader_table_name + '''
        where grade_criteria = "nonSLIP: ''' + project_id + '''";'''
    print(q_insert)
    if not dry_run:
        print("Inserting SLIP 2 grades into non-SLIP project")
        j_insert = client.query(q_insert)
        j_insert.result()

In [5]:
add_reports_for_nonslip("NF1", dry_run = False)

insert into lab.grader_table_with_metadata_project_independent (proc_ord_id, grader_name, grade, grade_category, pat_id, age_in_days, proc_ord_year, proc_name, report_origin_table, grade_date, grade_criteria) 
    with CTE as (
      select
        proc_ord_id
      from
        lab.proc_ord_projects
      where
        project like "NF1"
    )
    select
      grades.proc_ord_id,
      grader_name,
      grade,
      grade_category,
      pat_id,
      age_in_days,
      proc_ord_year,
      proc_name,
      report_origin_table,
      grade_date,
      "nonSLIP: NF1" as grade_criteria
    from
      lab.grader_table_with_metadata_project_independent grades
      join CTE on grades.proc_ord_id = CTE.proc_ord_id
    where
      grade_criteria = "SLIP"
      and grade = 2
    except distinct
        select * FROM lab.grader_table_with_metadata_project_independent
        where grade_criteria = "nonSLIP: NF1";
Inserting SLIP 2 grades into non-SLIP project
