In [None]:
import os
import sys

# Get the current working directory
current_dir = os.getcwd()

# Add the sibling "code" directory (next to the working directory) to sys.path
# so the project-local modules imported below can be resolved.
# `sys` must be imported explicitly: without it this cell raises NameError on a
# fresh kernel. The membership check keeps re-runs from stacking duplicates.
code_dir = os.path.join(os.path.dirname(current_dir), 'code')
if code_dir not in sys.path:
    sys.path.append(code_dir)

import pandas as pd
import numpy as np
import random
from IPython.display import clear_output
from google.cloud import bigquery # SQL table interface on Arcus
from dxFilterLibraryPreGrading import *
from reportMarkingFunctions import *
from projectTableFunctions import * 
import json
import matplotlib.pyplot as plt

## 22q11DS

### Load CSV with IDs

In [None]:
# Build the 22q11DS ID table in one pass:
#   1. read the cohort CSV,
#   2. derive pat_mrn_id by zero-padding CHOP_MRN to 8 digits,
#   3. keep only the identifier columns,
#   4. blank out missing trig_id values and store trig_id / BBLID as strings.
# NOTE(review): VCFSID is not cast here although the upload cell declares every
# column as STRING - confirm it is already read as text.
df_22q = (
    pd.read_csv("~/arcus/shared/cohort_csvs/n363_from_AAron_forkosha_22q.csv")
    .assign(pat_mrn_id=lambda raw: [format(mrn, "08d") for mrn in raw["CHOP_MRN"]])
    .loc[:, ["pat_mrn_id", "BBLID", "VCFSID", "trig_id"]]
    .assign(
        trig_id=lambda ids: ids["trig_id"].fillna("").astype(str),
        BBLID=lambda ids: ids["BBLID"].astype(str),
    )
)

# Quick sanity check of the first rows and the overall shape.
print(df_22q.head())
print(df_22q.shape)

### Upload Tables

In [None]:
# Show the column names that will become fields of the uploaded table.
df_22q.columns.tolist()

In [None]:
client = bigquery.Client()

# Upload table
new_table_name = "lab.cohort_id_table_22q11DS"

# Declare every column of the ID table as a STRING field, echoing each name.
my_schema = []
for c in df_22q.columns:
    print(c)
    my_schema.append(bigquery.SchemaField(c, "STRING"))

# Since string columns use the "object" dtype, pass in a (partial) schema
# to ensure the correct BigQuery data type.
# Load jobs append to an existing table by default, so re-running this cell
# would silently duplicate every cohort row. WRITE_TRUNCATE replaces the table
# contents instead, which makes the cell safe to re-run.
job_config = bigquery.LoadJobConfig(
    schema=my_schema,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
job = client.load_table_from_dataframe(
    df_22q, new_table_name, job_config=job_config
)

# Wait for the load job to complete.
job.result()
print(new_table_name, "created")

### Create Project
Next, create a new query in the queries folder that matches this project. Then run the cell below to define the new cohort using that query.

In [None]:
# Define the "22q11DS" cohort/project. Per the note above, the matching query
# must already exist in the queries folder before this cell is run.
# add_reports_to_project is not defined in this notebook; it presumably comes
# from one of the star-imported project modules (TODO confirm which one).
add_reports_to_project("22q11DS")

## Epilepsy

### Load CSV with IDs

In [None]:
# Build the epilepsy ID table in one pass:
#   1. read the cohort CSV,
#   2. derive pat_mrn_id by zero-padding PAT_MRN_ID to 8 digits,
#   3. keep only pat_mrn_id and GENE,
#   4. drop repeated (pat_mrn_id, GENE) rows.
df_epilepsy = (
    pd.read_csv("~/arcus/shared/cohort_csvs/epilepsy_gene_mri_05162025.csv")
    .assign(pat_mrn_id=lambda raw: [format(mrn, "08d") for mrn in raw["PAT_MRN_ID"]])
    .loc[:, ["pat_mrn_id", "GENE"]]
    .drop_duplicates()
)

# Quick sanity check of the first rows and the overall shape.
print(df_epilepsy.head())
print(df_epilepsy.shape)

### Upload Tables

In [None]:
# Show the column names that will become fields of the uploaded table.
df_epilepsy.columns.tolist()

In [None]:
client = bigquery.Client()

# Upload table
new_table_name = "lab.cohort_id_table_epilepsy"

# Declare every column of the ID table as a STRING field, echoing each name.
my_schema = []
for c in df_epilepsy.columns:
    print(c)
    my_schema.append(bigquery.SchemaField(c, "STRING"))

# Since string columns use the "object" dtype, pass in a (partial) schema
# to ensure the correct BigQuery data type.
# Load jobs append to an existing table by default, so re-running this cell
# would silently duplicate every cohort row. WRITE_TRUNCATE replaces the table
# contents instead, which makes the cell safe to re-run.
job_config = bigquery.LoadJobConfig(
    schema=my_schema,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
job = client.load_table_from_dataframe(
    df_epilepsy, new_table_name, job_config=job_config
)

# Wait for the load job to complete.
job.result()
print(new_table_name, "created")

### Create Project
Next, create a new query in the queries folder that matches this project. Then run the cell below to define the new cohort using that query.

In [None]:
# Define the "Epilepsy" cohort/project. Per the note above, the matching query
# must already exist in the queries folder before this cell is run.
# add_reports_to_project is not defined in this notebook; it presumably comes
# from one of the star-imported project modules (TODO confirm which one).
add_reports_to_project("Epilepsy")