In [16]:
from google.cloud import bigquery
import pandas as pd
import numpy as np

def _threshold_param(name, value):
    """Return a BigQuery scalar query parameter for a numeric threshold.

    Plain Python integers are bound as INT64. Anything else is converted with
    ``float()`` and bound as FLOAT64, so a non-numeric value raises here
    instead of being spliced into the SQL text.
    """
    if isinstance(value, int) and not isinstance(value, bool):
        return bigquery.ScalarQueryParameter(name, "INT64", value)
    return bigquery.ScalarQueryParameter(name, "FLOAT64", float(value))


def _build_l2g_query(filter_n_cases, filter_study_ids):
    """Assemble the locus-to-gene SQL text; optional clauses are toggled by the flags."""
    query = '''
        WITH ranked_genes AS (
            SELECT locus2gene.study_id,
                locus2gene.chrom, locus2gene.pos, locus2gene.ref, locus2gene.alt,
                study_metadata.*,
                genes.gene_name, locus2gene.y_proba_full_model,
                lead_variants.pval,
                ROW_NUMBER() OVER(PARTITION BY locus2gene.study_id, genes.gene_name ORDER BY lead_variants.pval) AS rn

            FROM `bigquery-public-data.open_targets_genetics.locus2gene` AS locus2gene

            -- Get GWAS metadata
            INNER JOIN `bigquery-public-data.open_targets_genetics.studies` AS study_metadata
            ON locus2gene.study_id = study_metadata.study_id

            -- Get gene names
            INNER JOIN `bigquery-public-data.open_targets_genetics.genes` AS genes
            ON locus2gene.gene_id = genes.gene_id

            -- Get lead variant P-values
            INNER JOIN `bigquery-public-data.open_targets_genetics.variant_disease` AS lead_variants
            ON locus2gene.pos = lead_variants.lead_pos
                AND locus2gene.chrom = lead_variants.lead_chrom
                AND locus2gene.study_id = lead_variants.study_id

            WHERE
                -- Remove study IDs containing "raw"
                locus2gene.study_id NOT LIKE '%raw%'

                -- Filter to a l2g score threshold
                AND locus2gene.y_proba_full_model > @min_l2g_score

                -- Filter to a minimum number of associated loci
                AND study_metadata.num_assoc_loci >= @min_assoc_loci
        '''

    if filter_n_cases:
        query += '''
                -- Filter to a minimum number of cases
                AND study_metadata.n_cases >= @min_n_cases
        '''

    if filter_study_ids:
        query += '''
                AND locus2gene.study_id IN UNNEST(@study_ids_to_keep)
        '''

    # Keep, for every (study, gene) pair, only the row with the smallest P-value.
    query += '''
        )
        SELECT * FROM ranked_genes WHERE rn = 1;
        '''
    return query


def run_opentargets_query(credentials_path, output_file, min_assoc_loci=10, min_n_cases=3000, min_l2g_score=0.2, study_ids_to_keep=None):
    """Query Open Targets Genetics locus-to-gene results and save them as a gzipped CSV.

    The query joins the public `locus2gene`, `studies`, `genes` and
    `variant_disease` tables, applies the thresholds below, keeps the
    lowest-P-value row per (study, gene) pair, sorts the result by
    `study_id` and `pval`, and writes it to `output_file`.

    Parameters
    ----------
    credentials_path : str
        Path to a service-account JSON key used to create the BigQuery client.
    output_file : str
        Destination path; written with gzip compression and without the index.
    min_assoc_loci : int, default 10
        Keep studies with `num_assoc_loci` >= this value.
    min_n_cases : int or None, default 3000
        Keep studies with `n_cases` >= this value. Pass None to skip the filter.
        NOTE(review): rows whose `n_cases` is NULL do not satisfy the
        comparison and are dropped -- confirm this is intended.
    min_l2g_score : float, default 0.2
        Keep rows with `y_proba_full_model` strictly greater than this value.
    study_ids_to_keep : str or iterable of str, optional
        If given and non-empty, restrict the result to these study IDs.

    Returns
    -------
    None
        The result is only written to disk. Any exception (including invalid
        arguments) is caught, reported with `print`, and None is returned.
    """
    try:
        # Normalise the optional study filter once, so the SQL clause and the
        # bound parameter can never disagree with each other.
        if not study_ids_to_keep:
            study_ids = []
        elif isinstance(study_ids_to_keep, str):
            study_ids = [study_ids_to_keep]
        else:
            study_ids = list(study_ids_to_keep)
        if not all(isinstance(study_id, str) for study_id in study_ids):
            raise TypeError("study_ids_to_keep must contain only strings")

        apply_n_cases = min_n_cases is not None

        # Authenticate with BigQuery
        client = bigquery.Client.from_service_account_json(credentials_path)

        # Bind every user-supplied value as a query parameter instead of
        # formatting it into the SQL string.
        query_parameters = [
            _threshold_param("min_l2g_score", min_l2g_score),
            _threshold_param("min_assoc_loci", min_assoc_loci),
        ]
        if apply_n_cases:
            query_parameters.append(_threshold_param("min_n_cases", min_n_cases))
        if study_ids:
            query_parameters.append(
                bigquery.ArrayQueryParameter("study_ids_to_keep", "STRING", study_ids)
            )

        query = _build_l2g_query(
            filter_n_cases=apply_n_cases,
            filter_study_ids=bool(study_ids),
        )

        job_config = bigquery.QueryJobConfig()
        job_config.query_parameters = query_parameters

        # Run the query
        query_job = client.query(query, job_config=job_config)

        # Convert the query results to a Pandas dataframe
        l2g = query_job.to_dataframe()
        # sort_values returns a new frame; keep it so the saved file is sorted.
        l2g = l2g.sort_values(by=['study_id', 'pval'])

        # Save dataframe to output file
        l2g.to_csv(output_file, compression="gzip", index=False)

    except Exception as e:
        print(f"An error occurred: {str(e)}")
        return None

In [18]:
# Fetch locus-to-gene rows for a single FinnGen study and write them to the
# project's resources directory as a gzipped CSV.
run_opentargets_query(
    # Service-account key used to authenticate against BigQuery.
    credentials_path="/home/robertg1/.ssh/test-bigquery-ot-956f8a01208f.json",
    # Destination of the gzipped CSV.
    output_file="/home/robertg1/gene_program_evaluation/smk/resources/OpenTargets_L2G_noQC.csv.gz",
    # Restrict the query to this one study.
    study_ids_to_keep=["FINNGEN_R6_ABDOM_HERNIA"],
)

In [15]:
# Display the `test` DataFrame.
# NOTE(review): `test` is not assigned in any cell visible here, and this cell's
# execution count (15) is lower than that of the cells above (16, 18), so it
# depends on kernel state from an earlier run. Add the step that creates `test`
# so the notebook survives Restart Kernel -> Run All.
test

Unnamed: 0,study_id,chrom,pos,ref,alt,study_id_1,ancestry_initial,ancestry_replication,n_cases,n_initial,...,has_sumstats,num_assoc_loci,source,trait_reported,trait_efos,trait_category,gene_name,y_proba_full_model,pval,rn
0,FINNGEN_R6_ABDOM_HERNIA,18,63246460,G,A,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,BCL2,0.888353,4.37e-09,1
1,FINNGEN_R6_ABDOM_HERNIA,7,134907837,G,A,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,CALD1,0.59548,1e-15,1
2,FINNGEN_R6_ABDOM_HERNIA,12,77829818,G,A,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,NAV3,0.732569,7.73e-11,1
3,FINNGEN_R6_ABDOM_HERNIA,2,19576027,C,A,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,OSR1,0.55134,1.32e-13,1
4,FINNGEN_R6_ABDOM_HERNIA,2,111155479,T,C,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,BCL2L11,0.750643,1.15e-08,1
5,FINNGEN_R6_ABDOM_HERNIA,12,20426458,G,C,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,PDE3A,0.868831,5.17e-10,1
6,FINNGEN_R6_ABDOM_HERNIA,11,33519318,A,G,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,KIAA1549L,0.364931,7.9e-09,1
7,FINNGEN_R6_ABDOM_HERNIA,7,134907837,G,A,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,AGBL3,0.217192,1e-15,1
8,FINNGEN_R6_ABDOM_HERNIA,1,219570796,C,A,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,ZC3H11B,0.685505,5.52e-43,1
9,FINNGEN_R6_ABDOM_HERNIA,6,80510867,C,A,FINNGEN_R6_ABDOM_HERNIA,{'list': [{'element': 'European=260405'}]},{'list': []},9245,260405,...,True,11,FINNGEN,Hernia of abodminal wall,{'list': [{'element': 'EFO_1001866'}]},gastrointestinal disease,BCKDHB,0.244579,1.39e-08,1
