In [3]:
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px


In [None]:
# Absolute path to a "weighted_fluctuation" output directory.
# NOTE(review): machine-specific location; nothing in the visible cells reads
# `folder` - confirm it is still needed or make it configurable.
folder = "/exports/igmm/eddie/UK-BioBank-53116/users/kelsey/short_ESR1_run/ESR1_full_negative_controls/weighted_fluctuation"
def bQTLs_data(config_filename="nextflow.config"):
    """Load the bQTLs table referenced by the pipeline configuration.

    ``config_filename`` is resolved inside ``st.session_state.nextflow_rundir``
    and parsed with ``load_pipeline_params``; the ``"BQTLS"`` entry of the
    parsed parameters is joined onto the same run directory to locate the
    table.

    Args:
        config_filename: Name of the configuration file inside the run
            directory.

    Returns:
        A ``pandas.DataFrame`` read from the whitespace-delimited file, or
        ``None`` when nothing exists at the resolved path.
    """
    # NOTE(review): `os` and `load_pipeline_params` are not imported/defined in
    # the visible cells - confirm they are in scope before this cell runs.
    params = load_pipeline_params(os.path.join(st.session_state.nextflow_rundir, config_filename))

    bqtls_filepath = os.path.join(st.session_state.nextflow_rundir, params["BQTLS"])

    if os.path.exists(bqtls_filepath):
        # Raw string: "\s" inside a plain literal is an invalid escape sequence
        # (a warning today, slated to become an error); the regex is unchanged.
        return pd.read_csv(bqtls_filepath, sep=r"\s+")
    return None

In [4]:
def result_file(nextflow_rundir):
    """Return the path of ``preliminary_results.csv`` inside ``nextflow_rundir``."""
    filename = "preliminary_results.csv"
    return os.path.join(nextflow_rundir, filename)

def raw_data_file(nextflow_rundir):
    """Return the path of ``results/tmle_inputs/final.data.csv`` inside ``nextflow_rundir``."""
    relative_parts = ("results", "tmle_inputs", "final.data.csv")
    return os.path.join(nextflow_rundir, *relative_parts)


In [5]:
def http_variant_info(rsid, timeout=30):
    """Fetch the human variation record for ``rsid``, phenotypes included.

    Issues a GET to ``<ENSEMBL_URL>/variation/human/<rsid>?phenotypes=1`` and
    returns the decoded JSON body.

    Args:
        rsid: Variant identifier placed in the URL path.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would block
            the caller indefinitely. The default of 30 is an arbitrary but
            generous choice.

    Returns:
        The response body decoded from JSON.
    """
    # NOTE(review): `requests` and `ENSEMBL_URL` are not imported/defined in
    # the visible cells - confirm they are in scope.
    url = f"{ENSEMBL_URL}/variation/human/{rsid}?phenotypes=1"
    response = requests.get(
        url,
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )
    return response.json()


In [6]:
def http_ensemble_annotations(
        chr, start, end,
        distance=100,
        features=("gene", "regulatory", "motif"),
        timeout=30
        ):
    """Fetch features overlapping a window around a human genomic region.

    Issues a GET to
    ``<ENSEMBL_URL>/overlap/region/human/<chr>:<start-distance>-<end+distance>``
    with one ``feature=<name>`` query entry per item of ``features`` and
    returns the decoded JSON body.

    Args:
        chr: Chromosome name placed in the region string.
        start: Numeric start coordinate of the region.
        end: Numeric end coordinate of the region.
        distance: Padding subtracted from ``start`` and added to ``end``.
        features: Feature type names to request.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without a timeout a stalled connection would block
            the caller indefinitely.

    Returns:
        The response body decoded from JSON.
    """
    # Region coordinates start at 1, so padding a variant near the start of a
    # chromosome must not produce a zero or negative lower bound.
    region_start = max(1, start - distance)
    region_end = end + distance
    feature_query = ";".join(f"feature={feature}" for feature in features)
    # NOTE(review): `requests` and `ENSEMBL_URL` are not imported/defined in
    # the visible cells - confirm they are in scope.
    url = (
        f"{ENSEMBL_URL}/overlap/region/human/"
        f"{chr}:{region_start}-{region_end}?{feature_query}"
    )
    response = requests.get(
        url,
        headers={"Content-Type": "application/json"},
        timeout=timeout,
    )
    return response.json()


# Load the bQTLs table; bQTLs_data() returns None when the file is missing.
# NOTE(review): the None case is passed on to load_data unchecked - confirm
# load_data tolerates it.
bqtls_data = bQTLs_data()

# NOTE(review): `filter` is called with six arguments, so it cannot be the
# builtin; it must be a project function that shadows it. `load_data`,
# `results_file`, `mt_method` and `pvalue` are not defined in the visible
# cells (the visible helper is named `result_file`) - confirm they exist.
data = filter(load_data(bqtls_data, results_file), mt_method, pvalue, "None", "None", "None")


# Let the user pick a bQTL; the option text before the first " (" is kept.
# NOTE(review): presumably options look like "<id> (<hit count>)" - verify
# against bqtls_hit_counts, and confirm the selectbox cannot return None.
bqtl_str = st.selectbox("Select bQTL (# Trait hits)", bqtls_hit_counts(data))
bqtl = bqtl_str.split(" (")[0]
# Build the SNP base info (SNPinfo itself writes its inputs to the page).
# NOTE(review): SNPinfo is defined further down in this file, so a fresh
# top-to-bottom run reaches this call before the definition.
basesnpinfo = SNPinfo(bqtl, bqtls_data)


In [7]:
def look_up_variant_gtex(gtex, variant_code_b38_ref_alt, selection):
    """Return the columns of ``gtex`` whose tissue part equals ``selection``.

    Each column name is split once, on its first underscore, into a
    ``(prefix, tissue)`` pair. Columns whose second part equals ``selection``
    are returned, labelled with a two-level ``MultiIndex`` built from those
    pairs.

    The input frame is left untouched: its column labels are not replaced, so
    the function can be called repeatedly on the same frame. A frame whose
    columns are already a ``MultiIndex`` is used as-is.

    Args:
        gtex: DataFrame with ``<prefix>_<tissue>`` column names, or with
            columns that are already a two-level ``MultiIndex``.
        variant_code_b38_ref_alt: Accepted for interface compatibility; not
            used by this function.
        selection: Tissue value to match against the second column level.

    Returns:
        A new DataFrame holding the matching columns under MultiIndex labels.
    """
    if isinstance(gtex.columns, pd.MultiIndex):
        # Already split, e.g. by an earlier call that rewrote the labels.
        split_columns = gtex.columns
    else:
        split_columns = pd.MultiIndex.from_frame(
            pd.DataFrame(gtex.columns.str.split("_", n=1).to_list())
        )

    # Second level holds the tissue part of each column name.
    tissue = split_columns.get_level_values(1)
    keep = tissue == selection

    subset = gtex.loc[:, keep].copy()
    subset.columns = split_columns[keep]
    return subset

In [8]:
def SNPinfo(rsid, bqtls_data):
    """Assemble allele and mapping details for a single variant.

    Fetches the variant record through ``http_variant_info``, takes its first
    mapping, and combines it with the first row of ``bqtls_data`` whose ``ID``
    equals ``rsid``. Both the raw record and the matching rows are written to
    the Streamlit page along the way.

    The allele with the strictly larger count is reported as the binding
    allele; when the counts are equal, ALT is reported as binding.

    Args:
        rsid: Variant identifier to look up.
        bqtls_data: DataFrame with ``ID``, ``REF``, ``ALT``, ``REF.counts``
            and ``ALT.counts`` columns.

    Returns:
        A dict of display fields; allele entries are plain values and the
        mapping-derived entries are single-element lists.
    """
    def _allele_label(allele, count):
        # "<allele> (<count>)"
        return allele + " (" + str(count) + ")"

    ensembl_record = http_variant_info(rsid)
    st.write(ensembl_record)
    first_mapping = ensembl_record["mappings"][0]

    matching_rows = bqtls_data[bqtls_data.ID == rsid]
    variant = matching_rows.iloc[0]
    st.write(matching_rows)

    chromosome, region_start, region_end = location_from_str(first_mapping["location"])

    ref_count = variant["REF.counts"]
    alt_count = variant["ALT.counts"]
    ref_is_binding = ref_count > alt_count
    ref_label = _allele_label(variant.REF, ref_count)
    alt_label = _allele_label(variant.ALT, alt_count)
    if ref_is_binding:
        binding_label, non_binding_label = ref_label, alt_label
    else:
        binding_label, non_binding_label = alt_label, ref_label

    return {
        "Binding Allele (Counts)": binding_label,
        "Non-Binding Allele (Counts)": non_binding_label,
        "REF Allele": variant.REF,
        "ALT Allele": variant.ALT,
        "Chromosome": [chromosome],
        "Start": [region_start],
        "End": [region_end],
        "Strand": [first_mapping["strand"]],
        "Ensembl Alleles": [first_mapping["allele_string"]],
        "Ensembl Minor Allele": [ensembl_record["minor_allele"]],
        "Ensembl MAF": [ensembl_record["MAF"]],
        "Ensembl Ancestral Allele": [first_mapping["ancestral_allele"]],
    }