Code for queries 1 and 2 of Project 1, run against MongoDB.

In [56]:
import os

from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

In [57]:
# The connection string (which embeds the database username and password) is
# read from the environment so that credentials are never stored in the notebook.
uri = os.environ.get("MONGODB_URI")
if not uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection string."
    )
client = MongoClient(uri, server_api=ServerApi('1'))

In [58]:
# Connectivity check: send a ping to the deployment and report the outcome.
# A failure is printed rather than raised, so later cells still execute.
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as ping_error:
    print(ping_error)

Pinged your deployment. You successfully connected to MongoDB!


In [59]:
# Handles to the project database and the two collections used by the queries.
db = client['Project1']
nodes, edges = db['nodes'], db['edges']

In [60]:
def query1(node_id):
    """Fetch a node together with the nodes linked to it by selected edges.

    Three groups of documents from the ``nodes`` collection are matched:

    * the node whose ``id`` equals ``node_id`` (so its name is available);
    * targets of edges leaving ``node_id`` with metaedge "DlA" or "DaG"
      (disease located in anatomy / disease associated with gene);
    * sources of edges arriving at ``node_id`` with metaedge "CpD" or "CtD"
      (compound palliates / treats disease).

    Args:
        node_id: value of the ``id`` field of the node of interest.

    Returns:
        The cursor returned by ``nodes.aggregate``; iterate it to read the
        matching node documents.
    """
    # Ids reached by following the node's outgoing anatomy/gene edges.
    outgoing_target_ids = list(edges.distinct(
        "target",
        {"source": node_id, "metaedge": {"$in": ["DlA", "DaG"]}},
    ))

    # Ids of compounds that point at the node through treat/palliate edges.
    incoming_source_ids = list(edges.distinct(
        "source",
        {"target": node_id, "metaedge": {"$in": ["CpD", "CtD"]}},
    ))

    match_stage = {
        "$match": {
            "$or": [
                {"id": node_id},
                {"id": {"$in": outgoing_target_ids}},
                {"id": {"$in": incoming_source_ids}},
            ]
        }
    }
    return nodes.aggregate([match_stage])


In [61]:
# Run query 1 for the node with id 'Disease::DOID:184' and keep the returned cursor.
results = query1('Disease::DOID:184')

In [62]:
def q1_to_file(file_path, q1):
    """Write query-1 nodes to a text file, grouped by node kind.

    Every node is read from ``q1`` before the file is opened. Nodes whose
    ``kind`` is not Disease, Gene, Compound or Anatomy are ignored. The file
    is written as UTF-8 so names containing non-ASCII characters are stored
    the same way on every platform.

    Args:
        file_path: path of the file to create or overwrite.
        q1: iterable of mappings, each providing "kind" and "name" keys.

    Raises:
        KeyError: if a node lacks the "kind" or "name" key.
    """
    # (section header, node kind) pairs in the order the sections are written.
    sections = (
        ("Disease Name:\n", "Disease"),
        ("Gene Causes :\n", "Gene"),
        ("Drug Treat/Palliate:\n", "Compound"),
        ("Anatomy/Disease Occurs:\n", "Anatomy"),
    )
    names_by_kind = {kind: [] for _, kind in sections}

    for node in q1:
        # Both keys are read up front so a malformed node fails loudly,
        # even when its kind is one that is not written out.
        kind = node["kind"]
        name = node["name"]
        bucket = names_by_kind.get(kind)
        if bucket is not None:
            bucket.append(name)

    with open(file_path, 'w', encoding='utf-8') as file:
        for header, kind in sections:
            file.write(header)
            file.writelines(str(name) + '\n' for name in names_by_kind[kind])

In [63]:
# Write the grouped query 1 results to query1.txt (iterating `results` in the process).
q1_to_file('query1.txt', results)

In [64]:
def query2():
    """Run the query-2 aggregation and return the selected compound names.

    The pipeline starts from the ``nodes`` collection and joins ``edges``:

    1. keep Compound nodes with at least one outgoing "CdG" or "CuG" edge;
    2. drop compounds with any outgoing edge whose target starts with
       "Disease::";
    3. for each remaining compound-gene edge, keep it only when at least one
       "AuG" or "AdG" edge targets the same gene;
    4. collapse the survivors to one document per compound id.

    Returns:
        A list of documents, each holding only the compound's "name" field.
    """
    compounds_only = {"$match": {"kind": "Compound"}}

    # Attach every CdG/CuG edge that leaves the compound.
    gene_edges_lookup = {"$lookup": {
        "from": "edges",
        "let": {"compound_id": "$id"},
        "pipeline": [
            {"$match": {"$expr": {"$and": [
                {"$eq": ["$source", "$$compound_id"]},
                {"$in": ["$metaedge", ["CdG", "CuG"]]},
            ]}}},
        ],
        "as": "compound_gene_edges",
    }}
    has_gene_edges = {"$match": {"compound_gene_edges": {"$ne": []}}}

    # Attach every edge from the compound to a node whose id starts with "Disease::".
    disease_edges_lookup = {"$lookup": {
        "from": "edges",
        "let": {"compound_id": "$id"},
        "pipeline": [
            {"$match": {"$expr": {"$and": [
                {"$eq": ["$source", "$$compound_id"]},
                {"$regexMatch": {"input": "$target", "regex": "^Disease::"}},
            ]}}},
        ],
        "as": "disease_edges",
    }}
    no_disease_edges = {"$match": {"disease_edges": {"$eq": []}}}

    # One document per (compound, gene edge) pair from here on.
    per_gene_edge = {"$unwind": "$compound_gene_edges"}

    # A single AuG/AdG edge into the gene is enough, hence the $limit.
    anatomy_edges_lookup = {"$lookup": {
        "from": "edges",
        "let": {"gene_id": "$compound_gene_edges.target"},
        "pipeline": [
            {"$match": {"$expr": {"$and": [
                {"$eq": ["$target", "$$gene_id"]},
                {"$in": ["$metaedge", ["AuG", "AdG"]]},
            ]}}},
            {"$limit": 1},
        ],
        "as": "anatomy_gene_edges",
    }}
    # Keep only the pairs whose gene has an anatomy edge.
    has_anatomy_edge = {"$match": {"anatomy_gene_edges": {"$ne": []}}}

    # De-duplicate: a compound may have survived through several genes.
    one_per_compound = {"$group": {"_id": "$id", "name": {"$first": "$name"}}}
    name_only = {"$project": {"_id": 0, "name": 1}}

    stages = [
        compounds_only,
        gene_edges_lookup,
        has_gene_edges,
        disease_edges_lookup,
        no_disease_edges,
        per_gene_edge,
        anatomy_edges_lookup,
        has_anatomy_edge,
        one_per_compound,
        name_only,
    ]
    return list(nodes.aggregate(stages))

In [65]:
# Run query 2; this rebinds `results`, replacing the query 1 cursor with a list.
results = query2()

In [66]:
def q2_to_file(file_path, q2):
    """Write each query-2 result on its own line of a text file.

    Each item is written as ``str(item)`` followed by a newline. The file is
    written as UTF-8 so items containing non-ASCII characters are stored the
    same way on every platform.

    Args:
        file_path: path of the file to create or overwrite.
        q2: iterable of results to write, one per line.
    """
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(str(item) + '\n' for item in q2)

In [67]:
# Write the query 2 results to query2.txt, one result per line.
q2_to_file('query2.txt', results)