Code for queries 1 and 2 of Project 1, which query a MongoDB database.

In [1]:
from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

In [2]:
# Connection string for a MongoDB server on localhost, port 27017.
uri = "mongodb://localhost:27017"
# Create the client, requesting server API version '1' for all commands.
client = MongoClient(uri, server_api=ServerApi('1'))

In [3]:
# Connectivity check: send a 'ping' command to the admin database and
# report success.
try:
    client.admin.command('ping')
    print("Pinged your deployment. You successfully connected to MongoDB!")
except Exception as e:
    # The error is only printed, not re-raised, so execution continues past
    # this cell even when the ping fails.
    print(e)

Pinged your deployment. You successfully connected to MongoDB!


In [4]:
# Handles to the 'Project1' database and its 'nodes' and 'edges' collections.
# `nodes` and `edges` are read as module-level globals by query1 and query2.
db = client['Project1']
nodes = db['nodes']
edges = db['edges']

In [7]:
def query1(node_id):
    """Fetch a node together with the nodes directly linked to it.

    Two lookups are made against the global ``edges`` collection:

    * targets of edges whose ``source`` is ``node_id`` and whose
      ``metaedge`` is "DlA" or "DaG" (disease located in anatomy /
      disease associated with gene);
    * sources of edges whose ``target`` is ``node_id`` and whose
      ``metaedge`` is "CpD" or "CtD" (compound palliates / treats disease).

    The global ``nodes`` collection is then filtered to documents whose
    ``id`` is ``node_id`` itself or one of the ids found above.

    Args:
        node_id: Value of the ``id`` field of the starting node.

    Returns:
        The object returned by ``nodes.aggregate`` for the single
        ``$match`` stage; it is returned as-is, not materialised into a list.
    """
    # Ids reached by edges leaving the node.
    outgoing_filter = {"source": node_id, "metaedge": {"$in": ["DlA", "DaG"]}}
    linked_targets = list(edges.distinct("target", outgoing_filter))

    # Ids of nodes whose edges point at the node.
    incoming_filter = {"target": node_id, "metaedge": {"$in": ["CpD", "CtD"]}}
    linked_sources = list(edges.distinct("source", incoming_filter))

    selector = {
        "$or": [
            {"id": node_id},  # the starting node itself, so its name is available
            {"id": {"$in": linked_targets}},
            {"id": {"$in": linked_sources}},
        ]
    }
    return nodes.aggregate([{"$match": selector}])


In [16]:
# Run query 1 for the node with id 'Disease::DOID:184'. `results` is whatever
# nodes.aggregate returned (with PyMongo, a cursor that is consumed as it is
# iterated, so it can be walked only once).
results = query1('Disease::DOID:184')

In [13]:
def q1_to_file(file_path, q1):
    """Write the nodes returned by query 1 to a text file, grouped by kind.

    The file has four sections, always in this order and always present
    even when empty: diseases, genes, compounds, anatomy. Each section is a
    header line followed by one node name per line. Nodes of any other
    kind are ignored.

    Args:
        file_path: Path of the file to create or overwrite.
        q1: Iterable of node documents; each must have "kind" and "name"
            keys. It is traversed exactly once, before the file is opened.
    """
    # (header line, node kind) for each section, in output order.
    sections = (
        ("Disease Name:\n", "Disease"),
        ("Gene Causes :\n", "Gene"),
        ("Drug Treat/Palliate:\n", "Compound"),
        ("Anatomy/Disease Occurs:\n", "Anatomy"),
    )

    # Materialise once: the input may be a one-shot iterator.
    kind_name_pairs = [(node["kind"], node["name"]) for node in q1]

    with open(file_path, 'w') as out:
        for header, wanted_kind in sections:
            out.write(header)
            out.writelines(
                str(name) + '\n'
                for kind, name in kind_name_pairs
                if kind == wanted_kind
            )

In [17]:
# Write the query 1 results to 'query1.txt' (path is relative to the current
# working directory); this iterates over `results`.
q1_to_file('query1.txt', results)

In [5]:
def query2():
    """Return the names of compounds whose gene edge is opposed by an anatomy edge.

    Runs an aggregation on the global ``nodes`` collection:

    1. keep nodes whose ``kind`` is "Compound";
    2. attach each compound's outgoing "CdG"/"CuG" edges from ``edges``;
    3. drop compounds with no such edge and unwind to one row per edge;
    4. for each row, look for an edge into the same gene whose metaedge is
       the opposite pairing ("CuG" with "AdG", or "CdG" with "AuG");
    5. keep rows where such an edge exists, then collapse to one row per
       compound id and project only the name.

    (The metaedge codes presumably stand for compound/anatomy
    up-/down-regulates gene -- confirm against the dataset documentation.)

    Returns:
        list[dict]: one ``{"name": <compound name>}`` document per compound.
    """
    # Stage 1: start with compounds only.
    only_compounds = {"$match": {"kind": "Compound"}}

    # Stage 2: compound -> gene edges, projected to the two fields used later.
    compound_edge_filter = {
        "$and": [
            {"$eq": ["$source", "$$compound_id"]},
            {"$in": ["$metaedge", ["CdG", "CuG"]]},
        ]
    }
    attach_gene_edges = {
        "$lookup": {
            "from": "edges",
            "let": {"compound_id": "$id"},
            "pipeline": [
                {"$match": {"$expr": compound_edge_filter}},
                {"$project": {"target": 1, "metaedge": 1, "_id": 0}},
            ],
            "as": "compound_gene_edges",
        }
    }

    # Stages 3-4: discard compounds without gene edges, then one row per edge.
    has_gene_edges = {"$match": {"compound_gene_edges": {"$ne": []}}}
    one_row_per_gene_edge = {"$unwind": "$compound_gene_edges"}

    # Stage 5: edges into the same gene with the opposite metaedge pairing.
    opposite_pairing = {
        "$or": [
            {"$and": [
                {"$eq": ["$$compound_metaedge", "CuG"]},
                {"$eq": ["$metaedge", "AdG"]},
            ]},
            {"$and": [
                {"$eq": ["$$compound_metaedge", "CdG"]},
                {"$eq": ["$metaedge", "AuG"]},
            ]},
        ]
    }
    anatomy_edge_filter = {
        "$and": [
            {"$eq": ["$target", "$$gene_id"]},
            {"$in": ["$metaedge", ["AuG", "AdG"]]},
            opposite_pairing,
        ]
    }
    attach_anatomy_edges = {
        "$lookup": {
            "from": "edges",
            "let": {
                "gene_id": "$compound_gene_edges.target",
                "compound_metaedge": "$compound_gene_edges.metaedge",
            },
            "pipeline": [
                {"$match": {"$expr": anatomy_edge_filter}},
                # Only existence is checked downstream, so one match suffices.
                {"$limit": 1},
            ],
            "as": "anatomy_gene_edges",
        }
    }

    # Stage 6: keep rows that found a matching anatomy edge.
    has_anatomy_edges = {"$match": {"anatomy_gene_edges": {"$ne": []}}}

    # Stages 7-8: one row per compound id, then expose only the name.
    one_row_per_compound = {"$group": {"_id": "$id", "name": {"$first": "$name"}}}
    name_only = {"$project": {"_id": 0, "name": 1}}

    stages = [
        only_compounds,
        attach_gene_edges,
        has_gene_edges,
        one_row_per_gene_edge,
        attach_anatomy_edges,
        has_anatomy_edges,
        one_row_per_compound,
        name_only,
    ]
    return list(nodes.aggregate(stages))

In [11]:
# Run query 2. Note that this rebinds `results`, the same name used earlier
# for the query 1 output; from here on it is a list of {'name': ...} dicts.
results = query2()

In [12]:
# Pull the 'name' value out of each result document.
names = [entry['name'] for entry in results]
# Bare expression as the last line so the notebook displays the list.
names

['Haloperidol',
 'Trimethadione',
 'Econazole',
 'Quinethazone',
 'Citalopram',
 'Felbamate',
 'Oxprenolol',
 'Iloperidone',
 'Pentoxifylline',
 'Phentermine',
 'Carbidopa',
 'Nicotine',
 'Acarbose',
 'Ribavirin',
 'Brinzolamide',
 'Aminolevulinic acid',
 'Vemurafenib',
 'Metyrapone',
 'Pancuronium',
 'Methylprednisolone',
 'Cefdinir',
 'Glycopyrrolate',
 'Aminoglutethimide',
 'Neostigmine',
 'Benzocaine',
 'Famciclovir',
 'Piperacillin',
 'Nizatidine',
 'Paroxetine',
 'Albendazole',
 'Nisoldipine',
 'Midodrine',
 'Floxuridine',
 'Repaglinide',
 'Chlorthalidone',
 'Riluzole',
 'Anagrelide',
 'Eplerenone',
 'Pimozide',
 'Vitamin E',
 'Pyridostigmine',
 'Olanzapine',
 'Kanamycin',
 'Iodixanol',
 'Dicloxacillin',
 'Dasatinib',
 'Estrone',
 'Betazole',
 'Terazosin',
 'Gemfibrozil',
 'Paromomycin',
 'Metrizamide',
 'Estriol',
 'Iopamidol',
 'Entacapone',
 'Atropine',
 'Oxybenzone',
 'Adenosine',
 'Terconazole',
 'Amitriptyline',
 'Cetirizine',
 'Prazosin',
 'Nortriptyline',
 'Zidovudine',
 

In [8]:
def q2_to_file(file_path, q2):
    """Write each element of ``q2`` to a text file, one per line.

    Args:
        file_path: Path of the file to create or overwrite.
        q2: Iterable of items; each is rendered with ``str()`` and followed
            by a newline.
    """
    with open(file_path, 'w') as out:
        out.writelines(str(entry) + '\n' for entry in q2)

In [59]:
# Write the plain compound names, one per line. `results` holds
# {'name': ...} documents, which q2_to_file would render as dict reprs
# (e.g. "{'name': 'Haloperidol'}"), so the extracted `names` list is passed.
q2_to_file('query2.txt', names)