In [1]:
from pyeed import Pyeed
from pyeed.model import GOAnnotation, Protein

# Create Neo4j DB

Create local Neo4j DB without authentication.
Graph data science plugin is not installed.

```bash
docker run -it --name pyeed-neo4j \
  -p 7474:7474 \--user="$(id -u):$(id -g)" \
  -e NEO4J_AUTH=none \
  -p 7687:7687 \
  -v $HOME/Documents/db/data:/data \
  -v $HOME/Documents/db/logs:/logs \
  -v $HOME/Documents/db/import:/var/lib/neo4j/import \
  -v $HOME/Documents/db/plugins:/plugins \
  -e NEO4J_AUTH=neo4j/test \
  -e NEO4JLABS_PLUGINS='["apoc"]' \
  -e NEO4J_dbms_security_procedures_unrestricted="apoc.*" \
  -d neo4j:latest
```

# Connect to DB

Neo4j DB is hosted locally via Docker.
Also possible to use free hosted Neo4j Sandbox (not tested).

In [2]:
uri = "bolt://127.0.0.1:7687"
user = None
password = None

# Create a Pyeed object, automatically connecting to the database
eedb = Pyeed(uri)
eedb.db._wipe_database()

# DB connector is a property of the Pyeed object
print(eedb.db)

# If this is the first time you are running this script, the pyeed graph model needs to be initialized
first_time = False
if first_time:
    eedb.db._initialize_db_constraints(user=user, password=password)

📡 Connected to database.
All data has been wiped from the database.
<pyeed.dbconnect.DatabaseConnector object at 0x12a390610>


In [3]:
ids = [
    "P04182",
    "Q6QDP7",
    "P04182",
    "P29758",
    "A0A851UXD9",
    "A0A8C6HVU6",
    "A0A8C6GQ10",
    "A0A1U7QEB0",
    "A0A6I9L5L6",
    "G3HVE0",
    "A0A8J6G992",
    "A0A8C6W4W5",
    "A0A8B9YUY7",
    "L8I4V3",
    "A0A6P3IYQ1",
    "A0A452EKJ3",
    "A0A6P5B7Q0",
    "F1MYG0",
    "A0A5J5MK22",
    "A0A6J0Y425",
    "Q3ZCF5",
]

# Fetch proteins from primary database
eedb.fetch_from_primary_db(ids)

# number of nodes and edges in db
print(eedb.db.stats())

[32m2024-10-14 00:25:34.005[0m | [34m[1mDEBUG   [0m | [36mpyeed.adapter.primary_db_adapter[0m:[36mmake_request[0m:[36m142[0m - [34m[1mSending 5 requests in batches of 5[0m
[32m2024-10-14 00:25:34.009[0m | [34m[1mDEBUG   [0m | [36mpyeed.adapter.primary_db_adapter[0m:[36msend_request[0m:[36m123[0m - [34m[1mSending request to https://www.ebi.ac.uk/proteins/api/proteins with parameters: {'format': 'json', 'accession': 'P04182,Q6QDP7,P04182,P29758,A0A851UXD9'}[0m
[32m2024-10-14 00:25:34.109[0m | [34m[1mDEBUG   [0m | [36mpyeed.adapter.primary_db_adapter[0m:[36msend_request[0m:[36m123[0m - [34m[1mSending request to https://www.ebi.ac.uk/proteins/api/proteins with parameters: {'format': 'json', 'accession': 'A0A8C6HVU6,A0A8C6GQ10,A0A1U7QEB0,A0A6I9L5L6,G3HVE0'}[0m
[32m2024-10-14 00:25:34.209[0m | [34m[1mDEBUG   [0m | [36mpyeed.adapter.primary_db_adapter[0m:[36msend_request[0m:[36m123[0m - [34m[1mSending request to https://www.ebi.ac.uk/prot

{'nodes': 69, 'relationships': 179}


To use the web interface, open a browser and go to `http://localhost:7474/`.

# Query DB

In [4]:
## Query using pyeed graph objects
# Get all proteins
proteins = Protein.nodes.all()
print("Number of proteins in database: ", len(proteins))

# Get protein with id P04182
protein = Protein.nodes.get(accession_id="P04182")
print(protein)

# Get all protein which are accociated with GO term GO:0005739 (mitochondrion)
go_annotation = GOAnnotation.nodes.get(go_id="GO:0005739")
mito_proteins = protein.go_annotation.all()
print("Number of proteins associated with GO:0005739: ", len(mito_proteins))


## Or execute cypher query
# Get all organisms that have at least two connected proteins
query = """
MATCH (o:Organism)<-[:ORIGINATES_FROM]-(p:Protein)
WITH o, COUNT(p) AS proteinCount
WHERE proteinCount >= 2
RETURN o
"""

organisms = eedb.db.execute_read(query)
print("Number of organisms with at least two proteins: ", len(organisms))

Number of proteins in database:  20
{'accession_id': 'P04182', 'sequence': 'MLSKLASLQTVAALRRGLRTSVASATSVATKKTEQGPPSSEYIFERESKYGAHNYHPLPVALERGKGIYMWDVEGRQYFDFLSAYGAVSQGHCHPKIIEAMKSQVDKLTLTSRAFYNNVLGEYEEYITKLFNYNKVLPMNTGVEAGETACKLARRWGYTVKGIQKYKAKIVFAVGNFWGRTLSAVSSSTDPTSYDGFGPFMPGFETIPYNDLPALERALQDPNVAAFMVEPIQGEAGVIVPDPGYLTGVRELCTRHQVLFIADEIQTGLARTGRWLAVDHENVRPDIVLLGKALSGGLYPVSAVLCDDDIMLTIKPGEHGSTYGGNPLGCRIAIAALEVLEEEHLAENADKMGAILRKELMKLPSDVVTAVRGKGLLNAIVIRETKDCDAWKVCLRLRDNGLLAKPTHGDIIRLAPPLVIKEDEIRESVEIINKTILSF', 'name': 'Ornithine aminotransferase, mitochondrial', 'seq_length': 439, 'mol_weight': 48333.0, 'ec_number': '2.6.1.13', 'nucleotide_id': None, 'locus_tag': None, 'structure_ids': None, 'go_terms': None, 'embedding': None, 'element_id_property': '4:2dbbe7d3-51e1-4903-a514-4dd4aed7696d:203'}
Number of proteins associated with GO:0005739:  11
Number of organisms with at least two proteins:  3


In [5]:
# close connection
eedb.db.close()

🔌 Connection closed.
