In [1]:
try:
    from sdRDM import DataModel
    from sdRDM.database import build_sql_database
except ModuleNotFoundError:
    # sdRDM is not available in the kernel's environment: install it with the
    # same interpreter that runs this notebook, then retry the imports so that
    # 'DataModel' and 'build_sql_database' are defined for the cells below.
    import importlib
    import subprocess
    import sys

    # check_call raises CalledProcessError when pip exits with a non-zero
    # status, so a failed installation is reported right here.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "git+https://github.com/JR-1991/software-driven-rdm.git"]
    )

    # Let the import system notice the package that was installed while the
    # kernel was already running.
    importlib.invalidate_caches()

    from sdRDM import DataModel
    from sdRDM.database import build_sql_database

### Fetching the data model

The PyEED data model can be inspected [here](https://github.com/PyEED/pyeed-data-model). The repository contains a [markdown file](https://github.com/PyEED/pyeed-data-model/blob/main/specifications/model.md) that defines the model. By using sdRDM's `DataModel` class and its `from_git` method, we can generate the corresponding Python code in-memory and use the model right away. See the printed tree to verify that the code is present.

In [2]:
# Generate the PyEED classes from the Git repository of the data model
pyeed_repository = "https://github.com/PyEED/pyeed-data-model.git"
pyeed = DataModel.from_git(pyeed_repository)

# Print the attribute tree of ProteinSequence to check the generated code
pyeed.ProteinSequence.visualize_tree()

Klone nach '/var/folders/_c/jfgxmn5j4392zwrvt1vtvlbm0000gn/T/tmpmc4whppx'...


ProteinSequence
├── id
├── name
├── amino_acid_sequence
├── nr_id
├── uniprot_id
├── pdb_id
├── organism
│   └── Organism
│       ├── id
│       └── ncbi_taxonomy_id
├── domain
│   └── Domain
│       ├── id
│       ├── name
│       ├── start_position
│       └── end_position
├── reference_sequence
├── equivalence
│   └── Equivalence
│       ├── id
│       ├── reference_position
│       └── sequence_position
└── annotation
    └── Annotation
        ├── id
        ├── start_position
        ├── function
        └── end_position


### Building the SQL database

Next, we are going to use the `build_sql_database` function to set up an SQLite database file. This file will be populated later on with data from our model. The database will contain a table for each object/attribute and thus facilitates an easy transfer from an application to the database.

In [3]:
# Set up the database file './test.db' for the two sequence objects of the model
build_sql_database(
    pyeed.ProteinSequence,
    pyeed.DNASequence,
    loc="./test.db",
)

### Creating a dataset

In order to demonstrate how to populate the database using our model, we are going to construct a small dataset using the data model we just loaded.

In [4]:
# Describe the PDB structure '3ZDR' as a ProteinSequence object.
# The amino acid sequence is stored in its own variable to keep the
# constructor call readable; the annotations are added in the next cells.
adh_sequence = "MGSSHHHHHHSSGLVPRGSHMMNMQWFKVPPKIYFEKNAVQYLAKMPDISRAFIVTDPGMVKLGYVDKVLYYLRRRPDYVHSEIFSEVEPDPSIETVMKGVDMMRSFEPDVIIALGGGSPMDAAKAMWLFYEHPTADFNALKQKFLDIRKRVYKYPKLGQKAKFVAIPTTSGTGSEVTSFAVITDKKTNIKYPLADYELTPDVAIVDPQFVMTVPKHVTADTGMDVLTHAIEAYVSNMANDYTDGLAMKAIQLVFEYLPRAYQNGADELAREKMHNASTIAGMAFANAFLGINHSLAHKLGAEFHIPHGRANTILMPHVIRYNAAKPKKFTAFPKYEYFKADQRYAEIARMLGLPARTTEEGVESLVQAIIKLAKQLDMPLSIEACGVSKQEFESKVEKLAELAFEDQCTTANPKLPLVSDLVHIYRQAFKGV"

dataset = pyeed.ProteinSequence(
    name="Alcohol dehydrogenase",
    pdb_id=["3ZDR"],
    amino_acid_sequence=adh_sequence,
)

In [5]:
# Add one 'ZN Binding' annotation per listed start position
for position in (225, 229, 294, 308):
    dataset.add_to_annotation(start_position=position, function="ZN Binding")

In [6]:
# Add one 'GOL Binding' annotation per listed start position
for position in (225, 229, 285, 294, 308):
    dataset.add_to_annotation(start_position=position, function="GOL Binding")

In [7]:
# Add one 'SO4 Binding' annotation per listed start position
for position in (396, 399):
    dataset.add_to_annotation(start_position=position, function="SO4 Binding")

In [8]:
# Add one 'Covalent Bond' annotation per (start, end) position pair
for start, end in ((408, 409), (409, 410)):
    dataset.add_to_annotation(start_position=start, end_position=end, function="Covalent Bond")

### Populating the database

Finally, we are going to add the data to our previously created database by using the dataset's `to_sql` method, to which we also pass the location of our database file. This will create an object-relational mapping (ORM) that represents the database structure and maps the values present in our dataset to the corresponding tables.

In [9]:
# Write the dataset to './test.db', the same path that was passed to
# build_sql_database earlier in this notebook
dataset.to_sql(loc="./test.db")