### Read table data

In [1]:
### Install

# Install the third-party packages this notebook uses, via the %pip magic
# so they land in the environment of the running kernel.
# NOTE(review): no versions are pinned here, so a fresh run may resolve
# different releases over time — consider pinning for reproducibility.
%pip install sqlalchemy pandas python-dotenv pymysql pyarrow




In [2]:
### Imports

import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

In [3]:
### Load credentials

# .env file path (relative to the notebook's working directory)
env_path = os.path.join("env", "credentials.env")

# Load .env file into the process environment
load_dotenv(dotenv_path=env_path)

# Call credentials
db_host = os.getenv("DB_HOST")
db_name = os.getenv("DB_NAME")
db_user = os.getenv("DB_USER")
db_password = os.getenv("DB_PASSWORD")

# Fail fast when a variable is absent: os.getenv returns None for unset
# names, and None would otherwise be formatted into the connection URL as
# the literal text "None", producing a confusing connection failure later.
_required_credentials = {
    "DB_HOST": db_host,
    "DB_NAME": db_name,
    "DB_USER": db_user,
    "DB_PASSWORD": db_password,
}
_missing_credentials = [
    name for name, value in _required_credentials.items() if value is None
]
if _missing_credentials:
    raise RuntimeError(
        "Missing database credentials: "
        + ", ".join(_missing_credentials)
        + f" (expected in '{env_path}' or the process environment)"
    )

In [4]:
### connect to DB

# Percent-encode the user name and password before placing them in the URL:
# reserved characters such as "@", ":" or "/" inside a credential would
# otherwise be parsed as URL delimiters. safe="" encodes every reserved
# character (including "/" and "+").
encoded_user = quote(db_user or "", safe="")
encoded_password = quote(db_password or "", safe="")
db_url = f"mysql+pymysql://{encoded_user}:{encoded_password}@{db_host}/{db_name}"


engine = create_engine(db_url)

query = text("SELECT * FROM situation.l12_proteinliganddocking")

# Save path for CSV and parquet files
save_path_parquet = "data/proteinliganddocking.parquet"
save_path_csv = "data/proteinliganddocking.csv"


try:
    # The context manager closes the connection on exit, so no explicit
    # close() call is needed; the connection is only held while fetching.
    with engine.connect() as connection:
        result = connection.execute(query)
        df = pd.DataFrame(result.fetchall(), columns=result.keys())

    # Make sure the target directories exist before writing.
    for output_path in (save_path_parquet, save_path_csv):
        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    df.to_parquet(save_path_parquet)
    print(f"Data (Parquet) successfully saved in '{save_path_parquet}'")
    df.to_csv(save_path_csv, index=False)
    print(f"Data (CSV) successfully saved in '{save_path_csv}'")
except Exception as e:
    print(f"Error: {e}")
    # Re-raise so execution stops here: continuing would let later cells
    # read output files left over from a previous run.
    raise

Data (Parquet) successfully saved in 'data/proteinliganddocking.parquet'
Data (CSV) successfully saved in 'data/proteinliganddocking.csv'


In [6]:
### Read and output data

# Load the Parquet file written to `save_path_parquet`, using the pyarrow reader.
data = pd.read_parquet(path=save_path_parquet, engine="pyarrow")

# Show the first 15 rows as the cell's output.
data.head(n=15)

Unnamed: 0,id,protein_id,ligand_id,protein_HELM,ligand_HELM,affinity,resolution,pdb_code,classification,source
0,1,P001,L001,PEPTIDE1{A.G}$V2.0,CHEM1{O=C(O)C(O)C(O)C(O)C=O}$V2.0,4.5,2.0,PDB001,Enzyme,Simulated
1,2,P002,L002,PEPTIDE1{K.R}$V2.0,CHEM1{O=C(N)CCC(N)=O}$V2.0,6.3,2.5,PDB002,Transport,Simulated
2,3,P003,L003,PEPTIDE1{L.R.K}$V2.0,CHEM1{O=Cc1ccccc1}$V2.0,5.0,1.8,PDB003,Receptor,Simulated
3,4,P004,L004,PEPTIDE1{E.M.V}$V2.0,CHEM1{N(C)C}$V2.0,7.1,2.1,PDB004,Enzyme,Simulated
4,5,P005,L005,PEPTIDE1{H.G}$V2.0,CHEM1{CCCCCC}$V2.0,3.8,2.0,PDB005,Transport,Simulated
5,6,P006,L006,PEPTIDE1{N.D}$V2.0,CHEM1{O=C(O)c1ccccc1}$V2.0,4.7,1.9,PDB006,Enzyme,Simulated
6,7,P007,L007,PEPTIDE1{F.Q.K}$V2.0,CHEM1{O=C(O)C(F)(F)F}$V2.0,6.8,2.2,PDB007,Transport,Simulated
7,8,P008,L008,PEPTIDE1{S.T.Y}$V2.0,CHEM1{C1CCCCC1}$V2.0,5.6,2.0,PDB008,Receptor,Simulated
8,9,P009,L009,PEPTIDE1{Y.V.M}$V2.0,CHEM1{O=C1CCCN1}$V2.0,6.1,2.1,PDB009,Enzyme,Simulated
9,10,P010,L010,PEPTIDE1{L.H.K}$V2.0,CHEM1{c1nnnn1C}$V2.0,4.3,2.0,PDB010,Transport,Simulated
