<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/data_connectors/DatabaseReaderDemo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Database Reader

If you're opening this notebook on Colab, you will probably need to install LlamaIndex 🦙.

In [1]:
# Install the DatabaseReader integration package into the running kernel's environment.
%pip install llama-index-readers-database

Note: you may need to restart the kernel to use updated packages.


In [2]:
!pip install llama-index

Collecting llama-index-core<0.12.0,>=0.11.23 (from llama-index)
  Using cached llama_index_core-0.11.23-py3-none-any.whl.metadata (2.5 kB)
Using cached llama_index_core-0.11.23-py3-none-any.whl (1.6 MB)
Installing collected packages: llama-index-core
  Attempting uninstall: llama-index-core
    Found existing installation: llama-index-core 0.12.1
    Uninstalling llama-index-core-0.12.1:
      Successfully uninstalled llama-index-core-0.12.1
Successfully installed llama-index-core-0.11.23


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
llama-index-embeddings-huggingface 0.4.0 requires llama-index-core<0.13.0,>=0.12.0, but you have llama-index-core 0.11.23 which is incompatible.
llama-index-readers-database 0.3.0 requires llama-index-core<0.13.0,>=0.12.0, but you have llama-index-core 0.11.23 which is incompatible.
llama-index-readers-deeplake 0.3.0 requires llama-index-core<0.13.0,>=0.12.0, but you have llama-index-core 0.11.23 which is incompatible.


In [3]:
import logging
import sys

# Send INFO-level (and above) log records to stdout so they appear in cell output.
# basicConfig() itself attaches a stdout StreamHandler to the root logger, so no
# further handler is registered here: adding a second stdout handler makes every
# log record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [4]:
from __future__ import absolute_import

# My OpenAI Key
import getpass
import os

os.environ["OPENAI_API_KEY"] = getpass.getpass("open ai api key: ")

from llama_index.readers.database import DatabaseReader
from llama_index.core import VectorStoreIndex

In [5]:
# Start a PostgreSQL container in the background (-d), published on port 5432,
# with the user / password / database name that the following cells connect with.
# NOTE(review): the container name is fixed, so re-running this cell presumably
# fails while a container named `my_postgres` still exists — verify, and remove
# the old container first if so.
# NOTE(review): the server may need a moment before it accepts connections — confirm.
!docker run --name my_postgres -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=FakeExamplePassword -e POSTGRES_DB=postgres -p 5432:5432 -d postgres


9cae2e8ab89f268f3199681431ebb829a7401f6e9c92822da0963447eb21e8e1


In [6]:
# Connection settings for the local PostgreSQL container started above.
# They are handed to DatabaseReader as keyword arguments.
connection_settings = {
    "scheme": "postgresql",  # database scheme
    "host": "localhost",  # database host
    "port": "5432",  # database port
    "user": "postgres",  # database user
    "password": "FakeExamplePassword",  # database password (example value only)
    "dbname": "postgres",  # database name
}

db = DatabaseReader(**connection_settings)

In [7]:
### DatabaseReader class ###
# The reader instance itself, followed by its `load_data` method.
print(type(db))
print(type(db.load_data))

### SQLDatabase class ###
# `db.sql_database` is the SQLDatabase instance held by the reader.
sql_database = db.sql_database
print(type(sql_database))

# Print the type of each SQLDatabase member of interest, one per line:
# the methods first, then the `dialect` and `engine` properties.
for attribute_name in (
    "from_uri",
    "get_single_table_info",
    "get_table_columns",
    "get_usable_table_names",
    "insert_into_table",
    "run_sql",
    "dialect",
    "engine",
):
    print(type(getattr(sql_database, attribute_name)))

<class 'llama_index.readers.database.base.DatabaseReader'>
<class 'method'>
<class 'llama_index.core.utilities.sql_wrapper.SQLDatabase'>
<class 'method'>
<class 'method'>
<class 'method'>
<class 'method'>
<class 'method'>
<class 'method'>
<class 'str'>
<class 'sqlalchemy.engine.base.Engine'>


In [8]:
### Building a DatabaseReader from each supported connection source:
### an SQLDatabase instance, an SQLAlchemy engine, or a database URI.
### For every source, print the type of the source and of the resulting reader.

# 1) From an existing SQLDatabase instance.
source_sql_database = db.sql_database
print(type(source_sql_database))
db_from_sql_database = DatabaseReader(sql_database=source_sql_database)
print(type(db_from_sql_database))

# 2) From the SQLAlchemy engine behind that SQLDatabase.
source_engine = db.sql_database.engine
print(type(source_engine))
db_from_engine = DatabaseReader(engine=source_engine)
print(type(db_from_engine))

# 3) From the database URI held by the original reader.
source_uri = db.uri
print(type(source_uri))
db_from_uri = DatabaseReader(uri=source_uri)
print(type(db_from_uri))

<class 'llama_index.core.utilities.sql_wrapper.SQLDatabase'>
<class 'llama_index.readers.database.base.DatabaseReader'>
<class 'sqlalchemy.engine.base.Engine'>
<class 'llama_index.readers.database.base.DatabaseReader'>
<class 'str'>
<class 'llama_index.readers.database.base.DatabaseReader'>


In [None]:
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData, text

# SQLAlchemy engine for the local PostgreSQL container; the credentials are the
# example values passed to `docker run` earlier in the notebook.
engine = create_engine(
    "postgresql://postgres:FakeExamplePassword@localhost:5432/postgres"
)

# Schema of the demo `users` table: an integer primary key plus a required
# name and age.
metadata = MetaData()
users_table = Table(
    "users",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String, nullable=False),
    Column("age", Integer, nullable=False),
)

# Create every table registered on `metadata`. create_all() checks for existing
# tables by default, so re-running this cell does not fail if `users` exists.
metadata.create_all(engine)
print("Users table created.")


Users table created.


In [15]:
from sqlalchemy import (
    Column,
    Integer,
    MetaData,
    String,
    Table,
    create_engine,
    text,
)

# Query the catalog for every table in the `public` schema and print one row
# per table, to confirm that `users` was created.
public_tables_sql = text(
    "SELECT * FROM information_schema.tables WHERE table_schema = 'public';"
)

with engine.connect() as connection:
    public_tables = connection.execute(public_tables_sql)
    print("All tables in the public schema:")
    for table_row in public_tables:
        print(table_row)


All tables in the public schema:
('postgres', 'public', 'users', 'BASE TABLE', None, None, None, None, None, 'YES', 'NO', None)


In [16]:
# Sample rows for the `users` table; `id` is left for the database to assign.
sample_users = [
    {"name": "Alice", "age": 25},
    {"name": "Bob", "age": 30},
    {"name": "Charlie", "age": 17},
    {"name": "Diana", "age": 22},
]

# Insert all rows in one transaction. `conn.begin()` is used as a context
# manager: the transaction is committed when the block finishes normally and
# rolled back automatically if the insert raises, so no manual commit/rollback
# bookkeeping is needed.
# NOTE(review): nothing here prevents duplicates, so re-running this cell
# presumably inserts the same four people again — confirm if that matters.
with engine.connect() as conn:
    try:
        with conn.begin():
            conn.execute(users_table.insert(), sample_users)
    except Exception as e:
        # Report the failure without aborting the notebook run.
        print(f"Error: {e}")
    else:
        print("Sample data inserted into the users table.")


Sample data inserted into the users table.


In [17]:
# Count the rows currently stored in `users` and print the total.
with engine.connect() as connection:
    row_count = connection.execute(text("SELECT COUNT(*) FROM users;")).scalar()
    print("Number of rows in users table:", row_count)


Number of rows in users table: 4


In [18]:
from sqlalchemy import text

# Fetch every row of `users` through a textual SQL statement and print each
# row on its own line.
select_all_users = text("SELECT * FROM users;")

with engine.connect() as connection:
    for user_row in connection.execute(select_all_users):
        print(user_row)


(1, 'Alice', 25)
(2, 'Bob', 30)
(3, 'Charlie', 17)
(4, 'Diana', 22)


In [19]:
# Example SQL query: for every row of the users table whose age is at least 18,
# return a single `text` column that concatenates the name and age into a
# sentence such as "Alice is 25 years old."
# (A plain string literal is used: there are no placeholders to interpolate.)

query = """
    SELECT
        CONCAT(name, ' is ', age, ' years old.') AS text
    FROM public.users
    WHERE age >= 18
    """

In [20]:
# Run the query directly through the reader's SQLDatabase wrapper
# (see SQLDatabase.run_sql in llama_index.core.utilities.sql_wrapper).
texts = db.sql_database.run_sql(command=query)

# Print the result's type, then the result itself, one per line.
# As the recorded output shows, run_sql returns a tuple: the rows rendered as a
# string, followed by a dict holding the raw 'result' rows and the 'col_keys'.
print(type(texts), texts, sep="\n")

<class 'tuple'>
("[('Alice is 25 years old.',), ('Bob is 30 years old.',), ('Diana is 22 years old.',)]", {'result': [('Alice is 25 years old.',), ('Bob is 30 years old.',), ('Diana is 22 years old.',)], 'col_keys': ['text']})


In [21]:
# Display the raw run_sql result as the cell output: a tuple holding the rows
# rendered as a string, followed by a dict with the 'result' rows and 'col_keys'.
texts

("[('Alice is 25 years old.',), ('Bob is 30 years old.',), ('Diana is 22 years old.',)]",
 {'result': [('Alice is 25 years old.',),
   ('Bob is 30 years old.',),
   ('Diana is 22 years old.',)],
  'col_keys': ['text']})

In [22]:
# Run the same query through the reader itself
# (see llama_index.readers.database.DatabaseReader.load_data).
documents = db.load_data(query=query)

# Print the result's type, then the result itself, one per line.
# The type is expected to be <class 'list'>, and the list is expected to hold
# one Document object per returned row.
print(type(documents), documents, sep="\n")

<class 'list'>
[Document(id_='ad3814ae-ae87-429c-85fe-c47e03997855', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='text: Alice is 25 years old.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), Document(id_='38bd1da4-deca-44e9-94cf-3bae79677d46', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='text: Bob is 30 years old.', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'), Document(id_='61c68ef7-a1b2-4580-83f2-cbe432bbd367', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, text='text: Diana is 22 years old.', mimetype='text/plain', start_char_idx=None,

In [23]:
# Build a VectorStoreIndex from the documents returned by the reader.
# The recorded output shows this step posting to the OpenAI embeddings
# endpoint, so the API key configured earlier must be valid.
index = VectorStoreIndex.from_documents(documents)

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


In [24]:
# Display the index object's repr to confirm that it was created.
index

<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x10b0cc4dfd0>