# SQLite, Pandas, and Connector-X

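This notebook benchmarks read time and peak memory consumption when loading a SQLite table into a Pandas DataFrame, to see whether [ConnectorX](https://github.com/sfu-db/connector-x) improves on the existing `sqlite3` and SQLAlchemy approaches.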

In [None]:
import resource
import sqlite3
import timeit
import tracemalloc
from typing import Callable

import connectorx as cx
import pandas as pd
from sqlalchemy import create_engine

In [None]:
# original reference: https://nih.figshare.com/articles/dataset/Cell_Health_-_Cell_Painting_Single_Cell_Profiles/9995672
# testing file generated from shrink-demo-file.ipynb
# SQLite database file that every benchmark function below reads from
db_filename = "testing_SQ00014613.sqlite"
# single query shared by all readers so each one loads the same rows (the whole Cells table)
query = "select * from Cells"

In [None]:
# create a timeit function which we'll keep standard throughout benchmarking
def test_timeit(func: Callable):
    func_name = func.__name__
    print("Timing:", func.__name__)
    print(
        "Took:", timeit.timeit(f"{func_name}()", globals=globals(), number=5), "seconds"
    )

In [None]:
# create a peak-memory tracing function which we'll keep standard throughout benchmarking
def test_memory(func: Callable):
    tracemalloc.reset_peak()
    tracemalloc.start()
    print("Tracing:", func.__name__)
    func()
    cur_mem, peak_mem = tracemalloc.get_traced_memory()
    peak_mem_mb = peak_mem / 1024 / 1024
    print("Used:", peak_mem_mb, "MB", "peak memory")

In [None]:
# sqlite test
def sqlite3_test():
    """Read the shared query into a DataFrame through a raw ``sqlite3`` connection.

    The resulting DataFrame is discarded; the function exists only to be
    timed and memory-traced. The connection is closed on every call so that
    repeated benchmark runs do not accumulate open database handles.
    """
    sqlite_conn = sqlite3.connect(database=db_filename)
    try:
        pd.read_sql(sql=query, con=sqlite_conn)
    finally:
        # sqlite3's own context manager only commits/rolls back, so close explicitly.
        sqlite_conn.close()

In [None]:
# sqlalchemy test
def sqlalchemy_test():
    """Read the shared query into a DataFrame through a SQLAlchemy connection.

    The resulting DataFrame is discarded; the function exists only to be
    timed and memory-traced. The connection is released via its context
    manager and the engine is disposed on every call, so repeated benchmark
    runs do not accumulate open connections or pools.
    """
    sqlalchemy_engine = create_engine(url=f"sqlite:///{db_filename}")
    try:
        with sqlalchemy_engine.connect() as sqlalchemy_conn:
            pd.read_sql(sql=query, con=sqlalchemy_conn)
    finally:
        # Each call builds a fresh engine, so tear down its pool as well.
        sqlalchemy_engine.dispose()

In [None]:
# connectorx test
def connectorx_test():
    """Load the shared query as a pandas DataFrame via ConnectorX.

    The resulting DataFrame is discarded; the function exists only to be
    timed and memory-traced.
    """
    connection_uri = f"sqlite://{db_filename}"
    cx.read_sql(
        conn=connection_uri,
        query=query,
        return_type="pandas",
    )

In [None]:
# benchmark the sqlite3 reader: timing first, then peak memory
test_timeit(sqlite3_test)
test_memory(sqlite3_test)

In [None]:
# benchmark the sqlalchemy reader: timing first, then peak memory
test_timeit(sqlalchemy_test)
test_memory(sqlalchemy_test)

In [None]:
# benchmark the connectorx reader: timing first, then peak memory
test_timeit(connectorx_test)
test_memory(connectorx_test)