From b5e78a451136765c1b7b3a5f5f7609bf1da2b9a2 Mon Sep 17 00:00:00 2001 From: ParticularlyPythonicBS Date: Tue, 7 Apr 2026 12:45:20 -0400 Subject: [PATCH 1/3] perf: sqlite performance improvements removes VACUUM operations and performance tuning PRAGMAs that are now exposed in the config --- temoa/_internal/temoa_sequencer.py | 4 ++ temoa/core/config.py | 14 ++++++ temoa/extensions/myopic/myopic_sequencer.py | 5 +- temoa/tutorial_assets/config_sample.toml | 24 ++++++++++ temoa/utilities/sqlite_utils.py | 52 +++++++++++++++++++++ 5 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 temoa/utilities/sqlite_utils.py diff --git a/temoa/_internal/temoa_sequencer.py b/temoa/_internal/temoa_sequencer.py index 0faf201b..ea3c6977 100644 --- a/temoa/_internal/temoa_sequencer.py +++ b/temoa/_internal/temoa_sequencer.py @@ -35,6 +35,7 @@ from temoa.extensions.single_vector_mga.sv_mga_sequencer import SvMgaSequencer from temoa.extensions.stochastics.stochastic_sequencer import StochasticSequencer from temoa.model_checking.pricing_check import price_checker +from temoa.utilities.sqlite_utils import tune_sqlite_connection if TYPE_CHECKING: import pyomo.opt @@ -134,6 +135,7 @@ def build_model(self) -> TemoaModel: raise RuntimeError('Database version check failed. 
See log file for details.') with sqlite3.connect(self.config.input_database) as con: + tune_sqlite_connection(con, self.config) hybrid_loader = HybridLoader(db_connection=con, config=self.config) data_portal = hybrid_loader.load_data_portal(myopic_index=None) instance = build_instance(data_portal, silent=self.config.silent) @@ -203,6 +205,7 @@ def start(self) -> None: def _run_check_mode(self) -> None: """Encapsulated logic for the CHECK mode.""" with sqlite3.connect(self.config.input_database) as con: + tune_sqlite_connection(con, self.config) if not self.config.source_trace: logger.warning('Source trace is automatically enabled for CHECK mode.') self.config.source_trace = True @@ -221,6 +224,7 @@ def _run_check_mode(self) -> None: def _run_perfect_foresight(self) -> None: """Encapsulated logic for the PERFECT_FORESIGHT mode.""" with sqlite3.connect(self.config.input_database) as con: + tune_sqlite_connection(con, self.config) hybrid_loader = HybridLoader(db_connection=con, config=self.config) data_portal = hybrid_loader.load_data_portal(myopic_index=None) instance = build_instance( diff --git a/temoa/core/config.py b/temoa/core/config.py index 3c7858d7..311893ea 100644 --- a/temoa/core/config.py +++ b/temoa/core/config.py @@ -159,6 +159,14 @@ def __init__( self.output_threshold_activity = output_threshold_activity self.output_threshold_emission = output_threshold_emission self.output_threshold_cost = output_threshold_cost + self.sqlite_inputs = sqlite or {} + + # SQLite performance defaults + self.sqlite_journal_mode = str(self.sqlite_inputs.get('journal_mode', 'WAL')) + self.sqlite_synchronous = str(self.sqlite_inputs.get('synchronous', 'NORMAL')) + self.sqlite_temp_store = str(self.sqlite_inputs.get('temp_store', 'MEMORY')) + self.sqlite_mmap_size = int(self.sqlite_inputs.get('mmap_size', 8589934592)) + self.sqlite_cache_size = int(self.sqlite_inputs.get('cache_size', -512000)) # Cycle detection limits if not isinstance(cycle_count_limit, int) or 
cycle_count_limit < -1: @@ -306,6 +314,12 @@ def __repr__(self) -> str: msg += '{:>{}s}: {}\n'.format('Save duals to output db', width, self.save_duals) msg += '{:>{}s}: {}\n'.format('Save storage to output db', width, self.save_storage_levels) + msg += spacer + msg += '{:>{}s}: {}\n'.format('SQLite journal mode', width, self.sqlite_journal_mode) + msg += '{:>{}s}: {}\n'.format('SQLite synchronous', width, self.sqlite_synchronous) + msg += '{:>{}s}: {}\n'.format('SQLite mmap size (bytes)', width, self.sqlite_mmap_size) + msg += '{:>{}s}: {}\n'.format('SQLite cache size (pages)', width, self.sqlite_cache_size) + msg += spacer msg += '{:>{}s}: {}\n'.format('Time sequencing', width, self.time_sequencing) msg += '{:>{}s}: {}\n'.format('Days per period', width, self.days_per_period) diff --git a/temoa/extensions/myopic/myopic_sequencer.py b/temoa/extensions/myopic/myopic_sequencer.py index d7f9601d..fbee15c4 100644 --- a/temoa/extensions/myopic/myopic_sequencer.py +++ b/temoa/extensions/myopic/myopic_sequencer.py @@ -21,6 +21,7 @@ from temoa.extensions.myopic.myopic_index import MyopicIndex from temoa.extensions.myopic.myopic_progress_mapper import MyopicProgressMapper from temoa.model_checking.pricing_check import price_checker +from temoa.utilities.sqlite_utils import tune_sqlite_connection logger = logging.getLogger(__name__) @@ -164,6 +165,8 @@ def get_connection(self) -> Connection: logger.error('Run aborted. I/O database pointers are different') sys.exit(-1) + tune_sqlite_connection(con, self.config) + return con def start(self) -> None: @@ -303,8 +306,6 @@ def start(self) -> None: ) self.output_con.commit() - # 11. Compact the db... 
lots of writes/deletes leads to bloat - self.output_con.execute('VACUUM;') # Total system cost is, theoretically, sum of discounted costs from output_cost table total_cost = self.get_current_total_cost(last_base_year if last_base_year is not None else 0) diff --git a/temoa/tutorial_assets/config_sample.toml b/temoa/tutorial_assets/config_sample.toml index e03cdd7d..3943bf18 100644 --- a/temoa/tutorial_assets/config_sample.toml +++ b/temoa/tutorial_assets/config_sample.toml @@ -59,6 +59,30 @@ cycle_count_limit = 100 # Use this to filter out very small cycles if needed cycle_length_limit = 1 +# ------------------------------------ +# SQLITE PERFORMANCE TUNING +# ------------------------------------ + +[sqlite] +# These settings improve database performance, especially for large-scale +# runs and myopic/MGA modes which perform many small writes. + +# journal_mode: WAL (Write-Ahead Logging) provides better concurrency and speed. +# Note: This creates sidecar files (-wal and -shm) during execution. +journal_mode = 'WAL' + +# synchronous: NORMAL reduces disk flushes while remaining safe against +# application-level crashes. +synchronous = 'NORMAL' + +# mmap_size: Memory-map the database file for faster reads (bytes). +# 8589934592 = 8GB +mmap_size = 8589934592 + +# cache_size: SQLite page cache size. Negative values specify size in KiB. +# -512000 = 500MiB +cache_size = -512000 + # ------------------------------------ # SOLVER # Solver Selection diff --git a/temoa/utilities/sqlite_utils.py b/temoa/utilities/sqlite_utils.py new file mode 100644 index 00000000..ec3433f9 --- /dev/null +++ b/temoa/utilities/sqlite_utils.py @@ -0,0 +1,52 @@ +""" +Utilities for SQLite performance tuning in Temoa. 
+""" + +import logging +import sqlite3 +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from temoa.core.config import TemoaConfig + +logger = logging.getLogger(__name__) + + +def tune_sqlite_connection(con: sqlite3.Connection, config: 'TemoaConfig | None' = None) -> None: + """ + Apply performance-tuning PRAGMAs to a SQLite connection. + + Args: + con: The sqlite3.Connection object to tune. + config: Optional TemoaConfig object to override defaults. + """ + journal_mode = 'WAL' + synchronous = 'NORMAL' + temp_store = 'MEMORY' + mmap_size = 8589934592 # 8GB + cache_size = -512000 # 500MB (negative means KiB) + + if config: + journal_mode = getattr(config, 'sqlite_journal_mode', journal_mode) + synchronous = getattr(config, 'sqlite_synchronous', synchronous) + temp_store = getattr(config, 'sqlite_temp_store', temp_store) + mmap_size = getattr(config, 'sqlite_mmap_size', mmap_size) + cache_size = getattr(config, 'sqlite_cache_size', cache_size) + + try: + con.execute(f'PRAGMA journal_mode = {journal_mode}') + con.execute(f'PRAGMA synchronous = {synchronous}') + con.execute(f'PRAGMA temp_store = {temp_store}') + con.execute(f'PRAGMA mmap_size = {mmap_size}') + con.execute(f'PRAGMA cache_size = {cache_size}') + logger.debug( + 'SQLite tuned: journal_mode=%s, synchronous=%s, temp_store=%s, ' + 'mmap_size=%d, cache_size=%d', + journal_mode, + synchronous, + temp_store, + mmap_size, + cache_size, + ) + except sqlite3.Error as e: + logger.warning('Failed to apply some SQLite performance PRAGMAs: %s', e) From a77d767cc2a43e2b86322fa26f1cc5a1f09ca3bf Mon Sep 17 00:00:00 2001 From: ParticularlyPythonicBS Date: Tue, 7 Apr 2026 12:45:40 -0400 Subject: [PATCH 2/3] docs: adding sqlite config details to docs --- docs/source/database.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/source/database.rst b/docs/source/database.rst index 59dd676f..0e64b632 100644 --- a/docs/source/database.rst +++ b/docs/source/database.rst @@ -213,5 +213,18 @@ Users can 
configure the cycle detection behavior using the following settings: Note that the myopic mode *requires* the use of Source Tracing to ensure accuracy as some orphans may be produced by endogenous decisions in myopic runs. +SQLite Performance Tuning +------------------------- + +For large-scale models or long-running simulation modes (such as myopic or MGA), database I/O can become a performance bottleneck. Temoa allows you to tune the SQLite connection parameters in your configuration file under the ``[sqlite]`` section. + +The following settings are available: + +* **journal_mode**: Sets the SQLite journaling mode. Default is ``WAL`` (Write-Ahead Logging), which provides better performance and concurrency. Note that this will create temporary ``-wal`` and ``-shm`` files alongside your database during execution. +* **synchronous**: Controls how frequently SQLite flushes data to disk. Default is ``NORMAL``, which provides a good balance between speed and safety. +* **mmap_size**: The maximum number of bytes for memory-mapped I/O. Default is 8GiB (``8589934592``). This allows SQLite to access the database file directly from memory, significantly speeding up reads for large databases. +* **cache_size**: The size of the SQLite page cache. A positive value is a number of pages; a negative value is a size in KiB. Default is 500MiB (``-512000``). + +These settings are especially impactful in **myopic mode**, where Temoa frequently updates and queries the database between period iterations. The ``[sqlite]`` section also accepts ``temp_store`` (default ``MEMORY``). Temoa also no longer runs the per-period ``VACUUM`` operation in myopic runs, avoiding redundant and expensive full-database rewrites. ..
_sqlite: https://www.sqlite.org/ From ccfa00455afff0de28d497d661a1070d4694aede Mon Sep 17 00:00:00 2001 From: ParticularlyPythonicBS Date: Tue, 7 Apr 2026 17:44:46 -0400 Subject: [PATCH 3/3] PR feedback fixes --- temoa/core/config.py | 53 ++++++++++++++++++++++++++++----- temoa/utilities/sqlite_utils.py | 31 +++++++++---------- 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/temoa/core/config.py b/temoa/core/config.py index 311893ea..de3e3430 100644 --- a/temoa/core/config.py +++ b/temoa/core/config.py @@ -161,12 +161,48 @@ def __init__( self.output_threshold_cost = output_threshold_cost self.sqlite_inputs = sqlite or {} - # SQLite performance defaults - self.sqlite_journal_mode = str(self.sqlite_inputs.get('journal_mode', 'WAL')) - self.sqlite_synchronous = str(self.sqlite_inputs.get('synchronous', 'NORMAL')) - self.sqlite_temp_store = str(self.sqlite_inputs.get('temp_store', 'MEMORY')) - self.sqlite_mmap_size = int(self.sqlite_inputs.get('mmap_size', 8589934592)) - self.sqlite_cache_size = int(self.sqlite_inputs.get('cache_size', -512000)) + # SQLite performance settings + # journal_mode: DELETE | TRUNCATE | PERSIST | MEMORY | WAL | OFF + jm_allowed = {'DELETE', 'TRUNCATE', 'PERSIST', 'MEMORY', 'WAL', 'OFF'} + jm = self.sqlite_inputs.get('journal_mode', 'WAL') + if isinstance(jm, str) and jm.upper() in jm_allowed: + self.sqlite_journal_mode: str | int = jm.upper() + elif isinstance(jm, (int, float, str)) and str(jm).isdigit(): + self.sqlite_journal_mode = int(jm) + else: + self.sqlite_journal_mode = 'WAL' + + # synchronous: OFF (0) | NORMAL (1) | FULL (2) | EXTRA (3) + sync_allowed = {'OFF', 'NORMAL', 'FULL', 'EXTRA'} + sync = self.sqlite_inputs.get('synchronous', 'NORMAL') + if isinstance(sync, str) and sync.upper() in sync_allowed: + self.sqlite_synchronous: str | int = sync.upper() + elif isinstance(sync, (int, float, str)) and str(sync).isdigit(): + self.sqlite_synchronous = int(sync) + else: + self.sqlite_synchronous = 'NORMAL' + + # 
temp_store: DEFAULT (0) | FILE (1) | MEMORY (2) + temp_allowed = {'DEFAULT', 'FILE', 'MEMORY'} + ts = self.sqlite_inputs.get('temp_store', 'MEMORY') + if isinstance(ts, str) and ts.upper() in temp_allowed: + self.sqlite_temp_store: str | int = ts.upper() + elif isinstance(ts, (int, float, str)) and str(ts).isdigit(): + self.sqlite_temp_store = int(ts) + else: + self.sqlite_temp_store = 'MEMORY' + + mmap_size = self.sqlite_inputs.get('mmap_size', 8589934592) + if isinstance(mmap_size, (int, float, str)): + self.sqlite_mmap_size = int(mmap_size) + else: + self.sqlite_mmap_size = 8589934592 + + cache_size = self.sqlite_inputs.get('cache_size', -512000) + if isinstance(cache_size, (int, float, str)): + self.sqlite_cache_size = int(cache_size) + else: + self.sqlite_cache_size = -512000 # Cycle detection limits if not isinstance(cycle_count_limit, int) or cycle_count_limit < -1: @@ -317,8 +353,11 @@ def __repr__(self) -> str: msg += spacer msg += '{:>{}s}: {}\n'.format('SQLite journal mode', width, self.sqlite_journal_mode) msg += '{:>{}s}: {}\n'.format('SQLite synchronous', width, self.sqlite_synchronous) + msg += '{:>{}s}: {}\n'.format('SQLite temp store', width, self.sqlite_temp_store) msg += '{:>{}s}: {}\n'.format('SQLite mmap size (bytes)', width, self.sqlite_mmap_size) - msg += '{:>{}s}: {}\n'.format('SQLite cache size (pages)', width, self.sqlite_cache_size) + msg += '{:>{}s}: {}\n'.format( + 'SQLite cache size (pages or KiB if negative)', width, self.sqlite_cache_size + ) msg += spacer msg += '{:>{}s}: {}\n'.format('Time sequencing', width, self.time_sequencing) diff --git a/temoa/utilities/sqlite_utils.py b/temoa/utilities/sqlite_utils.py index ec3433f9..953127ff 100644 --- a/temoa/utilities/sqlite_utils.py +++ b/temoa/utilities/sqlite_utils.py @@ -33,20 +33,17 @@ def tune_sqlite_connection(con: sqlite3.Connection, config: 'TemoaConfig | None' mmap_size = getattr(config, 'sqlite_mmap_size', mmap_size) cache_size = getattr(config, 'sqlite_cache_size', cache_size) 
- try: - con.execute(f'PRAGMA journal_mode = {journal_mode}') - con.execute(f'PRAGMA synchronous = {synchronous}') - con.execute(f'PRAGMA temp_store = {temp_store}') - con.execute(f'PRAGMA mmap_size = {mmap_size}') - con.execute(f'PRAGMA cache_size = {cache_size}') - logger.debug( - 'SQLite tuned: journal_mode=%s, synchronous=%s, temp_store=%s, ' - 'mmap_size=%d, cache_size=%d', - journal_mode, - synchronous, - temp_store, - mmap_size, - cache_size, - ) - except sqlite3.Error as e: - logger.warning('Failed to apply some SQLite performance PRAGMAs: %s', e) + pragmas = [ + ('journal_mode', journal_mode), + ('synchronous', synchronous), + ('temp_store', temp_store), + ('mmap_size', mmap_size), + ('cache_size', cache_size), + ] + + for name, value in pragmas: + try: + con.execute(f'PRAGMA {name} = {value}') + logger.debug('Applied SQLite PRAGMA: %s = %s', name, value) + except sqlite3.Error as e: + logger.warning('Failed to apply SQLite PRAGMA %s: %s', name, e)