# datastore

> Datastore for Obsidian notes

In [None]:
#| default_exp datastore

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:

#| export
from dataclasses import dataclass
from typing import List, Dict, Optional
from fastlite import Database, diagram
from memexplatform_obsidian.commons import config
import pathlib
from pathlib import Path
from datetime import datetime, timezone
from memexplatform_obsidian.mdmanager import ObsidianPage, resolve_note_path, Link, RawText, AnyLink, TagLink, get_subdirs
import uuid
from memexplatform_obsidian.commons import MountPaths

In [None]:
#| export
@dataclass
class Node:
    """Row of the `node` table: one vault file or folder, keyed by `lockey`."""
    lockey: str  # primary key; for folders, the path relative to the vault
    created_time: float  # POSIX timestamp in seconds; 0.0 when it could not be determined
    modified_time: float  # POSIX timestamp in seconds; 0.0 when it could not be determined
    file_size: int  # size in bytes; 0 for folders or when unknown
    fname: str  # final path component (`Path.name`)
    text: str  # page text for files, "" for folders
    blob: bytes  # raw file bytes for files, b"" for folders
    ext: str  # file extension, "" when there is none
    is_folder: bool  # True for directory entries
    url: str  # application URL used to open the entry
    obsidian_url: Optional[str]  # Obsidian URL as a string; None when not available
    checksum: bytes  # page checksum, b"" for folders

@dataclass
class Links:
    """Row of the `links` table: one outgoing link found on a page."""
    id: str  # random UUID4 string, primary key
    lockey: str  # lockey of the page that contains the link (foreign key to node)
    link: str  # the link's `target` attribute
    # Vault-relative path of the note the link resolves to. get_pagelinks stores
    # None here when the link has no `fname` or cannot be resolved, so the
    # column must be optional.
    linked_lockey: Optional[str]
    link_type: str  # class name of the link token, e.g. 'WikiLink' or 'TagLink'
    title: str  # the link's `title` attribute
    is_internal: bool  # True when the link resolved inside the vault or is a TagLink

@dataclass
class Properties:
    """Row of the `properties` table: one frontmatter value or tag of a page."""
    id: str  # random UUID4 string, primary key
    lockey: str  # lockey of the owning page (foreign key to node)
    name: str  # frontmatter key, or 'file_tags' / 'tags' for tag entries
    type: str  # class name of the token the value was read from
    value: str  # text content, link `fname`, link `target`, or tag title


In [None]:
#| export
def update_node(page, file_path):
    """Build the `Node` row describing `page`, which lives at `file_path`.

    `page` is expected to expose `lockey`, `text`, `_fpath`, `created_time`,
    `modified_time`, `file_size`, `file_extension`, `app_url`, `obsidian_url`
    and `checksum`. `file_path` may be a `Path` or a plain string.

    Timestamps and size missing on the page are filled in from the file's
    stat info on a best-effort basis; anything still unknown is stored as
    0.0 / 0 so the row can always be written.
    """
    file_path = Path(file_path)  # tolerate string paths
    source = page._fpath

    text, blob = "", b""
    if file_path.is_file():
        text = page.text
        blob = source.read_bytes()

    created = page.created_time
    modified = page.modified_time
    size = page.file_size

    # Only touch the filesystem when the page left something unset.
    needs_stat = created is None or modified is None or size is None
    if needs_stat and source.is_file():
        try:
            info = source.stat()
            if created is None:
                created = datetime.fromtimestamp(info.st_ctime_ns / 1e9, tz=timezone.utc)
            if modified is None:
                modified = datetime.fromtimestamp(info.st_mtime_ns / 1e9, tz=timezone.utc)
            if size is None:
                size = info.st_size
        except (OSError, OverflowError, ValueError) as e:
            # Best effort: a failed stat or an unrepresentable timestamp must
            # not stop indexing; the defaults below are used instead.
            print(f"Warning: Could not get stat info for {file_path}: {e}")

    return Node(
        lockey=page.lockey,
        created_time=created.timestamp() if created is not None else 0.0,
        modified_time=modified.timestamp() if modified is not None else 0.0,
        file_size=size if size is not None else 0,
        fname=file_path.name,
        text=text,
        blob=blob,
        ext=page.file_extension or "",
        is_folder=file_path.is_dir(),
        url=page.app_url,
        obsidian_url=str(page.obsidian_url) if page.obsidian_url else None,
        checksum=page.checksum,
    )

In [None]:
resolve_note_path?

Signature: resolve_note_path(vault: pathlib.Path, file: str) -> pathlib.Path | None
Docstring: <no docstring>
File:      ~/rahuketu/programming/gitea/memexplatform_obsidian/memexplatform_obsidian/mdmanager.py
Type:      function

In [None]:
#| export
def get_properties(page):
    """Collect `Properties` rows for a page's frontmatter values and its tags.

    Frontmatter `RawText` children contribute their non-empty `content`.
    Link children contribute their `fname` when set, otherwise their
    `target`; `TagLink` children are stored under the name "file_tags"
    instead of the frontmatter key. Every entry of `page.tags` is added
    under the name 'tags' with its `title` as value.
    """
    def _row(name, token, value):
        # Each row gets a fresh random id and records the token's class name.
        return Properties(
            id=str(uuid.uuid4()),
            lockey=page.lockey,
            name=name,
            type=type(token).__name__,
            value=value,
        )

    rows = []
    frontmatter = page.frontmatter
    entries = frontmatter.children if frontmatter else ()
    for entry in entries:
        for token in entry.children:
            if isinstance(token, RawText):
                if token.content:
                    rows.append(_row(entry.key, token, token.content))
                continue
            if not isinstance(token, (Link, AnyLink)):
                continue
            prop_name = "file_tags" if isinstance(token, TagLink) else entry.key
            # Prefer the file name; fall back to the raw target when it is missing/empty.
            prop_value = getattr(token, 'fname', None) or getattr(token, 'target')
            rows.append(_row(prop_name, token, prop_value))

    rows.extend(_row('tags', tag, tag.title) for tag in page.tags)
    return rows

In [None]:

# Scratch cell: try link resolution and property extraction on one real note.
# The second assignment overrides the first; switch notes by reordering/commenting.
fpath = "/Users/rahul1.saraf/rahuketu/programming/notesobs/pages/@RahulSaraf.md"
fpath = "/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/Fuck You, Show Me The Prompt. – 2.md"
# fpath = "/Users/rahul1.saraf/rahuketu/programming/notesobs/pages/Product Mindset for RAG.md"
op = ObsidianPage.from_file_path(fpath)
link = op.links[0]; link
# link.fname
vault = config.OBSIDIAN_VAULT
# Same resolution steps that get_pagelinks performs for each link.
linked_path = resolve_note_path(vault, link.fname) if getattr(link, "fname", None) else None
linked_lockey = str(linked_path.relative_to(vault)) if linked_path else None; linked_lockey
op.lockey

link.target, link.title
# Dump each frontmatter key with its parsed child tokens.
fm = op.frontmatter
for p in fm.children:
    print(p.key, p.children)
op = ObsidianPage.from_file_path(fpath)
get_properties(op)
# op.tags

title [<mistletoe.span_token.RawText content='Fuck You, Show Me The Prompt. '...+1>]
source [<memexplatform_obsidian.mdmanager.AnyLink with 1 child title='https://hamel.dev/blog/posts/p'...+6 target='https://hamel.dev/blog/posts/p'...+6 children=(<mistletoe.span_token.RawText content='https://hamel.dev/blog/posts/p'...+6>,)>]
author [<memexplatform_obsidian.mdmanager.WikiLink with 1 child ext='.md' fname='@HamelHusain' children=[<mistletoe.span_token.RawText content='@HamelHusain'>] title='@HamelHusain' label=None dest_type='wikilink' title_delimiter=None label=None src='/obsidian/open?file=%40HamelHu'...+4>]
published [<mistletoe.span_token.RawText content='None'>]
created [<mistletoe.span_token.RawText content='2025-08-30'>]
description [<mistletoe.span_token.RawText content='Quickly understand inscrutable'...+42>]
tags [<memexplatform_obsidian.mdmanager.TagLink with 1 child fname='#clippings' children=[<mistletoe.span_token.RawText content='#clippings'>] title='#clippings' label=Non

[Properties(id='6c139794-2f99-4748-a48b-ba4f8e7805f4', lockey='Clippings/Fuck You, Show Me The Prompt. – 2.md', name='title', type='RawText', value='Fuck You, Show Me The Prompt. –'),
 Properties(id='5773d693-c251-405e-b2a1-acb8f2e01358', lockey='Clippings/Fuck You, Show Me The Prompt. – 2.md', name='source', type='AnyLink', value='https://hamel.dev/blog/posts/prompt/'),
 Properties(id='47b8b584-ffbf-4263-8925-78a0bfd06dbe', lockey='Clippings/Fuck You, Show Me The Prompt. – 2.md', name='author', type='WikiLink', value='@HamelHusain'),
 Properties(id='10f00b5f-5c75-489d-bfc2-16a1fd130e8a', lockey='Clippings/Fuck You, Show Me The Prompt. – 2.md', name='published', type='RawText', value='None'),
 Properties(id='1e1ebe7c-4b6b-4a54-8010-c0960fecc392', lockey='Clippings/Fuck You, Show Me The Prompt. – 2.md', name='created', type='RawText', value='2025-08-30'),
 Properties(id='5701a0f6-529c-4e48-8dde-8281c7208c25', lockey='Clippings/Fuck You, Show Me The Prompt. – 2.md', name='description', t

In [None]:
#| export
def get_pagelinks(page:ObsidianPage, vault:Path):
    """Return one `Links` row for every link on `page`.

    A link that carries an `fname` is resolved against `vault`; when that
    succeeds, the vault-relative path is stored as `linked_lockey`,
    otherwise `linked_lockey` is None. A link counts as internal when it
    resolved or when its class is named 'TagLink'.
    """
    def _as_row(link):
        note_name = getattr(link, "fname", None)
        resolved = resolve_note_path(vault, note_name) if note_name else None
        target_lockey = str(resolved.relative_to(vault)) if resolved else None
        kind = type(link).__name__
        return Links(
            id=str(uuid.uuid4()),
            lockey=page.lockey,            # page the link was found on
            link=link.target,              # raw link target
            linked_lockey=target_lockey,
            link_type=kind,
            title=link.title,
            is_internal=bool(target_lockey or kind == 'TagLink'),
        )

    return [_as_row(link) for link in page.links]

In [None]:
#| export
def update_folder_node(folder_path: Path) -> Node:
    """Build the `Node` row for a directory.

    The lockey and URL are derived from the folder's path relative to
    `config.OBSIDIAN_VAULT`. Both timestamps fall back to 0.0 if reading
    them fails; folders carry no text, blob, extension or checksum.
    """
    def _seconds(ns):
        # Nanosecond stat value -> POSIX seconds, via a UTC datetime.
        return datetime.fromtimestamp(ns / 1e9, tz=timezone.utc).timestamp()

    try:
        info = folder_path.stat()
        stamps = (_seconds(info.st_ctime_ns), _seconds(info.st_mtime_ns))
    except Exception:
        stamps = (0.0, 0.0)
    created_time, modified_time = stamps

    relative = folder_path.relative_to(config.OBSIDIAN_VAULT)
    return Node(
        lockey=str(relative),
        created_time=created_time,
        modified_time=modified_time,
        file_size=0,
        fname=folder_path.name,
        text="",
        blob=b"",
        ext="",
        is_folder=True,
        url=MountPaths.open.to(file=relative),
        obsidian_url=None,
        checksum=b"",
    )

In [None]:
#| export
def iter_files(dirs):
    """Lazily yield every regular file found recursively under each directory in `dirs`.

    Each directory is walked independently, so if one directory in `dirs`
    contains another, the files of the inner one are yielded more than once.
    """
    for root in dirs:
        for entry in root.rglob("*"):
            if entry.is_file():
                yield entry


In [None]:

# Scratch cell: list every file found under the vault's subdirectories.
vault_path = pathlib.Path(config.OBSIDIAN_VAULT)
dirs = get_subdirs(config.OBSIDIAN_VAULT) ; dirs
list(iter_files(dirs))

[Path('/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/Frequently Asked Questions (And Answers) About AI Evals – Hamel’s Blog.md'),
 Path('/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/Learn In Public.md'),
 Path('/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/How I Built a Learning Machine.md'),
 Path('/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/ContextCite Attributing Model Generation to Context.md'),
 Path('/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/Apertus a fully open, transparent, multilingual language model.md'),
 Path('/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/How to solve any problem? - supermemo.guru.md'),
 Path('/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/From GPT-2 to gpt-oss Analyzing the Architectural Advances.md'),
 Path('/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/Fuck You, Show Me The Prompt. – 2.md'),
 Path('/Users/rahul1.saraf/rahuketu/programming

In [None]:
#| export
class ObsidianDB:
    """Index of an Obsidian vault stored in a fastlite `Database`.

    The database is opened at `config.OBSIDIAN_DB` and holds three tables
    (`node`, `links`, `properties`) describing the files and folders under
    `config.OBSIDIAN_VAULT`.
    """

    # Largest mtime difference (seconds) still treated as "same timestamp".
    _MTIME_TOLERANCE = 1e-6

    def __init__(self, config):
        self.config = config
        self.db = Database(config.OBSIDIAN_DB)
        self.vault = config.OBSIDIAN_VAULT

    def initdb(self, replace=False) -> Database:
        """Create the `node`, `links` and `properties` tables and return the database.

        `replace` is forwarded to every `create` call.
        """
        self.db.create(Node, pk='lockey', replace=replace)
        self.db.create(Links, pk='id', replace=replace, foreign_keys=[
            ('lockey', 'node'),
            # linked_lockey intentionally has no foreign key to node: the
            # constraint was dropped for efficiency, to avoid reading the db twice.
        ])
        self.db.create(Properties, pk='id', replace=replace, foreign_keys=[
            ('lockey', 'node')
        ])
        return self.db

    def upsert(self, file_path: Path):
        """(Re)index a single file: its node row, its links and its properties.

        `file_path` may be a `Path` or a string. Files whose page has no
        lockey are skipped with a warning.
        """
        file_path = pathlib.Path(file_path)
        page = ObsidianPage.from_file_path(file_path)

        if page.lockey is None:
            # Without a lockey there is no primary key to store the file under.
            print(f"Warning: File path {file_path} is not relative to vault {self.vault}. Skipping upsert.")
            return

        self.db['node'].upsert(update_node(page, file_path))

        # Link and property rows get fresh random ids on every parse, so the
        # page's previous rows are removed before the new ones are inserted.
        self.db["links"].delete_where("lockey = ?", (page.lockey,))
        self.db["links"].insert_all(get_pagelinks(page, self.vault))
        self.db["properties"].delete_where("lockey = ?", (page.lockey,))
        self.db["properties"].insert_all(get_properties(page))

    def _is_stale(self, f: Path) -> bool:
        """Return True when `f` is missing from the index or differs from its stored row."""
        # Resolve against this instance's vault, not the module-level config,
        # so an ObsidianDB built from another config looks up the right key.
        lockey = str(f.relative_to(self.vault))
        rows = list(self.db['node'].rows_where(
            "lockey = ?", (lockey,), select='lockey,modified_time,file_size,checksum'))
        if not rows:
            return True

        stored = rows[0]
        details = f.stat()
        same_size = details.st_size == stored['file_size']
        # abs(): a file whose mtime moved backwards must not pass as unchanged.
        similar_time = abs(details.st_mtime - stored['modified_time']) < self._MTIME_TOLERANCE
        if same_size and similar_time:
            return False

        # Metadata differs; only a different checksum means the content changed.
        return ObsidianPage.from_file_path(f).checksum != stored['checksum']

    def sync_vault(self, reindex=False):
        """Bring the index in line with the vault.

        Folder rows are always upserted. A file is upserted when it is not
        indexed yet or when it changed; with `reindex=True` every file is
        upserted without checking.
        """
        dirs = get_subdirs(self.vault)
        for d in dirs:
            self.db['node'].upsert(update_folder_node(d))
        for f in iter_files(dirs):
            # Short-circuit: a forced reindex skips the change check entirely.
            if reindex or self._is_stale(f):
                self.upsert(f)


In [None]:
#| notest
# Open the database configured in `config` and create its tables.
config.OBSIDIAN_DB
obs = ObsidianDB(config)
obs.initdb()

<Database <apsw.Connection object "/Users/rahul1.saraf/rahuketu/programming/gitea/memexplatform_obsidian/data/memexplatform_obsidian.db">>

In [None]:
# Look up the stored metadata for one lockey; an empty list means it is not indexed.
lockey = 'pages/Checkoslovakia.md'
list(obs.db['node'].rows_where("lockey = ?", (lockey,), 
select='lockey,modified_time,file_size,checksum'))

[]

In [None]:
obs.sync_vault()

In [None]:
# Scratch cell: replay sync_vault's size/mtime comparison for the first file found.
dirs = get_subdirs(config.OBSIDIAN_VAULT); dirs
fname = None
for f in iter_files(dirs):
    fname = f
    break

lockey = str(fname.relative_to(config.OBSIDIAN_VAULT)); lockey
# Indexing [0] assumes the file is already in the node table.
info = list(obs.db['node'].rows_where("lockey = ?", (lockey,), select='lockey,modified_time,file_size,checksum'))[0]
info, 
details = fname.stat()
same_size = details.st_size == info['file_size'] 
similar_time = details.st_mtime-info['modified_time'] < 1e-6


In [None]:
obs.db.table_names()

['node', 'links', 'properties', 'sqlite_stat1', 'sqlite_stat4']

In [None]:
# Index a single note by path; upsert accepts a plain string and wraps it in pathlib.Path.
fpath = "/Users/rahul1.saraf/rahuketu/programming/notesobs/pages/Product Mindset for RAG.md"
# fpath = "/Users/rahul1.saraf/rahuketu/programming/notesobs/pages/@RahulSaraf.md"
# fpath = "/Users/rahul1.saraf/rahuketu/programming/notesobs/Clippings/Fuck You, Show Me The Prompt. – 2.md"
obs.upsert(fpath)
# page = ObsidianPage.from_file_path(fpath)
# page.text

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()