# Define Classes and create the SQLite database

> This module defines the classes we use to represent the PKM workflow.

In [None]:
#| default_exp classdb

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
from __future__ import annotations

In [None]:
#| export
import json
from enum import Enum
from typing import List, Union, ClassVar
from dataclasses import dataclass
from pydantic import BaseModel, field_serializer, field_validator
from fastlite import *
from fastcore.test import *

We use `from __future__ import annotations` to support forward references in type hints. Specifically, each class refers to itself in the type hints of the `_instances` list and of the `@classmethod` we create to keep track of all instances of the class.

## Enum Classes

First we define the possible values of the different variables that are available in the classes. We use the module `enum` to define **Enumerations**. We use this to bind the possible values to a variable name, making the code more readable and maintainable.

In [None]:
#| export
class InformationType(Enum):
    """Information content types that flow through the PKM workflow.

    Each member's value is a lowercase string, so a member can be looked up
    from its value, e.g. ``InformationType("book")``.
    """
    # Written and published material.
    BOOK = "book"
    RESEARCH_PAPER = "research_paper"
    DOCUMENT = "document"
    # NOTE: this value is plural and contains "&", unlike the snake_case values of the other members.
    ANNOTATION = "annotations&highlights"
    NOTE = "note"
    # Messages.
    EMAIL = "email"
    DISCORD_MESSAGE = "discord_message"
    # Online media.
    WEB_ARTICLE = "web_article"
    YOUTUBE_VIDEO = "youtube_video"
    PODCAST = "podcast"
    # Ideas.
    PRODUCT_IDEA = "product_idea"
    PROJECT_IDEA = "project_idea"

class Method(Enum):
    """How actions are performed - manually or automatically.

    Used as the element type of `InformationItem.method`.
    """
    MANUAL = "manual"
    AUTOMATIC = "automatic"

class Phase(Enum):
    """The five phases of the PKM workflow.

    Members are declared in the order collect, retrieve, consume, extract, refine.
    """
    COLLECT = "collect"
    RETRIEVE = "retrieve"
    CONSUME = "consume"
    EXTRACT = "extract"
    REFINE = "refine"

class PhaseQuality(Enum):
    """Quality rating for how well a tool performs in each phase.

    Used as the element type of `Tool.phase_quality`.
    """
    NA = "na"  # presumably "not applicable" - confirm
    BAD = "bad"
    OK = "ok"
    GREAT = "great"

class OrganizationSystem(Enum):
    """How tools organize and structure information.

    Used as the element type of `Tool.organization_system`; a tool can list several.
    """
    TAGS = "tags"
    FOLDERS = "folders"
    LINKS = "links"
    JOHNNY_DECIMAL = "johnny_decimal"

In [None]:
# Calling the enum class with a value returns the matching member.
Phase("refine")

<Phase.REFINE: 'refine'>

## PKM Workflow Classes

Next we create a dataclass for each item we need to be present in the PKM workflow.

#### Track instances of classes

We also want to keep track of the instances available for each class. Therefore we need some higher order magic.

- a list in the class to store the instances
- an `__init__` method to add the instance to the list
- a classmethod to get the list of instances

We can't just add an `_instances = []` statement to the class, because Pydantic will then assume it is a private attribute of the model. We need to tell Pydantic not to handle `_instances` as part of the model and to treat it as a plain class variable. Therefore we import `ClassVar` from `typing` and use it to type the `_instances` variable.

#### Using Pydantic with MiniDataAPI and SQLite

We want to use `Pydantic` Dataclasses to enable typechecking and validation. We also want to use the Dataclasses with the `MiniDataAPI` to create the tables in the `SQLite` database. But `SQLite` only has datatypes: `NULL`, `INTEGER`, `REAL`, `TEXT`, and `BLOB`. So no `list` or any of the Dataclass(Enum) types we use.

To be able to use both `Pydantic` and the `MiniDataAPI` we will do two things:

1. Define a Pydantic Dataclass with the correct datatypes and a Dataclass that has the same fields as the Pydantic Dataclass, but with datatypes that can be used with SQLite.
2. We add `@field_serializer` and `@field_validator` methods to the Pydantic Dataclass that convert the fields to JSON strings when we use the method `.model_dump()` on the instance of the Pydantic Dataclass.

This way we can:

- create the SQLite database tables using the regular Dataclasses.
- create instances with the Pydantic Dataclass to have easy typechecking and validation.
- convert these instances to `MiniDataAPI` and `SQLite` friendly datatypes using `.model_dump()` on the instance.

```python
class InformationItem(BaseModel):
    info_type: InformationType
    method: List[Union[Method, None]]
    toolflow: List[str]
    
    # Convert Enum to string and list to JSON string so we can add to SQLite
    @field_serializer('info_type', 'method', 'toolflow')
    def serialize_lists(self, v):
        if isinstance(v, list):
            return json.dumps([i.value if hasattr(i, 'value') else i for i in v])
        return str(v.value) if hasattr(v, 'value') else str(v)
    
    # Convert JSON string from SQLite to list of Enum and strings
    @field_validator('method', 'toolflow', mode='before')
    def parse_json_lists(cls, v):
        if isinstance(v, str):
            return json.loads(v)
        return v
```

**Pydantic Dataclasses**

Used for typechecking.

In [None]:
#| export
class InformationItem(BaseModel):
    """An information item and the route it takes through the PKM workflow.

    `method` and `toolflow` hold one entry per phase, in the order
    collect, retrieve, consume, extract, refine.

    Every instance created through `__init__` is appended to a class-level
    registry that can be read with `get_instances`.
    """
    name: str
    info_type: InformationType
    method: list[Union[Method, None]]  # [collect, retrieve, consume, extract, refine]
    toolflow: list  # [collect, retrieve, consume, extract, refine]

    # Typed as ClassVar so Pydantic does not turn it into a private attribute.
    _instances: ClassVar[list[InformationItem]] = []

    def __init__(self, **data):
        """Validate `data`, then record the new instance in the registry."""
        super().__init__(**data)
        registry = type(self)._instances
        registry.append(self)

    @classmethod
    def get_instances(cls) -> List[InformationItem]:
        """Return a shallow copy of the registry, so callers cannot mutate it."""
        return list(cls._instances)

    @field_serializer('info_type','method', 'toolflow')
    def db_serialize(self, v):
        """Convert a field to an SQLite-friendly value for `.model_dump()`.

        Lists become a JSON string in which every element that has a
        `.value` attribute (enum members) is replaced by that value. A single
        object with a `.value` attribute becomes `str(value)`. Anything else
        is returned unchanged.
        """
        if isinstance(v, list):
            plain_entries = [getattr(entry, 'value', entry) for entry in v]
            return json.dumps(plain_entries)
        if hasattr(v, 'value'):
            return str(v.value)
        return v

    @field_validator('method', 'toolflow', mode='before')
    def parse_json_lists(cls, value):
        """Decode a JSON string (as stored in SQLite) before normal validation runs."""
        return json.loads(value) if isinstance(value, str) else value

class Tool(BaseModel):
    """Represents a PKM tool with supported information items.

    Every instance created through `__init__` is appended to a class-level
    registry that can be read with `get_instances`.

    `.model_dump()` returns each list field as a JSON string so the result
    can be stored in SQLite TEXT columns; the `before` validator decodes such
    strings again when a model is built from a stored row.
    """
    name: str
    info_items: list[InformationItem]
    organization_system: list[OrganizationSystem]
    phase_quality: list[PhaseQuality]

    # Typed as ClassVar so Pydantic does not turn it into a private attribute.
    _instances: ClassVar[List[Tool]] = []

    def __init__(self, **data):
        """Validate `data`, then record the new instance in the registry."""
        super().__init__(**data)
        type(self)._instances.append(self)

    @classmethod
    def get_instances(cls) -> list[Tool]:
        """Return a shallow copy of the registry, so callers cannot mutate it."""
        return cls._instances.copy()

    @field_serializer('info_items', 'organization_system', 'phase_quality')
    def db_serialize(self, v):
        """Convert a field to an SQLite-friendly value for `.model_dump()`.

        Lists become a JSON string. Inside a list, nested Pydantic models are
        dumped to dicts with their own `.model_dump()` and objects with a
        `.value` attribute (enum members) are replaced by that value.
        """
        def to_plain(item):
            # Nested models are not JSON serializable themselves; json.dumps
            # would raise TypeError, so dump them to a dict first.
            if isinstance(item, BaseModel):
                return item.model_dump()
            return item.value if hasattr(item, 'value') else item

        if isinstance(v, list):
            return json.dumps([to_plain(i) for i in v])
        return str(v.value) if hasattr(v, 'value') else v

    @field_validator('info_items', 'organization_system', 'phase_quality', mode='before')
    @classmethod
    def parse_json_lists(cls, value):
        """Decode a JSON string (as stored in SQLite) before normal validation runs.

        For `info_items` the decoded list holds dicts, which Pydantic then
        validates into `InformationItem` instances.
        """
        if isinstance(value, str):
            return json.loads(value)
        return value

class Improvement(BaseModel):
    """Tracks workflow improvements needed for better PKM effectiveness.

    Every instance created through `__init__` is appended to a class-level
    registry that can be read with `get_instances`.

    `.model_dump()` returns `tool` as a JSON string and `phase` as its plain
    string value so the result can be stored in SQLite TEXT columns.
    """
    title: str
    what: str
    why: str
    prio: int
    tool: Tool
    phase: Phase

    # Typed as ClassVar so Pydantic does not turn it into a private attribute.
    _instances: ClassVar[List[Improvement]] = []

    def __init__(self, **data):
        """Validate `data`, then record the new instance in the registry."""
        super().__init__(**data)
        type(self)._instances.append(self)

    @classmethod
    def get_instances(cls) -> list[Improvement]:
        """Return a shallow copy of the registry, so callers cannot mutate it."""
        return cls._instances.copy()

    @field_serializer('tool', 'phase')
    def db_serialize(self, v):
        """Convert a field to an SQLite-friendly value for `.model_dump()`.

        A nested Pydantic model (`tool`) becomes the JSON encoding of its own
        `.model_dump()`. An object with a `.value` attribute (`phase`) becomes
        `str(value)`. Lists are JSON encoded element by element.
        """
        if isinstance(v, BaseModel):
            # Returning the model object itself would not be storable in SQLite.
            return json.dumps(v.model_dump())
        if isinstance(v, list):
            return json.dumps([i.value if hasattr(i, 'value') else i for i in v])
        return str(v.value) if hasattr(v, 'value') else v

    @field_validator('tool', 'phase', mode='before')
    @classmethod
    def parse_json_lists(cls, value):
        """Decode a JSON string (as stored in SQLite) before normal validation runs.

        Strings that are not valid JSON are passed through unchanged. This is
        required for `phase`, which is stored as a bare enum value such as
        ``retrieve``; decoding that with `json.loads` would raise and reject
        an otherwise valid value.
        """
        if isinstance(value, str):
            try:
                return json.loads(value)
            except json.JSONDecodeError:
                return value
        return value

Test creating instances

In [None]:
# Build one instance of each model; each later model embeds the previous one.
inf_a = InformationItem(name="infoitem_a", info_type=InformationType.BOOK, method=[Method.MANUAL], toolflow=[Phase.COLLECT])
tool_a = Tool(name="reader", info_items=[inf_a], organization_system=[OrganizationSystem.TAGS], phase_quality=[PhaseQuality.GREAT])
imp_a = Improvement(title="improvement_a", what="gras", why="dus", prio=0, tool=tool_a, phase=Phase.COLLECT)

Test creating list of instances

In [None]:
# Every construction appends to the class registry, so this assumes exactly one
# Improvement has been created so far (i.e. the cell above ran once).
test_eq(len(Improvement.get_instances()), 1)

**Regular Dataclasses with SQLite datatypes**

Used for creating the tables in the SQLite database.
These contain the same fields as the Pydantic Dataclasses we defined above, but they only use datatypes that are supported by SQLite and have an `id: int` field added as a primary key. They use the built-in `@dataclass` decorator, because `FastLite` doesn't support Pydantic Dataclasses.

In [None]:
#| export
@dataclass
class ImprovementDB:
    """SQLite-compatible row schema for improvements; passed to `db.create` in `create_db`."""
    # NOTE(review): the `Improvement` model has `tool` and `phase` fields but no
    # `workflow_routes`; confirm which set of columns this table should have.
    id: int  # primary key
    title: str
    what: str
    why: str
    prio: int
    workflow_routes: str

@dataclass
class InformationItemDB:
    """SQLite-compatible row schema for information items; passed to `db.create` in `create_db`."""
    id: int  # primary key
    name: str
    info_type: str  # string value of an `InformationType` member
    method: str  # JSON-encoded list, as produced by `InformationItem.model_dump()`
    toolflow: str  # JSON-encoded list, as produced by `InformationItem.model_dump()`

@dataclass
class ToolDB:
    """SQLite-compatible row schema for tools; passed to `db.create` in `create_db`."""
    id: int  # primary key
    name: str
    info_items: str  # JSON-encoded list
    organization_system: str  # JSON-encoded list
    phase_quality: str  # JSON-encoded list

## SQLite database

Connect to the database in `main.py`. We should also enable foreign key constraints, which are disabled by default in SQLite.

For testing purposes in this module we will use `db = database(":memory:")` to create an in-memory database.

In [None]:
#| export
def create_db(loc="static/infoflow.db"):
    """Open the database at `loc` and create the three workflow tables.

    Foreign key enforcement is switched on for the connection before the
    tables are created.

    Returns a tuple `(information_item_table, tool_table, improvement_table)`.
    """
    conn = database(loc)
    conn.execute("PRAGMA foreign_keys = ON;")
    # Creation order matches the order of the returned tuple.
    schemas = (InformationItemDB, ToolDB, ImprovementDB)
    return tuple(conn.create(schema) for schema in schemas)


::: {.callout-tip}
We can add foreign key constraints to the tables using the `transform` method from `sqlite_utils`.

```python
inf_tbl.transform(add_foreign_keys=[("<field_name>", "<table_name_to_connect>", "<field_name_in_table_to_connect>")])
```
:::

But for now we won't use foreign key constraints.

Tests and usage examples

In [None]:
# Use an in-memory database for the examples below.
inf_tbl, tool_tbl, imp_tbl = create_db(":memory:")

In [None]:
# Inspect the columns that were created from `InformationItemDB`.
inf_tbl.columns

[Column(cid=0, name='id', type='INTEGER', notnull=0, default_value=None, is_pk=1),
 Column(cid=1, name='info_type', type='TEXT', notnull=0, default_value=None, is_pk=0),
 Column(cid=2, name='method', type='TEXT', notnull=0, default_value=None, is_pk=0),
 Column(cid=3, name='toolflow', type='TEXT', notnull=0, default_value=None, is_pk=0)]

Create instances for information items, tools and improvements.

In [None]:
# `name` is a required field of InformationItem; omitting it raises a ValidationError.
info_item = InformationItem(
    name="Example book",
    info_type=InformationType.BOOK,
    method=[Method.MANUAL, Method.AUTOMATIC],
    toolflow=["collect", "process"]
)

obsidian = Tool(
    name="Obsidian",
    info_items=[info_item],
    organization_system=[OrganizationSystem.TAGS, OrganizationSystem.LINKS],
    phase_quality=[PhaseQuality.GREAT, PhaseQuality.OK]
)

improvement_item = Improvement(
    title="Better search functionality",
    what="Better search functionality",
    why="Current search is too slow",
    prio=1,
    tool=obsidian,
    phase=Phase.RETRIEVE
)

Add the instances to the tables

In [None]:
# `model_dump()` runs the `db_serialize` field serializer on the list fields.
obsidian.model_dump()

PydanticSerializationError: Error calling function `db_serialize`: TypeError: Object of type InformationItem is not JSON serializable

In [None]:
# Insert the serialized models as rows.
# NOTE(review): `Improvement.model_dump()` yields `tool` and `phase` keys, while
# `ImprovementDB` declares `workflow_routes` - confirm the last insert matches the table schema.
inf_tbl.insert(info_item.model_dump())
tool_tbl.insert(obsidian.model_dump())
imp_tbl.insert(improvement_item.model_dump())


PydanticSerializationError: Error calling function `db_serialize`: TypeError: Object of type InformationItem is not JSON serializable

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()