In [1]:
import os
import uuid

from anki.collection import Collection

from addon.application.services.formatter_service import (
    convert_note_to_addon_note,
)
from addon.domain.entities.note import AddonNote
from addon.domain.repositories.document_repository import (
    Document,
    SearchQuery,
)
from addon.infrastructure.persistence.qdrant_repository import (
    QdrantDocumentRepository,
)

  description="Check that the field is empty, alternative syntax for `is_empty: \&quot;field_name\&quot;`",
  description="Check that the field is null, alternative syntax for `is_null: \&quot;field_name\&quot;`",
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Path of the Anki collection file to read. Set the ANKI_COLLECTION_PATH
# environment variable to point at a different profile; the fallback keeps
# the path this notebook was originally written against.
collection_path = os.environ.get(
    "ANKI_COLLECTION_PATH",
    "/home/gianluca/.local/share/Anki2/User 1/collection.anki2",
)
col = Collection(collection_path)

In [3]:
deck_name = "Default"
# create=False: only look the deck up. Without it, `decks.id` would add a new
# deck to the collection when the name does not exist (e.g. after a typo).
deck_id = col.decks.id(deck_name, create=False)
if deck_id is None:
    raise ValueError(f"Deck not found: {deck_name!r}")
# Search for every note that has a card in the selected deck.
note_ids = col.find_notes(f"did:{deck_id}")

In [4]:
notes_to_index = []
n_notes_to_index = 100

# Slice up front so that exactly `n_notes_to_index` notes are collected
# (breaking after the append let one extra note through).
for n, note_id in enumerate(note_ids[:n_notes_to_index]):
    note = col.get_note(note_id)
    addon_note = convert_note_to_addon_note(note)
    notes_to_index.append(addon_note)
    # Preview the first few notes, leaving out those tagged "personal".
    # NOTE(review): the check is done on the tag list rather than on the note
    # object itself — confirm that `tags` is the intended field.
    if n <= 10 and "personal" not in addon_note.tags:
        print(addon_note)
        print("----")

guid='D?H@y-%%r' front='<img src="paste-d0ff77498ff8dde85ba00ae8b7c4bb6032d8483d.jpg">' back='Headboard' tags=['english'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='IjfKk}wnb@' front='<img src="paste-334a3566ffa4cab66033c10810e8d06af8fda194.jpg">' back='Towel' tags=['english'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='G1Z_~#;mLc' front='<img src="paste-d9689dc830d3f333e81b9b7058d5b25517064954.jpg">' back='Jug' tags=['english'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='Azd65{j+,q' front='Command to create a soft link' back='```bash<br>$ ln -s &lt;file&gt; &lt;link&gt;<br>```' tags=['linux'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='BGL!8$wV<W' front='In `ln -s`, what is the order of file name and link name?' back='```bash<br>$ ln -s &lt;file_name&gt; &lt;link_name&gt;<br>```' tags=['linux'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='be:y>MF$Ae' front='In `zip`, what is t

In [5]:
# Build the document repository through its `create()` factory; how it is
# configured is decided inside QdrantDocumentRepository (not visible here).
qdrant = QdrantDocumentRepository.create()

In [6]:
# Pick one collected note to inspect (index 52 — presumably an arbitrary sample).
n = notes_to_index[52]

def note_to_str(note: AddonNote) -> str:
    """Flatten a note into one string: front, back, then its tags.

    Args:
        note: Note whose `front`, `back` and `tags` attributes are read.

    Returns:
        The front text, the back text and the tags, separated by single
        spaces.
    """
    # Join tags with a space so that several tags remain distinct words
    # instead of being glued together (e.g. "linux bash", not "linuxbash").
    return f"{note.front} {note.back} {' '.join(note.tags)}"

# Display the text generated for the sample note `n`.
note_to_str(n)

'What is the main benefit of bagging? Reduce variance for high-variance low-bias weak learners ml'

In [7]:
%%time
documents = [
    Document(
        id=str(uuid.uuid4()),
        content=f"{note_to_str(n)}",
        source="source",
        metadata={},
    )
    for n in notes_to_index
]
qdrant.store_batch(documents=documents)

CPU times: user 5.01 s, sys: 47 ms, total: 5.06 s
Wall time: 2.75 s


In [8]:
%%time
query = SearchQuery(text=r"decompress zip archive")
results = qdrant.find_similar(query=query)

CPU times: user 58.5 ms, sys: 2.85 ms, total: 61.3 ms
Wall time: 36.2 ms


In [9]:
# Print the relevance score and content of each hit, skipping any hit whose
# content contains the word "personal".
for hit in results:
    content = hit.document.content
    if "personal" in content:
        continue
    print(f"Relevance score: {hit.relevance_score} .. {content}\n")

Relevance score: 0.5711341386769451 .. What command extracts files from a zip archive? ```bash<br>$ unzip &lt;file&gt;<br>``` linux

Relevance score: 0.4617749124519591 .. In `zip`, what is the option to specify the destination? ```bash<br>$ unzip &lt;file&gt; -d &lt;path&gt;<br>```<br><br><img src="paste-92e15adfe1d216e9ba6f170e4033b292b7b15756.jpg"> linux

Relevance score: 0.21118621039727276 .. How does bagging work? <img src="2560px-Ensemble_Bagging.svg.png"> ml

Relevance score: 0.17499870340832863 .. What command retrieves content from web servers? ```bash<br>$ wget &lt;url&gt;<br>``` linux

Relevance score: 0.17187193915249446 .. What argument sets the destination folder for `wget`? ```bash<br>$ wget &lt;url&gt; -P &lt;path&gt;<br>```<br><br><img src="paste-c971f4ef171edc01f1079127e58e78097f03e917.jpg"><br> linux

