In [1]:
# PLAN:
# Retrieve existing notes from Anki
# Index notes into Qdrant
# Create a search query
# Retrieve similar notes to the search query
# ...
# Find possible duplicate notes

In [2]:
import uuid

from anki.collection import Collection

from addon.domain.repositories.document_repository import (
    Document,
    SearchQuery,
    SearchResult,
)
from addon.infrastructure.persistence.qdrant_repository import (
    QdrantDocumentRepository,
)
from addon.application.services.formatter_service import convert_note_to_addon_note

  description="Check that the field is empty, alternative syntax for `is_empty: \&quot;field_name\&quot;`",
  description="Check that the field is null, alternative syntax for `is_null: \&quot;field_name\&quot;`",
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
collection_path = "/home/gianluca/.local/share/Anki2/User 1/collection.anki2"  # Usually in your Anki profile folder
col = Collection(collection_path)

In [4]:
deck_name = "Default"
deck_id = col.decks.id(deck_name)
note_ids = col.find_notes(f"did:{deck_id}")

In [5]:
notes_to_index = []

for n, note_id in enumerate(note_ids):
    note = col.get_note(note_id)
    addon_note = convert_note_to_addon_note(note)
    notes_to_index.append(addon_note)
    if n <= 10 and "personal" not in addon_note:
        print(addon_note)
        print("----")
    if n >= 500:
        break

guid='D?H@y-%%r' front='<img src="paste-d0ff77498ff8dde85ba00ae8b7c4bb6032d8483d.jpg">' back='Headboard' tags=['english'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='IjfKk}wnb@' front='<img src="paste-334a3566ffa4cab66033c10810e8d06af8fda194.jpg">' back='Towel' tags=['english'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='G1Z_~#;mLc' front='<img src="paste-d9689dc830d3f333e81b9b7058d5b25517064954.jpg">' back='Jug' tags=['english'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='Azd65{j+,q' front='Command to create a soft link' back='```bash<br>$ ln -s &lt;file&gt; &lt;link&gt;<br>```' tags=['linux'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='BGL!8$wV<W' front='In `ln -s`, what is the order of file name and link name?' back='```bash<br>$ ln -s &lt;file_name&gt; &lt;link_name&gt;<br>```' tags=['linux'] notetype=<AddonNoteType.BASIC: 'basic'> deck_name=None
----
guid='be:y>MF$Ae' front='In `zip`, what is t

In [6]:
qdrant = QdrantDocumentRepository.create()

In [7]:
n = notes_to_index[52]

from addon.domain.entities.note import AddonNote

def note_to_str(note: AddonNote) -> str:
    return f"{note.front} {note.back} {''.join(note.tags)}"

note_to_str(n)

'What is the main benefit of bagging? Reduce variance for high-variance low-bias weak learners ml'

In [8]:
documents = [Document(id=str(uuid.uuid4()), content=f"{note_to_str(n)}", source="source", metadata={}) for n in notes_to_index]
qdrant.store_batch(documents=documents)

In [9]:
query = SearchQuery(text=r"What is the main benefit of bagging? Reduce variance for high-variance low-bias weak learners ml")
results = qdrant.find_similar(query=query)

for r in results:
    if "personal" not in r.document.content:
        print(f"Relevance score: {r.relevance_score} .. {r.document.content}\n")

Relevance score: 1.0000000274074927 .. What is the main benefit of bagging? Reduce variance for high-variance low-bias weak learners ml

Relevance score: 0.45419317104720636 .. When using the `re` module, what does the `:`, `#`, `=`, or `!` metacharacters represent? Designate a special group pythonregex

Relevance score: 0.4110985513044786 .. Verb in the `diw` command `d` (delete) nvim

Relevance score: 0.3796527494476845 .. Draw the logarithmic function <br><img src="300px-Logarithm_plots.png"> dl

Relevance score: 0.37313506213415937 .. L1-norm formula $\|\boldsymbol{x}\|_1 = \sum_{i=1}^n \left|x_i\right|$ math

