In [24]:
import re
import os

from difflib import SequenceMatcher
from os import path

def find_heading(_data: str, _heading: str, _exit_on_first=False) -> str:
    """
    Return the text found under a heading, without the heading line itself.

    The contents are split with the module-level ``RE_HEADING`` pattern and the
    resulting tokens are scanned in order. The scan expects every heading to
    show up as a run of '#' immediately followed by the heading name
    (presumably two capture groups in ``RE_HEADING`` -- confirm where it is
    defined).

    Names of nested sub-headings stay in the returned text (without their
    '#' prefix), because they are ordinary tokens of the split.

    :param _data: file contents.
    :param _heading: the name of the heading, excluding the '#' and any leading or trailing whitespace.
    :param _exit_on_first: if True, stop at the very next heading of any level;
        otherwise stop at the next heading of the same or a higher level.
    :return: the collected text with the heading name removed from the start and
        leading/trailing '-' characters stripped; '' when the heading is not found.
    """
    _heads = RE_HEADING.split(_data)

    target_heading = 0  # number of '#' of the matched heading; 0 means "not found yet"
    _parts = []

    for _i, _s in enumerate(_heads):
        # A heading marker is a NON-EMPTY run of '#'. The emptiness check matters:
        # '' == '#' * 0 is True, and re.split yields '' between adjacent headings
        # and after a trailing heading. Treating those as markers used to end the
        # section early or index past the end of the list.
        if _s and _s == '#' * len(_s):
            if _i + 1 < len(_heads) and _heads[_i + 1] == _heading:
                target_heading = len(_s)
            elif target_heading != 0:  # already inside the target section
                # Stop at any heading when asked to, otherwise only at a heading
                # of the same or a higher level (fewer or equal '#').
                if _exit_on_first or len(_s) <= target_heading:
                    break
            continue

        if target_heading == 0:
            continue  # tokens that come before the target heading

        _parts.append(_s)

    _text = ''.join(_parts)
    # do not strip new lines! they may be used for parsing later (eg: \n    1. valid question)
    # The heading name itself is the first collected token, hence removeprefix.
    # str.strip takes a set of characters, so this removes every leading/trailing '-'.
    return _text.removeprefix(_heading).strip('---')

In [25]:
# Root folder that holds the markdown notes.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where it exists.
DB = 'C:\\mine\\Journey'

# Read the whole note into memory as UTF-8 text (read mode is open()'s default).
with open(path.join(DB, 'Biologia celular.md'), encoding='utf-8') as f:
    data = f.read()

In [32]:
def re_heading(heading):
    """
    Build a compiled pattern matching the markdown heading line named ``heading``.

    The pattern captures the leading run of '#' (the heading level), then requires
    one or more spaces, the heading name, optional trailing spaces and a newline.

    :param heading: literal heading name, without the '#' prefix. It is escaped, so
        characters such as '?', '.', '+' or parentheses are matched verbatim instead
        of being interpreted as regex syntax.
    :return: a compiled ``re.Pattern`` with one capture group (the '#' run).
    """
    # Generic form that matches any heading: r'(#+?) +(.+?) *\n'
    return re.compile(r'(#+?) +' + re.escape(heading) + r' *\n')

In [36]:
# Sanity check: splitting on a heading name that is not in the note should
# leave the text unsplit, i.e. a single-element list.
re_heading('abcdefghijk').split(data)

['#hub/bio/celula\n#anki/biologia/biologia_celular\n\n\nLivro 1, Frente 1, Capítulo 5\n\n#      Anki Questions    \n---\n2. O que é [[teoria celular]]?\n3. Quais organelas são compartilhadas entre as [[células eucarióticas]] e [[células procarióticas]]? [[Biologia celular#organelas compartilhadas|ans]]\n4. O que é [[teoria endossimbiôntica]]?\n5. Descreva a função e onde encontra-se o [[citoplasma]].\n6. This has multiple [[ribossomos]] links [[peroxissomo]] \n\n## inside heading\ntext inside 1. what about middle text?\n\n1. are we reading this?\naa\n\naa\na\na\na\na\na\na\nsdfkaslkdjf;laks\n\na\n\n#                Answers     \n---\n### organelas              compartilhadas    \nCitoplasma, ribossomos, membrana plasmática e DNA \nlorem ipum\n\n\nsadfkjasldf \naslfkjasldfk\ndlafjsad;lf\n\n# LAST\n']

In [44]:
# Minimal case: the heading line is consumed and only the captured '#' run
# remains between the (empty) text before and after it.
re_heading('test').split('# test\n')

['', '#', '']

In [31]:
# Text under the questions heading, then every card body found inside it.
# QUESTIONS_HEADING, RE_CARD_BODY and RE_CARD_ENTRY are defined outside this cell.
text = find_heading(data, QUESTIONS_HEADING)
cards = RE_CARD_BODY.findall(text)

# Parallel lists: card_front[k] is the question whose answer reference is card_back[k].
card_front, card_back = list(), list()

for c in cards:
    # Each entry is a (question, answer) pair; the answer is '' when the
    # question carries no answer link.
    for i, (q, ans) in enumerate(RE_CARD_ENTRY.findall(c)):
        print(i, q)
        print('\t >', ans)
        if ans:
            # NOTE(review): this branch was left empty, which made the cell a
            # syntax error. Assumed intent: keep only questions that have an
            # answer, as aligned front/back entries. `ans` is still the raw
            # link target ('Note#heading|ans') and must be resolved to the
            # answer text before building notes -- TODO confirm.
            card_front.append(q)
            card_back.append(ans)

0 O que é [[teoria celular]]?
	 > 
0 Quais organelas são compartilhadas entre as [[células eucarióticas]] e [[células procarióticas]]?
	 > Biologia celular#organelas compartilhadas|ans
0 O que é [[teoria endossimbiôntica]]?
	 > 
0 Descreva a função e onde encontra-se o [[citoplasma]].
	 > 
0 are we reading this?
	 > 


In [104]:
# Text under the questions heading (QUESTIONS_HEADING is defined outside this cell).
text = find_heading(data, QUESTIONS_HEADING)

In [86]:
# Every question string matched by RE_QUESTIONS_BODY (defined outside this cell);
# the bare name on the last line displays the list as the cell output.
questions = RE_QUESTIONS_BODY.findall(text)
questions

['O que é [[teoria celular]]?',
 'Quais organelas são compartilhadas entre as [[células eucarióticas]] e [[células procarióticas]]? [[Biologia celular#organelas compartilhadas|ans]]',
 'O que é [[teoria endossimbiôntica]]?']

In [87]:
# Remove the '[[...|ans]]' answer links from each question, in place.
# RE_LINKS is defined outside this cell; it presumably yields the text between
# '[[' and ']]' for every wiki link -- confirm.
for i, q in enumerate(questions):
    cleaned = q
    for link in RE_LINKS.findall(q):
        if link.endswith('|ans'):
            # Accumulate on `cleaned`: replacing on the original `q` every time
            # kept all but the last answer link when a question had several.
            cleaned = cleaned.replace('[[' + link + ']]', '').strip()
    # Questions without an answer link are written back unchanged.
    questions[i] = cleaned
questions

['O que é [[teoria celular]]?',
 'Quais organelas são compartilhadas entre as [[células eucarióticas]] e [[células procarióticas]]?',
 'O que é [[teoria endossimbiôntica]]?']

In [37]:
# Note type with two fields and a single card template:
# the front shows the question, the back repeats the front and adds the answer.
my_model = genanki.Model(
    model_id=1607392319,
    name='Simple Model',
    fields=[{'name': 'Question'}, {'name': 'Answer'}],
    templates=[
        {
            'name': 'Card 1',
            'qfmt': '{{Question}}',
            'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
        },
    ],
)

In [39]:
# Placeholder note used to try out the export; the two values presumably map to
# the model's fields in declaration order (Question, Answer) -- confirm.
my_note = genanki.Note(
    model=my_model,
    fields=['Testing question', 'Correct answer']
)

In [40]:
# Deck named 'Biologia' (the integer is presumably the deck id -- confirm),
# holding the placeholder note for now.
bio_deck = genanki.Deck(2059400110, 'Biologia')
bio_deck.add_note(my_note)

In [41]:
# Write the deck to an .apkg file.
# NOTE(review): absolute, machine-specific output path.
genanki.Package(bio_deck).write_to_file('C:\\mine\\cs\\jupyter.apkg')