In [1]:
import os
import re
from difflib import SequenceMatcher
from os import path

import genanki

In [2]:
# Root folder of the Markdown vault and the note parsed by this notebook.
DB = 'C:\\mine\\Journey'
note_path = path.join(DB, 'Biologia celular.md')

# Read the whole note into memory as UTF-8 text; later cells search `data`.
with open(note_path, mode='r', encoding='utf-8') as note_file:
    data = note_file.read()

In [3]:
# Text of the Markdown heading under which the numbered questions are listed.
QUESTIONS_HEADING = 'Anki Questions'

# Target: #anki/DECK/TAG[/SUB_TAGS]
# Groups: the '#anki' marker, DECK, and TAG[/SUB_TAGS]; DECK and TAG are used
# to determine the Anki deck and the note tags.
# Every segment is whitespace-free (\S), so the match stops at the end of the
# tag instead of swallowing the rest of the line, and one-character tags match.
RE_ANKI_TAG = re.compile(r'(#anki)/(\S+?)/(\S+)')
# Earlier fixed-heading variant, kept for reference:
# RE_QUESTIONS_HEAD = re.compile(r'(#)+? +?' + QUESTIONS_HEADING + r' *?\n')
# Target: 1. QUESTION TEXT
# Group: the text of the question (everything after "N. " up to end of line)
RE_QUESTIONS_BODY = re.compile(r'\d+\. (.+)')
# Target: [[ANY TEXT BETWEEN TWO BRACKETS]]
# Group: text in between, may contain the optional #heading link or |link renaming tokens
RE_LINKS = re.compile(r'\[\[(.*?)]]')

# Alternative that splits a link into its parts, kept for reference:
# [[link#header|ans]] groups: link, header, ans
# RE_LINKS = re.compile(r'\[\[([a-zA-Z0-9 ]+)#?([a-zA-Z0-9 ]*?)\|?(ans)*?]]')

# Target: #[#...#] [space...space]HEADING[space..space]\n
# Group: all the leading hashtags, used to figure out the heading level
def regex_heading(heading):
    """Compile a pattern that matches the Markdown heading line for `heading`.

    Args:
        heading: Literal heading text (without the leading '#' characters).
            It is escaped, so regex metacharacters in it are matched verbatim.

    Returns:
        A compiled pattern whose group 1 holds the full run of '#'
        characters; its length is the heading level. The match covers the
        whole heading line, including the terminating newline when present.
    """
    return re.compile(
        # '^' + MULTILINE: a heading must start its line, so "see # Title"
        # in running text is not mistaken for one.
        r'^(#+) +' + re.escape(heading)
        # Accept a heading on the very last line of a file that has no
        # trailing newline.
        + r' *(?:\n|\Z)',
        re.MULTILINE,
    )


In [4]:
# First #anki/... tag in the note; display the match next to its groups.
anki_tag = RE_ANKI_TAG.search(data)
(anki_tag, anki_tag.groups())

(<re.Match object; span=(16, 47), match='#anki/biologia/biologia_celular'>,
 ('#anki', 'biologia', 'biologia_celular'))

In [15]:
def _find_heading(heading, exit_on_first=False, text=None):
    """Return the text found under a Markdown heading.

    Args:
        heading: Heading text to look for (without the leading '#').
        exit_on_first: When True, stop at the first heading line that
            follows, whatever its level. When False, stop only at a heading
            of the same or a higher level, so child headings stay in the
            result.
        text: Markdown source to search. Defaults to the notebook-level
            `data` string.

    Returns:
        The section body with surrounding blank lines and any leading or
        trailing run of '-' characters (such as a '---' rule) removed.

    Raises:
        ValueError: If no heading line for `heading` is found.
    """
    source = data if text is None else text

    # str.split() only accepts a plain-string separator, so locate the
    # heading with the compiled pattern instead.
    match = regex_heading(heading).search(source)
    if match is None:
        raise ValueError(f'heading {heading!r} not found')

    # The matched line starts with the heading's '#' run; its length is the
    # heading level.
    heading_line = match.group(0)
    heading_level = len(heading_line) - len(heading_line.lstrip('#'))

    body_lines = []
    for line in source[match.end():].split('\n'):
        sub_heading = re.match(r'(#+) ', line)
        if sub_heading is not None:
            if exit_on_first:
                break  # first heading found
            if len(sub_heading.group(1)) <= heading_level:
                break  # found a sibling or higher-level heading
            # child heading, keep it as part of the section
        body_lines.append(line)

    # strip('-') removes every leading/trailing '-' (strip takes a character
    # set, not a substring); the final strip drops the newline left behind
    # by a removed '---' rule.
    return '\n'.join(body_lines).strip('\n').strip('-').strip('\n')

# Isolate the questions section, then pull out each numbered question's text.
questions_section = _find_heading(QUESTIONS_HEADING)
questions = RE_QUESTIONS_BODY.findall(questions_section)
questions

['O que é [[teoria celular]]?',
 'Quais organelas são compartilhadas entre as [[células eucarióticas]] e [[células procarióticas]]? [[Biologia celular#organelas compartilhadas|ans]]',
 'O que é [[teoria endossimbiôntica]]?',
 'Descreva a função e onde encontra-se o [[citoplasma]].']

In [8]:
# Inspect how the questions-heading pattern splits the note; captured groups
# appear as separate items in the resulting list.
questions_heading_pattern = regex_heading(QUESTIONS_HEADING)
questions_heading_pattern.split(data)

['#hub/bio/celula\n#anki/biologia/biologia_celular\n\nLivro 1, Frente 1, Capítulo 5\n',
 '#',
 '---\n2. O que é [[teoria celular]]?\n3. Quais organelas são compartilhadas entre as [[células eucarióticas]] e [[células procarióticas]]? [[Biologia celular#organelas compartilhadas|ans]]\n4. O que é [[teoria endossimbiôntica]]?\n5. Descreva a função e onde encontra-se o [[citoplasma]].\n\n# Answers\n---\n### organelas compartilhadas\nCitoplasma, ribossomos, membrana plasmática e DNA ']

In [37]:
# Two-field note type with a single card template: the question field on the
# front, and the front side plus the answer field on the back.
SIMPLE_MODEL_ID = 1607392319  # hard-coded model ID
simple_model_fields = [
    {'name': 'Question'},
    {'name': 'Answer'},
]
simple_model_card = {
    'name': 'Card 1',
    'qfmt': '{{Question}}',
    'afmt': '{{FrontSide}}<hr id="answer">{{Answer}}',
}

my_model = genanki.Model(
    SIMPLE_MODEL_ID,
    'Simple Model',
    fields=simple_model_fields,
    templates=[simple_model_card],
)

In [39]:
# Placeholder note built from the model above; field values are given in the
# same order as the model's fields (question first, then answer).
sample_fields = ['Testing question', 'Correct answer']
my_note = genanki.Note(model=my_model, fields=sample_fields)

In [40]:
# Create the 'Biologia' deck with a hard-coded deck ID and attach the note.
BIO_DECK_ID = 2059400110
bio_deck = genanki.Deck(BIO_DECK_ID, 'Biologia')
bio_deck.add_note(my_note)

In [41]:
# Bundle the deck into a package and write it to disk as an .apkg file.
apkg_path = 'C:\\mine\\cs\\jupyter.apkg'
bio_package = genanki.Package(bio_deck)
bio_package.write_to_file(apkg_path)