# Load Packages and helper functions

## Packages

In [95]:
import re
import sys
import glob, os
import numpy as np
from os.path import exists

## Helper functions

In [2]:
def conv_dict(D):
    for key in D.keys():
        if D[key] == '🟢':
            D[key] = True
        elif D[key] == '🔴':
            D[key] = False
    return D


is_in_table_line = lambda x: x.startswith('|') and x.endswith('|')
enum             = lambda x: enumerate(x)


# PARAMETERS

## Global Constants (to not be changed)

In [6]:
ID__TABLES__table_alignment__center = 0
ID__TABLES__table_alignment__right  = 1
ID__TABLES__table_alignment__middle = 2


ID__TABLES__PACKAGE__longtblr = 0
ID__TABLES__PACKAGE__tabularx = 1

ID__CNV__TABLE_STARTED      = 0
ID__CNV__TABLE_ENDED        = 1
ID__CNV__IDENTICAL          = 2

# ⚠ does not work for longtblr!
CMD__TABLE__TABULARX__CENTERING = '\\newcolumntype{Y}{>{\\centering\\arraybackslash}X}'

# For recognizing file names, section names, block names
SPECIAL_CHARACTERS = ' %💬⚠💼🟢➕❓🔴✔🧑☺📁⚙🔒🟡🔲💊💡🤷‍♂️▶📧🔗🎾👨‍💻📞💭📖ℹ🤖🏢🧠🕒👇📚👉'


## User Parameters

In [94]:
path_files  = 'C:\\Users\\mariosg\\OneDrive - NTNU\\FILES\\'
path0       = path_files + 'AUTOMATIONS\\'


PARS = conv_dict(dict({
    '⚙': # SETTINGS
        conv_dict(dict({'TABLES':  
                conv_dict(dict({
                                  'package': ID__TABLES__PACKAGE__longtblr,
                       'hlines-to-all-rows': '🔴',
                        'any-hlines-at-all': '🔴',
                                'alignment': [
                                                ID__TABLES__table_alignment__center,
                                                ID__TABLES__table_alignment__middle],
                                'rel-width': 1.2
                }))})),
    '📁':
         dict({
                'markdown-file': path0 + 'example.md',  # Markdown (.md) file for conversion
                     'tex-file': path0 + 'example.tex',  # LateX (.tex) file (converted from the .md file)
                        'vault': path_files + 'workTips\\'
            }),
    'par':
        dict({
            'tabular-package':
                            dict({
                                       'names': ['longtblr', 'tabularx'],
                                'before-lines': ['{colspec}']
                            })
        })
}))


# Rest of code

In [108]:
def package_loader():

    packages_to_load = ['hyperref', 'graphics']

    tables_package = PARS['⚙']['TABLES']['package']
    if tables_package == ID__TABLES__PACKAGE__longtblr:

        packages_to_load.append('tabularray')
        packages_to_load.append('longtable')

    elif tables_package == ID__TABLES__PACKAGE__tabularx:
        packages_to_load.append('tabularx')

    return ['\\usepackage{'+x+'}' for x in packages_to_load]


def identify__tables(S):

    table_indexes = []
    table_has_started = False
    for i, l in enumerate(S):
        lstr = l.lstrip().rstrip()
        is_table_line = is_in_table_line(lstr)        
        if is_table_line and (not table_has_started):
            table_has_started = True
            idx__table_start = i
        # ⚠ NEVER add "or (i == len(S)-1)" to the condition below    
        elif (not is_table_line and table_has_started):
            table_has_started = False
            idx__table_end = i
            table_indexes.append(idx__table_start)
            table_indexes.append(idx__table_end)


    return table_indexes
            

def convert__tables(S):
    '''
    Converts tables    
    '''

    TABLE_SETTINGS = PARS['⚙']['TABLES']
    package = TABLE_SETTINGS['package']
    add_txt = ''
    if (ID__TABLES__table_alignment__center in TABLE_SETTINGS['alignment']) \
        and package == ID__TABLES__PACKAGE__longtblr:
        add_txt = '\centering '


    # After having found the table
    ## We expect that the 1st line defines the columns

    cols = S[0].split('|')
    cols = [[x.lstrip().rstrip() for x in cols if len(x)>0 and x!='\n']]

    C = []
    for s in S[2:]:
        c = s.split('|')
        c = [x.lstrip().rstrip() for x in c if len(x)>0 and x!='\n']
        C.append(c)

    y = cols + C

    # CONVERT
    N_cols = len(cols[0])

    latex_table = []
    addText = ''
    for i, c in enumerate(y):
        c1 = [add_txt + x for x in c]
        if i==0: 
            if TABLE_SETTINGS['any-hlines-at-all']:
                addText = ' \hline'
        else:
            if TABLE_SETTINGS['hlines-to-all-rows']:
                addText = ' \hline'
        latex_table.append('    ' + " & ".join(c1) + ' \\\\' + addText)

    lbefore = []


    if package == ID__TABLES__PACKAGE__tabularx:


        PCKG_NAME = '{tabularx}'

        if ID__TABLES__table_alignment__center in TABLE_SETTINGS['alignment']:
            lbefore.append(CMD__TABLE__TABULARX__CENTERING)
            colPrefix = 'Y'
        else:
            colPrefix = 'X'

        if (ID__TABLES__table_alignment__middle in TABLE_SETTINGS['alignment']):
            lbefore.append('\\renewcommand\\tabularxcolumn[1]{m{#1}}')

        latex_before_table = lbefore + [
            '\\begin{center}',
            '\\begin'+PCKG_NAME+'{\\textwidth}{' + '|' + N_cols*(colPrefix+'|') + '}',
            '   \hline'
        ]

        latex_after_table = [
            '   \hline',
            '\end'+PCKG_NAME,
            '\end{center}'
        ]



    elif package == ID__TABLES__PACKAGE__longtblr:

        PCKG_NAME = '{longtblr}'

        latex_before_table = [
            '\\begin{center}',
            '\\begin' + PCKG_NAME + '[',
            'caption = {},',
            'entry = {},',
            'label = {},',
            'note{a} = {},',
            'note{$\dag$} = {}]',
            '   {colspec = {'+ N_cols*'X' +'}, width = ' + str(TABLE_SETTINGS['rel-width']) + '\linewidth, hlines, rowhead = 2, rowfoot = 1}'
            ]  

        latex_after_table = [
            '\end' + PCKG_NAME,
            '\end{center}'
        ]

        add_hline_at_end = False # to be moved to user settings
        if add_hline_at_end:
            latex_after_table = '   \hline' + latex_after_table


    else:
        raise Exception('NOTHING CODED HERE!')


    LATEX = latex_before_table + latex_table + latex_after_table


    return LATEX


def internal_links__identifier(S):

    if not isinstance(S, list):
        raise Exception('Input of the function must be a list of strings!')
        return np.nan


    pattern_sections = '\[\[([\w-]+)\#([\w' + SPECIAL_CHARACTERS + '\-]+)(\|[\w' + SPECIAL_CHARACTERS + '\-]+)?\]\]'
    pattern_blocks = '\[\[([\w-]+)\#\^([\w' + SPECIAL_CHARACTERS + '\-]+)(\|[\w' + SPECIAL_CHARACTERS + '\-]+)?\]\]'
    
    MATCHES = []
    for i, s in enum(S):
        match_sections = re.findall(pattern_sections, s)
        match_blocks = re.findall(pattern_blocks, s)
        if len(match_sections) != 0 or len(match_blocks) != 0:
            MATCHES.append([i, match_sections, match_blocks])
    
    return MATCHES


def internal_links__enforcer(S, sections_blocks, internal_links):

    type_of_link = ['sec:', '']
    type_of_link_obsidian = ['#', '#^']
    sections = sections_blocks[0]
    blocks = sections_blocks[1]
    section_names = [x[1] for x in sections]
    block_names = [x[1] for x in blocks]
    for I in internal_links:
        for iS in range(2):
            Ii_sections = I[iS+1]
            if len(Ii_sections) != 0:
                
                for i in Ii_sections:
                    section_i = Ii_sections[0][1]
                    idx = [j for j in range(len(sections_blocks[iS])) if sections_blocks[iS][j][1] == section_i]
                    if len(idx)>0: 
                        idx=idx[0]

                        label = type_of_link[iS] + section_i.replace(' ', '-')
                        label_of_source = ' \label{' + label + '}'
                        hyperref_text = Ii_sections[0][-1].replace('|', '')
                        if len(hyperref_text) != 0:
                            hyperref_text = '{' + hyperref_text + '}'
                        else:
                            hyperref_text = '{' + 'ADD_NAME' + '}'

                        if not label_of_source in S[sections_blocks[iS][idx][0]]:
                            if iS==0:
                                S[sections_blocks[iS][idx][0]] = S[sections_blocks[iS][idx][0]].replace('\n', '')\
                                    + ' \label{' + type_of_link[iS] + section_i.replace(' ', '-') + '}'
                            else:
                                S[sections_blocks[iS][idx][0]] = S[sections_blocks[iS][idx][0]].replace('\n', '').replace('^' + label, '')\
                                    + ' \label{' + type_of_link[iS] + section_i.replace(' ', '-') + '}'


                        hyperref = '\hyperref[' + label + ']' + hyperref_text

                        obsidian_hyperref = '[[' + Ii_sections[0][0] + type_of_link_obsidian[iS] + Ii_sections[0][1] + Ii_sections[0][2] + ']]'
                        S[I[0]] = S[I[0]].replace(obsidian_hyperref, hyperref)
    return S


def embedded_references_recognizer(S):

    if not isinstance(S, list):
        raise Exception('Input of the function must be a list of strings!')
        return np.nan

    pattern_embedded = '!\[\[([\.\w-]+)(\|[\w' + SPECIAL_CHARACTERS + '\-]+)?\]\]'
    MATCHES = []
    for i, s in enum(S):
        match_pattern_embedded = re.findall(pattern_embedded, s)
        if len(match_pattern_embedded) != 0:
            MATCHES.append([i, match_pattern_embedded])
            # path-finder

    
    return MATCHES


def images_converter(S, images):


    to_print = [
	'\\begin{figure}',
	'	\centering',
	'	\includegraphics[width=0.7\linewidth, height=0.5\textheight]{"C:/Users/mariosg/OneDrive - NTNU/FILES/workTips/Literature/Notes/2022-08-23_19h25_29"}',
	'	\caption[Caption short]{Caption long}',
	'	\label{fig:2022-08-2319h2529}',
	'\end{figure}']

    
    return "1"


def embedded_references_path_finder(u):

    files = []
    # for folder, subfolders, files in os.walk(PARS['📁']['vault']):
    #    for f in files:
    #     if f.endswith('.md'): files_md.append(f)
    os.chdir(PARS['📁']['vault'])
    for root, dirs, files in os.walk(PARS['📁']['vault']):
        if u in files: return os.path.join(root,u)
    return ''

PATHS = PARS['📁']

with open(PATHS['markdown-file'], 'r') as f:
    content = f.readlines()


# Replace headers and map sections \==================================================
Lc = len(content)-1
sections = []
for i in range(Lc+1):
    # ⚠ The sequence of replacements matters: 
    # ---- replace the lowest-level subsections first
    content_00 = content[i]

    content_0 = content[i]
    content[i] = re.sub(r'#### (.*)', r'\\subsubsubsection{\1}', content[i])
    if content[i] != content_0:
        sections.append([i, content_0.replace('#### ', '').replace('\n', '')])

    content_0 = content[i]
    content[i] = re.sub(r'### (.*)', r'\\subsubsection{\1}', content[i])
    if content[i] != content_0:
        sections.append([i, content_0.replace('### ', '').replace('\n', '')])

    content_0 = content[i]
    content[i] = re.sub(r'## (.*)', r'\\subsection{\1}', content[i])
    if content[i] != content_0:
        sections.append([i, content_0.replace('## ', '').replace('\n', '')])

    content_0 = content[i]
    content[i] = re.sub(r'# (.*)', r'\\section{\1}', content[i])
    if content[i] != content_0:
        sections.append([i, content_0.replace('# ', '').replace('\n', '')])

# \==================================================\==================================================

# find reference blocks \==================================================
#---1. they have to be at the end of the sentence (i.e. before "\n")
blocks = []
for i in range(Lc+1):
    s = content[i].replace('\n', '')
    pattern = r"\^\w*$"
    link_label = re.findall(pattern, s)
    if len(link_label) > 0:
        blocks.append([i, link_label[0].replace('^', '')])    
# \==================================================


internal_links = internal_links__identifier(content)
content = internal_links__enforcer(content, [sections, blocks], internal_links)


IDX__TABLES = [0]
TYPE_OF_CNV = [ID__CNV__IDENTICAL]
tmp1 = identify__tables(content)
tmp2 = [ID__CNV__TABLE_STARTED for _ in tmp1]
tmp2[1::2] = [ID__CNV__IDENTICAL for _ in tmp1[1::2]]
IDX__TABLES += tmp1
TYPE_OF_CNV += tmp2

Lc = len(content)-1
if IDX__TABLES[-1] < Lc: 
    IDX__TABLES.append(Lc)
    TYPE_OF_CNV.append(ID__CNV__IDENTICAL)

LATEX_TABLES = []
for i in range(int(len(tmp1)/2)):
    LATEX_TABLES.append(convert__tables(content[tmp1[2*i]:tmp1[2*i+1]]))


# for i, L in enumerate(content):

#     for idx_table in IDX__TABLES:
#         LATEX_TABLES.append(convert__tables(content[idx_table[0]:idx_table[1]]))

LATEX = []
i0 = IDX__TABLES[0]
i_tables = 0
for j, i in enumerate(IDX__TABLES[1:]):
    if TYPE_OF_CNV[j] == ID__CNV__IDENTICAL:
        LATEX += content[i0:i]
    elif TYPE_OF_CNV[j] == ID__CNV__TABLE_STARTED:
        LATEX += LATEX_TABLES[i_tables]
        i_tables += 1
    
    i0 = i
    
    

PREAMBLE = ['\documentclass{article}'] + package_loader() + ['\\begin{document}']


LATEX = PREAMBLE + LATEX + ['\end{document}']
with open(PATHS['tex-file'], 'w') as f:
    for l in LATEX:
        if not l.endswith('\n'): l+='\n'
        f.write(l)



# Debugginng

In [117]:
# DEBUG/PLAY


# print(internal_links__identifier(content))
# print('')



MATCHES = embedded_references_recognizer(content)
print(MATCHES[0][1][0][0])

embedded_references_path_finder(MATCHES[0][1][0][0])
#

image1.png


''

## LAB

In [36]:
internal_links = internal_links__identifier(content)
def internal_links__enforcer(S, sections, internal_links):

    section_names = [x[1] for x in sections]
    for I in internal_links:
        Ii_sections = I[1]
        if len(Ii_sections) != 0:
            
            for i in Ii_sections:
                section_i = Ii_sections[0][1]
                idx = [j for j in range(len(section_names)) if section_names[j] == section_i][0]


                label = 'sec:' + section_i.replace(' ', '-')
                label_of_source = ' \label{' + label + '}'
                hyperref_text = Ii_sections[0][-1].replace('|', '')
                if len(hyperref_text) != 0:
                    hyperref_text = '{' + hyperref_text + '}'
                else:
                    hyperref_text = '{' + 'ADD_NAME' + '}'

                if not label_of_source in S[sections[idx][0]]:
                    S[sections[idx[0]][0]] = S[sections[idx[0]][0]].replace('\n', '')\
                         + ' \label{sec:' + section_i.replace(' ', '-') + '}'

                hyperref = '\hyperref[' + label + ']' + hyperref_text

                obsidian_hyperref = '[[' + Ii_sections[0][0] + '#' + Ii_sections[0][1] + Ii_sections[0][2] + ']]'
                S[I[0]] = S[I[0]].replace(obsidian_hyperref, hyperref)
                print('')
            

    print('')
    return [S]

print(internal_links__enforcer(content, sections, internal_links))


TypeError: 'int' object is not subscriptable

# Notes

## Internal links/crossrefs

Using this format:

\section{Hello World}
\label{sec:hello}


\hyperref[sec:hello]{Word of text}


### Strategy
1. Add the label with the same name as in the Obsidian note. Add it just using "\n \label{sec:label}" instead of creating a new line
2. Map the sections and blocks so that we can correspond them easily