In [1]:
#!/usr/bin/env python3
import os
import re
import csv
import logging
import argparse
from pathlib import Path
from collections import defaultdict

import sys

In [2]:
sys.path.append(os.path.abspath('/home/asmodi/Code/git/markdown_linker/src/'))
from Backlink import *

## Sys Functions

In [36]:
def __init__():
    """Build the system dictionary used by the rest of the notebook.

    The base dictionary comes from ``generate_sys_dict``; its ``'SYSTEM_PATH'``
    entry is handed to ``generate_link_list``, and both results are then passed
    to ``populate_markdown_dictionary``.

    Returns:
        Whatever ``populate_markdown_dictionary`` returns for the link list and
        the base dictionary.
    """
    base_dict = generate_sys_dict()
    return populate_markdown_dictionary(
        generate_link_list(base_dict['SYSTEM_PATH']),
        base_dict,
    )

In [37]:
# Build the shared system dictionary by running the setup pipeline in __init__().
system_dict  = __init__()

In [38]:
# Inspect the record stored for a single markdown file, keyed by its relative path.
system_dict['MARKDOWNS_DICT']['/SlipBox/AI_Levels_20280826.md']

{'PATH': PosixPath('/home/asmodi/Code/git/markdown_linker/test/markdown/SlipBox/AI_Levels_20280826.md'),
 'REL_PATH': '/SlipBox/AI_Levels_20280826.md',
 'BACKLINKS': [],
 'BACKLINKS_PATH': [],
 'LINKS': [],
 'LINKS_PATH': [],
 'NEED2UPDATE': False,
 'ID': None,
 'TITLE': 'AI Levels',
 'DESCRIPTION': '',
 'DATE': '2025-08-27T02:00:35.044Z',
 'PUBLISHED': 'true',
 'DATECREATED': '2025-08-27T02:00:35.044Z',
 'EDITOR': 'markdown',
 'TAGS': ['purpose', ' system applicaitons']}

# Internal Logic

In [39]:
def post_linkage(source: dict, target: dict, link_type: str, link_status: str = 'invalid'):
    """Describe one directed link between two markdown records as a flat row.

    Args:
        source: Record of the linking file; must provide 'REL_PATH' and 'TITLE'.
        target: Record of the linked file; must provide 'REL_PATH' and 'TITLE'.
        link_type: Label stored under 'link_type'.
        link_status: Label stored under 'status' (defaults to 'invalid').

    Returns:
        dict with the keys 'source_file', 'source_title', 'target_file',
        'target_title', 'status' and 'link_type', in that order.
    """
    columns = ('source_file', 'source_title', 'target_file',
               'target_title', 'status', 'link_type')
    values = (
        source['REL_PATH'],
        source['TITLE'],
        target['REL_PATH'],
        target['TITLE'],
        link_status,
        link_type,
    )
    return dict(zip(columns, values))

In [40]:
def pull_markdown_link_list(markdown_dict):
    """Snapshot the outgoing links of every markdown record.

    Args:
        markdown_dict: Mapping of markdown key -> record holding a 'LINKS_PATH' entry.

    Returns:
        dict mapping each key to a shallow copy of that record's 'LINKS_PATH',
        so the copies can be edited without touching the records.
    """
    return {
        md_key: record['LINKS_PATH'].copy()
        for md_key, record in markdown_dict.items()
    }

In [41]:
# ###
# Internal Logic
# ###
def markdown_link_crosswalker(markdowns_dict):
    """Build link and backlink rows for every markdown record.

    Each unordered pair of records is checked in both directions. When one
    record's 'LINKS_PATH' contains the other's key, a 'To Markdown' row and the
    mirrored 'Markdown Backlink' row are emitted, and the linked record gets a
    ``(title, key)`` entry appended to its 'BACKLINKS_PATH' (with 'NEED2UPDATE'
    set to True) unless that backlink is already recorded. Links that match no
    record are reported afterwards: 'http...' links as valid 'http' rows and
    '*.md' links as 'Broken' 'To Markdown' rows; anything else is ignored.

    Note: the records inside ``markdowns_dict`` are mutated in place
    ('NEED2UPDATE' and 'BACKLINKS_PATH').

    Args:
        markdowns_dict (dict): key -> record providing 'REL_PATH', 'TITLE',
            'LINKS_PATH', 'BACKLINKS_PATH' and 'NEED2UPDATE'.

    Returns:
        list[dict]: rows produced by ``post_linkage``.
    """
    crosslinks = []
    markdown_items = list(markdowns_dict.items())

    # Working copy of every record's outgoing links; matched links are struck
    # off so that only unresolved ones remain for the second pass.
    remaining = pull_markdown_link_list(markdowns_dict)

    def _has_backlink(record, path):
        """Return True when `record` already lists `path` as a backlink."""
        for entry in record['BACKLINKS_PATH']:
            # Entries appended here are (title, path) tuples, but pre-existing
            # entries may be bare paths, so both shapes are accepted. Comparing
            # the bare path against tuples never matched and caused duplicate
            # backlinks on every re-run.
            known = entry[1] if isinstance(entry, (tuple, list)) else entry
            if known == path:
                return True
        return False

    def _record_link(src_md, src_dic, dst_md, dst_dic):
        """Emit the rows for src -> dst and make sure dst backlinks to src."""
        # Strike off every occurrence: a link repeated in one file must not be
        # reported as broken later on.
        remaining[src_md] = [link for link in remaining[src_md] if link != dst_md]
        crosslinks.append(post_linkage(src_dic, dst_dic, 'To Markdown', 'Valid'))

        # If the source links to the target, the target must backlink to it.
        if not _has_backlink(dst_dic, src_md):
            dst_dic['NEED2UPDATE'] = True
            dst_dic['BACKLINKS_PATH'].append((src_dic['TITLE'], src_md))
        crosslinks.append(post_linkage(dst_dic, src_dic, 'Markdown Backlink', 'Valid'))

    # Pairwise pass: each unordered pair is visited once, both directions checked.
    for idx, (source_md, source_dic) in enumerate(markdown_items):
        for target_md, target_dic in markdown_items[idx + 1:]:
            if target_md in source_dic['LINKS_PATH']:
                _record_link(source_md, source_dic, target_md, target_dic)
            if source_md in target_dic['LINKS_PATH']:
                _record_link(target_md, target_dic, source_md, source_dic)

    # Whatever is left matched no other record: external URLs, self-links, or
    # markdown targets that are missing or wrongly formatted.
    for md_file, md_link_list in remaining.items():
        md_dict = markdowns_dict[md_file]
        for link in md_link_list:
            if link == md_file:
                # A file linking to itself is valid; no backlink is needed.
                crosslinks.append(post_linkage(md_dict, md_dict, 'To Markdown', 'Valid'))
                continue
            lowered = link.lower()
            if lowered.startswith('http'):
                tar = {'REL_PATH': link, 'TITLE': link}
                crosslinks.append(post_linkage(md_dict, tar, 'http', 'Valid'))
            elif lowered.endswith('.md'):
                tar = {'REL_PATH': link, 'TITLE': link}
                crosslinks.append(post_linkage(md_dict, tar, 'To Markdown', 'Broken'))

    return crosslinks

In [42]:
# Run the crosswalker over all markdown records and display the resulting link rows.
markdown_link_crosswalker(system_dict['MARKDOWNS_DICT'])

[{'source_file': '/SlipBox/BetterThanHuman_or_BetterThanHumanAndMachine_20250826/Static_Axioms_and_Dynamic_Policies.md',
  'source_title': 'Static Axioms and Dynamic Policies',
  'target_file': '/SlipBox/AI_Levels_20280826.md',
  'target_title': 'AI Levels',
  'status': 'Valid',
  'link_type': 'To Markdown'},
 {'source_file': '/SlipBox/AI_Levels_20280826.md',
  'source_title': 'AI Levels',
  'target_file': '/SlipBox/BetterThanHuman_or_BetterThanHumanAndMachine_20250826/Static_Axioms_and_Dynamic_Policies.md',
  'target_title': 'Static Axioms and Dynamic Policies',
  'status': 'Valid',
  'link_type': 'Markdown Backlink'},
 {'source_file': '/SlipBox/The_Right_Group_can_be_Better_Than_the_Group_That_is_Right.md',
  'source_title': 'The Right Group can be Better Than the Group That is Right',
  'target_file': '/SlipBox/Administration_of_Justice_George_Washing_20250826.md',
  'target_title': 'Administration of Justice (George Washington)',
  'status': 'Valid',
  'link_type': 'To Markdown'},


In [43]:
# ###
# Internal Logic
# ###
def markdown_crossrefrence(system_dict):
    """Attach cross-reference rows to the system dictionary.

    Runs ``markdown_link_crosswalker`` over ``system_dict['MARKDOWNS_DICT']`` and
    stores its result under ``system_dict['LINKS_DATA']``. Nothing is returned;
    the dictionary is updated in place.

    Args:
        system_dict (dict): must provide the 'MARKDOWNS_DICT' entry.
    """
    markdown_records = system_dict['MARKDOWNS_DICT']
    link_rows = markdown_link_crosswalker(markdown_records)
    system_dict['LINKS_DATA'] = link_rows

In [46]:
# Store the crosswalker rows under system_dict['LINKS_DATA'], then display them.
markdown_crossrefrence(system_dict)
system_dict['LINKS_DATA']

[{'source_file': '/SlipBox/BetterThanHuman_or_BetterThanHumanAndMachine_20250826/Static_Axioms_and_Dynamic_Policies.md',
  'source_title': 'Static Axioms and Dynamic Policies',
  'target_file': '/SlipBox/AI_Levels_20280826.md',
  'target_title': 'AI Levels',
  'status': 'Valid',
  'link_type': 'To Markdown'},
 {'source_file': '/SlipBox/AI_Levels_20280826.md',
  'source_title': 'AI Levels',
  'target_file': '/SlipBox/BetterThanHuman_or_BetterThanHumanAndMachine_20250826/Static_Axioms_and_Dynamic_Policies.md',
  'target_title': 'Static Axioms and Dynamic Policies',
  'status': 'Valid',
  'link_type': 'Markdown Backlink'},
 {'source_file': '/SlipBox/The_Right_Group_can_be_Better_Than_the_Group_That_is_Right.md',
  'source_title': 'The Right Group can be Better Than the Group That is Right',
  'target_file': '/SlipBox/Administration_of_Justice_George_Washing_20250826.md',
  'target_title': 'Administration of Justice (George Washington)',
  'status': 'Valid',
  'link_type': 'To Markdown'},


# Loading Tests

In [16]:
%cd /home/asmodi/Code/git/markdown_linker/test/markdown

/home/asmodi/Code/git/markdown_linker/test/markdown


In [None]:
def __init__():
    sys_dict = {

# Old Docs

In [None]:

def scan_documents(scan_path):
    """Scan all markdown files and build comprehensive link data.

    Every ``*.md`` file under ``scan_path`` is read and each link found in it is
    resolved to an absolute path and classified as 'Valid', 'Outside Root' or
    'Broken'. Two rows are collected per link (the original link and its
    mirrored backlink) and handed to ``save_csv_data`` for
    ``<scan_path>/backlinks.csv``.

    Args:
        scan_path: Root folder to scan (anything ``Path`` accepts).

    Returns:
        defaultdict(set): scan-relative target path -> set of
        ``(scan-relative source path, source title)`` tuples, for 'Valid'
        links only.
    """
    scan_path = Path(scan_path).resolve()
    logging.info(f"Scanning documents in {scan_path}")
    csv_path = scan_path / 'backlinks.csv'

    markdown_header = {}  # Map of file path to its header/title
    links_data = []
    backlinks_map = defaultdict(set)
    md_files = list(scan_path.rglob('*.md'))

    logging.info(f"Found {len(md_files)} markdown files")

    for md_file in md_files:
        content = read_markdown_doc(md_file)

        links_found = find_markdown_links(content)
        if not links_found:
            continue
        logging.debug(f"Found {len(links_found)} links in {md_file.name}")
        logging.debug(f"Links in {md_file.name}: {links_found}")

        # Per-file values do not depend on the individual link, so they are
        # computed once here instead of once per link.
        title_found = find_markdown_title(content)
        if title_found:
            logging.debug(f"Found header in {md_file.name}: {title_found}")
        markdown_header = add_headers_dict(markdown_header, md_file, title_found)
        source_rel = get_scan_relative_path(md_file, scan_path)
        source_level = get_hierarchy_level(md_file, scan_path)

        for link_text, target_file in links_found:
            # Handle links relative to scan directory
            if target_file.startswith('/'):
                # Link is scan-relative (e.g., /TESTDIR/docs/file.md)
                target_parts = Path(target_file).parts[1:]  # Remove leading '/'
                if target_parts and target_parts[0].upper() == scan_path.name.upper():
                    # Link points within scan structure - convert to absolute path
                    rel_path = Path(*target_parts[1:]) if len(target_parts) > 1 else Path('.')
                    target_path = (scan_path / rel_path).resolve()
                    logging.debug(f"Resolved scan-relative link {target_file} to {target_path}")
                else:
                    # Link points outside scan structure
                    target_path = Path(target_file)
            else:
                # Link is relative to current file location
                target_path = (md_file.parent / target_file).resolve()

            # Determine status - check existence with absolute path
            if target_path.exists():
                if scan_path in target_path.parents or target_path == scan_path:
                    status = 'Valid'
                else:
                    status = 'Outside Root'
                    logging.warning(f"Link outside scan path: {md_file.name} -> {target_file}")
            else:
                status = 'Broken'
                logging.error(f"Broken link: {md_file.name} -> {target_file} (resolved to {target_path})")

            # Convert to scan-relative path for CSV
            target_rel = get_scan_relative_path(target_path, scan_path)

            # Only regular files can be read for a title; a link that resolves
            # to a directory (e.g. the scan root itself) exists but cannot be
            # opened as text.
            if markdown_header.get(target_path) is None and target_path.is_file():
                with open(target_path, 'r', encoding='utf-8') as tf:
                    tcontent = tf.read()
                ttitle_found = find_markdown_title(tcontent)

                markdown_header = add_headers_dict(markdown_header, target_path, ttitle_found)

            source_title = markdown_header[md_file]
            # Broken or unreadable targets never get a header entry; fall back
            # to the target path itself instead of raising KeyError, so rows
            # for broken links are still written.
            if target_path in markdown_header:
                target_title = markdown_header[target_path]
            else:
                target_title = target_rel

            # Add original link
            links_data.append({
                'source_file': source_rel,
                'source_title': source_title,
                'target_file': target_rel,
                'target_title': target_title,
                'link_text': link_text,
                'status': status,
                'hierarchy_level': source_level,
                'link_type': 'original'
            })

            # Add backlink entry regardless of validity
            links_data.append({
                'source_file': target_rel,
                'source_title': target_title,
                'target_file': source_rel,
                'target_title': source_title,
                'link_text': '',
                'status': status,  # Use same status as original link
                'hierarchy_level': get_hierarchy_level(target_path, scan_path),
                'link_type': 'backlink'
            })

            if status == 'Valid':
                backlinks_map[target_rel].add((source_rel, source_title))

    save_csv_data(csv_path, links_data)
    return backlinks_map


def add_backlinks(scan_path):
    """Add backlinks to markdown files.

    Runs ``scan_documents`` on ``scan_path`` and, for every target file that has
    at least one backlink not yet listed in it, replaces the file's
    ``# Backlinks`` section with the complete, sorted list of backlinks from the
    scan. Files whose backlinks are all already present are left untouched.

    Args:
        scan_path: Root folder to scan (anything ``Path`` accepts).
    """
    scan_path = Path(scan_path).resolve()
    backlinks_map = scan_documents(scan_path)
    root_prefix = '/' + scan_path.name

    files_updated = 0
    for target_file_rel, source_files_rel in backlinks_map.items():
        # Convert scan-relative path back to absolute for file operations.
        # Require an exact match or a following '/', so a sibling folder that
        # merely shares the name prefix is not mistaken for the scan root.
        if target_file_rel == root_prefix or target_file_rel.startswith(root_prefix + '/'):
            rel_part = target_file_rel[len(root_prefix):].lstrip('/')
            target_path = scan_path / rel_part if rel_part else scan_path
        else:
            target_path = Path(target_file_rel)  # Outside scan path, use as-is

        # A valid link may point at a directory (e.g. the scan root), which
        # cannot carry a backlinks section.
        if not target_path.is_file():
            logging.warning(f"Skipping backlinks for non-file target {target_path}")
            continue

        logging.debug(f"Processing backlinks for {target_path.name}")

        with open(target_path, 'r', encoding='utf-8') as f:
            content = f.read()

        existing_backlinks = get_existing_backlinks(content)

        # Each entry is a (scan-relative source path, source title) tuple; sort
        # so the written section is deterministic across runs.
        ordered_sources = sorted(source_files_rel, key=lambda pair: str(pair[0]))
        new_count = sum(1 for src, _title in ordered_sources if src not in existing_backlinks)
        if not new_count:
            continue

        # Remove the existing backlinks section and rewrite it in full. Writing
        # only the new entries would silently drop the previously recorded ones.
        content = re.sub(r'\n# Backlinks\n.*?(?=\n# |\Z)', '', content, flags=re.DOTALL)
        backlink_lines = [f'- [{title}]({src})' for src, title in ordered_sources]

        logging.info(f"Adding {new_count} backlinks to {target_path.name}")
        backlinks_section = '\n# Backlinks\n\n' + '\n'.join(backlink_lines) + '\n'

        with open(target_path, 'w', encoding='utf-8') as f:
            f.write(content + backlinks_section)
        files_updated += 1

    logging.info(f"Updated {files_updated} files with backlinks")

# Command-line entry point: parse arguments, configure logging, pick the scan
# folder and run add_backlinks on it.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Generate backlinks for markdown files')
    # scan_path is optional (nargs='?'); when omitted the user is prompted below.
    parser.add_argument('scan_path', nargs='?', help='Folder path to scan for markdown files')
    parser.add_argument('--log-level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'], 
                       default='INFO', help='Logging level')
    parser.add_argument('--log-file', help='Log file path (optional)')
    
    args = parser.parse_args()
    
    # NOTE(review): setup_logging is not defined in the visible source; presumably
    # it comes from the star import of Backlink - confirm.
    setup_logging(args.log_level, args.log_file)
    
    # Use SCAN_PATH if defined, otherwise use command line arg or prompt
    # NOTE(review): SCAN_PATH is not defined in the visible source either; if it is
    # not provided elsewhere this line raises NameError - confirm.
    if SCAN_PATH:
        scan_path = SCAN_PATH
        logging.info(f"Using hard-coded SCAN_PATH: {SCAN_PATH}")
    else:
        scan_path = args.scan_path or input("Enter scan folder path: ").strip()
    
    try:
        add_backlinks(scan_path)
        logging.info("Backlinks processing completed successfully!")
        print("Backlinks added successfully! Check backlinks.csv for link analysis.")
    except Exception as e:
        # Log the failure for the record, then re-raise so the caller still
        # sees the original exception.
        logging.error(f"Error processing backlinks: {e}")
        raise