In [1]:
import re
from bs4 import BeautifulSoup
import markdown
import os

def parse_markdown_tables(md_content):
    """Parse every ``## `` section of a markdown document into a dictionary.

    The text is split into sections that start with a ``## `` heading followed
    by a blank line. Each heading is normalised into a key: stripped,
    lower-cased, spaces replaced by underscores.

    Args:
        md_content: Full markdown text.

    Returns:
        A dict mapping each section key to either

        * a list of data rows (each a list of stripped cell strings) when the
          section contains a pipe table; the header and separator lines are
          skipped, empty cells are kept as ``''`` so columns stay aligned, and
          rows whose cells are all empty are dropped; or
        * the stripped text that precedes the first ``|`` of the section when
          no table is found.
    """
    # Regex to find sections with tables
    sections = re.findall(r'## (.*?)\n\n(.*?)(?=\n## |\Z)', md_content, re.DOTALL)
    parsed_data = {}

    for title, content in sections:
        title_key = title.strip().lower().replace(' ', '_')

        # The row pattern needs every table line to end in '\n', but the
        # section regex stops before the newline that precedes the next
        # heading (or at end of text). Appending one newline keeps the final
        # row from being silently dropped.
        table_match = re.search(
            r'\|.*?\n\|.*?\n((?:\|.*?\n)+)', content + '\n', re.DOTALL
        )

        if not table_match:
            # Handle non-table content (like a Profile/Background paragraph):
            # keep only the text before any pipe character.
            parsed_data[title_key] = content.split('|')[0].strip()
            continue

        table_data = []
        for row_md in table_match.group(1).strip().split('\n'):
            row = row_md.strip()
            # Remove only the outer border pipes; filtering out every empty
            # string would also discard genuinely empty cells and shift the
            # remaining columns to the left.
            if row.startswith('|'):
                row = row[1:]
            if row.endswith('|'):
                row = row[:-1]
            cells = [cell.strip() for cell in row.split('|')]
            if any(cells):
                table_data.append(cells)
        parsed_data[title_key] = table_data

    return parsed_data

def parse_markdown_lists(md_content):
    """Render the sections that open with a bullet list to HTML.

    The text is split into ``## `` sections the same way as in
    ``parse_markdown_tables``. A section is kept only when its body starts
    with an unordered-list marker (``*``, ``-`` or ``+`` followed by a space
    or tab) and the rendered HTML actually contains a ``<ul>``.

    Args:
        md_content: Full markdown text.

    Returns:
        A dict mapping the normalised section key (stripped, lower-cased,
        spaces replaced by underscores) to the HTML produced by
        ``markdown.markdown`` for that section body.
    """
    parsed_lists = {}
    sections = re.findall(r'## (.*?)\n\n(.*?)(?=\n## |\Z)', md_content, re.DOTALL)
    for title, content in sections:
        body = content.strip()
        # A bare startswith('*') also matches "**bold**" / "*emphasis*"
        # paragraphs and misses "-" / "+" bullets, so require a real marker.
        if not re.match(r'[*+-][ \t]', body):
            continue

        # Convert markdown list to HTML
        html_list = markdown.markdown(body)
        # Lines such as "* * *" pass the marker check but render as a rule,
        # not a list; callers look up the <ul>, so skip anything without one.
        if '<ul' not in html_list:
            continue

        title_key = title.strip().lower().replace(' ', '_')
        parsed_lists[title_key] = html_list
    return parsed_lists

def create_character_html(md_path, template_path, output_dir):
    """
    Generates a character HTML file from a markdown file and an HTML template.

    The markdown is mined for the character name, the opening quote, the
    per-section tables (``parse_markdown_tables``) and the bullet lists
    (``parse_markdown_lists``). Those values are written into the matching
    places of the template and the result is saved as
    ``<name>_profile.html`` inside ``output_dir``; the output path is printed.

    Args:
        md_path: Path of the filled-in character markdown file.
        template_path: Path of the HTML template.
        output_dir: Directory for the generated file; created if missing.

    Raises:
        OSError: Propagated from ``open`` / ``os.makedirs`` when an input
            cannot be read or the output cannot be written.
    """
    # 1. Read Markdown and HTML files
    with open(md_path, 'r', encoding='utf-8') as f:
        md_content = f.read()
    with open(template_path, 'r', encoding='utf-8') as f:
        template_html = f.read()

    # --- Data Extraction from Markdown ---

    # The character name is taken from the quote attribution line.
    # Example: ― Kitadani Fukuira
    name_match = re.search(r'― (.*?)\n', md_content)
    char_name = name_match.group(1).strip() if name_match else "[Character Name]"

    # Extract quote
    quote_match = re.search(r'> "(.*?)"\n\n― \[(.*?)]', md_content)
    quote_text = quote_match.group(1) if quote_match else "[Character Quote]"

    lists = parse_markdown_lists(md_content)
    tables = parse_markdown_tables(md_content)

    # --- HTML Manipulation with BeautifulSoup ---
    soup = BeautifulSoup(template_html, 'html.parser')

    # Helper to replace text in all matching elements
    def replace_text(soup, old_text, new_text):
        for element in soup.find_all(string=re.compile(re.escape(old_text))):
            element.replace_with(element.replace(old_text, new_text))

    # Helper to locate the <section> that belongs to a markdown section key.
    # A section matches when its id (hyphens read as underscores) is a
    # substring of the key, e.g. id="profile" matches "profile/background".
    def find_section(section_id):
        return soup.find('section', id=lambda x: x and x.replace('-', '_') in section_id)

    # 1. Replace simple placeholders like [Character Name]
    replace_text(soup, '[Character Name]', char_name)
    if soup.title is not None:  # a template without <title> must not crash
        soup.title.string = f"{char_name} - Character Profile"

    # 2. Update Quote and Infobox
    quote_p = soup.select_one('aside.infobox blockquote p')
    if quote_p is not None:
        quote_p.string = f'"{quote_text}"'
    infobox_img = soup.select_one('aside.infobox img')
    if infobox_img is not None:
        infobox_img['alt'] = f"Character illustration of {char_name}"
        # You still need to manually set the image src path

    # 3. Populate Tables
    # This logic assumes the HTML template has tables we can fill; each table
    # is found through the <section> matching the markdown section key.
    for section_id, table_data in tables.items():
        # Paragraph-only sections are stored as strings and have no rows.
        if not isinstance(table_data, list):
            continue
        section = find_section(section_id)
        if section is None:
            continue
        tbody = section.find('tbody')
        if tbody is None:
            continue

        if 'key-value-table' in tbody.parent.get('class', []):
            # Key-value tables keep their template rows (the first cell holds
            # the label); only the second cell of each row is filled, pairing
            # template rows with markdown rows in order. The rows come from
            # the section being processed, not from a fixed
            # 'profile/background' lookup, so every key-value table receives
            # its own data.
            for row, row_data in zip(tbody.find_all('tr'), table_data):
                value_cells = row.find_all('td')
                if len(value_cells) < 2:
                    # e.g. a row built from <th> cells: nothing to fill
                    continue
                value_cells[1].string = row_data[1] if len(row_data) > 1 else ''
        else:
            # Standard tables: drop the placeholder rows and rebuild them.
            tbody.clear()
            for row_data in table_data:
                new_row = soup.new_tag('tr')
                # NOTE: cell text is parsed as HTML, so inline markup written
                # in the markdown cell ends up as real markup in the page.
                # Make first cell bold if it's a name/key
                new_row.append(BeautifulSoup(f'<td><strong>{row_data[0]}</strong></td>', 'html.parser').td)
                for cell_data in row_data[1:]:
                    new_row.append(BeautifulSoup(f'<td>{cell_data}</td>', 'html.parser').td)
                tbody.append(new_row)

    # 4. Populate Lists (Motivation & Trivia)
    for section_id, list_html in lists.items():
        section = find_section(section_id)
        if section is None:
            continue
        placeholder_ul = section.find('ul')
        new_ul = BeautifulSoup(list_html, 'html.parser').ul
        # Replace the placeholder list only when both sides exist; rendered
        # HTML without a <ul> would otherwise be passed on as None.
        if placeholder_ul is not None and new_ul is not None:
            placeholder_ul.replace_with(new_ul)

    # --- Output ---
    # Clean the character name for the filename (remove markdown, etc.)
    safe_char_name = re.sub(r'[\*\_\#]', '', char_name)
    output_filename = f"{safe_char_name.lower().replace(' ', '_')}_profile.html"
    output_path = os.path.join(output_dir, output_filename)

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(str(soup.prettify()))

    print(f"Successfully generated HTML profile: {output_path}")


# --- Execution in a Notebook Cell ---

# File paths are relative to the notebook's working directory.
# Adjust these paths as necessary.
markdown_file = 'character_designs/spinosaurus/AegyptSpino_profile.md'
template_file = 'character_designs/character-profile-template.html'
output_directory = 'character_designs/spinosaurus'

# Generate the profile page right away from the paths defined above.
create_character_html(markdown_file, template_file, output_directory)

# To build a profile for another character, create a copy of
# character_template.md, rename it (e.g., 'giganotosaurus.md'), fill in the
# details, and pass the path of that filled-in file as the first argument.

# Example Usage:
# create_character_html('path/to/your_filled_character.md', template_file, output_directory)

# NOTE: these hints are printed after the call above has already run.
print("Script is ready. To use it, call the `create_character_html` function with your file paths.")
print(f"Example: create_character_html('{markdown_file}', '{template_file}', '{output_directory}')")



Successfully generated HTML profile: character_designs/spinosaurus\aegypt_spino_profile.html
Script is ready. To use it, call the `create_character_html` function with your file paths.
Example: create_character_html('character_designs/spinosaurus/AegyptSpino_profile.md', 'character_designs/character-profile-template.html', 'character_designs/spinosaurus')


In [2]:
# convert_asterisks_to_bold.py
import re
from bs4 import BeautifulSoup, NavigableString

def convert_asterisks_in_html(file_path):
    """
    Reads an HTML file, converts text surrounded by asterisks to bold
    using <strong> tags, and overwrites the original file.

    It handles:
    - **double asterisks** for bold.
    - *single asterisks* for bold.
    - It avoids converting text anywhere inside <script>, <style>, <code>
      or <pre> elements, and leaves HTML comments untouched.

    Args:
        file_path: Path of the HTML file to rewrite in place.

    Errors are reported with ``print`` instead of being raised: a missing
    file gets its own message, any other exception a generic one.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'lxml')

        # Matches **text** or *text*: one or two opening asterisks, a
        # non-greedy body, then the same number of closing asterisks.
        # Runs of three or more asterisks get no special treatment.
        pattern = re.compile(r'(\*{1,2})(.+?)\1')
        skipped_tags = ['script', 'style', 'code', 'pre']

        # Find all text nodes in the document body
        for text_node in soup.body.find_all(string=True):
            # Exact type check on purpose: comments, doctypes and other
            # special strings subclass NavigableString and must stay as-is.
            if type(text_node) is not NavigableString:
                continue
            if '*' not in text_node:
                continue
            # Check every ancestor, not just the direct parent, so that
            # e.g. <pre><span>*x*</span></pre> is left alone.
            if text_node.find_parent(skipped_tags) is not None:
                continue

            original_text = str(text_node)
            replacement_nodes = []
            last_end = 0

            # Build the replacement as real nodes. Joining strings and
            # re-parsing them would treat literal '<' or '&' characters of
            # the text as markup.
            for match in pattern.finditer(original_text):
                start, end = match.span()
                # Keep the text before the match
                if start > last_end:
                    replacement_nodes.append(NavigableString(original_text[last_end:start]))

                strong_tag = soup.new_tag('strong')
                strong_tag.string = match.group(2)  # The text inside asterisks
                replacement_nodes.append(strong_tag)

                last_end = end

            # No match in this node: leave it untouched
            if not replacement_nodes:
                continue

            # Keep any remaining text after the last match
            if last_end < len(original_text):
                replacement_nodes.append(NavigableString(original_text[last_end:]))

            text_node.replace_with(*replacement_nodes)

        # Overwrite the original file with the modified HTML
        with open(file_path, 'w', encoding='utf-8') as f:
            # str(soup) keeps the document's existing whitespace;
            # soup.prettify() would re-indent the whole page.
            f.write(str(soup))

        print(f"Successfully processed '{file_path}' and converted asterisks to <strong> tags.")

    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
    except Exception as e:
        print(f"An error occurred: {e}")

if __name__ == '__main__':
    # --- IMPORTANT ---
    # Replace this with the actual path to your HTML file; the file is
    # rewritten in place by the call below.
    # Using a raw string (r"...") or forward slashes is recommended on Windows
    # to avoid issues with backslashes.
    html_file_to_process = 'character_designs/spinosaurus/aegypt_spino_profile.html'
    
    convert_asterisks_in_html(html_file_to_process)


Successfully processed 'character_designs/spinosaurus/aegypt_spino_profile.html' and converted asterisks to <strong> tags.


In [2]:
import markdown
import os

def create_html_from_markdown(project_root, readme_file, css_file, output_file):
    """
    Build a standalone HTML page from a Markdown document and a stylesheet.

    All three file names are resolved against ``project_root``. The Markdown
    is rendered with the ``tables`` extension, the stylesheet is inlined in a
    ``<style>`` element, and the page is written to the output path. Progress
    is reported with ``print``. If the Markdown or the CSS file is missing,
    an error line is printed and the function returns without writing.

    Args:
        project_root (str): The absolute path to the project's root directory.
        readme_file (str): The name of the source Markdown file.
        css_file (str): The name of the source CSS file.
        output_file (str): The name of the destination HTML file.
    """
    # Resolve every file name against the project root in one go.
    md_path, css_path, output_path = (
        os.path.join(project_root, name)
        for name in (readme_file, css_file, output_file)
    )

    # --- 1. Load the Markdown source first, then the stylesheet ---
    try:
        with open(md_path, 'r', encoding='utf-8') as md_handle:
            md_text = md_handle.read()
    except FileNotFoundError:
        print(f"Error: Markdown file not found at '{md_path}'")
        return
    else:
        print(f"Successfully read '{md_path}'")

    try:
        with open(css_path, 'r', encoding='utf-8') as css_handle:
            css_styles = css_handle.read()
    except FileNotFoundError:
        print(f"Error: CSS file not found at '{css_path}'")
        return
    else:
        print(f"Successfully read '{css_path}'")

    # --- 2. Render the Markdown to an HTML fragment ---
    # The 'tables' extension is what turns pipe tables into <table> markup.
    html_body_content = markdown.markdown(md_text, extensions=['tables'])
    print("Markdown content converted to HTML.")

    # --- 3. Wrap the fragment in the page skeleton ---
    # The stylesheet is inlined and the body sits inside the container div.
    html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Paleo-Maiden Championship: GDD</title>
    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Cormorant+Garamond:wght@400;700&family=Lato:wght@400;700&display=swap" rel="stylesheet">
    <style>
        {css_styles}
    </style>
</head>
<body>
    <div class="profile-container">
        {html_body_content}
    </div>
</body>
</html>
"""

    # --- 4. Save the assembled page ---
    with open(output_path, 'w', encoding='utf-8') as out_handle:
        out_handle.write(html_template)
    print(f"Successfully created self-contained HTML file at '{output_path}'")

if __name__ == '__main__':
    # Define project structure
    # IMPORTANT: PROJECT_ROOT is a machine-specific absolute path; make sure
    # it is correct for your system. The raw string keeps the Windows
    # backslashes literal.
    PROJECT_ROOT = r'c:\Users\Doodwingster_Rig\OneDrive\Desktop\Project Paleo Maiden'
    README_FILENAME = 'readme.md'  # resolved relative to PROJECT_ROOT
    CSS_FILENAME = os.path.join('gdd', 'gdd.css') # CSS is in a subdirectory
    OUTPUT_FILENAME = 'gdd.html'  # written directly inside PROJECT_ROOT

    create_html_from_markdown(PROJECT_ROOT, README_FILENAME, CSS_FILENAME, OUTPUT_FILENAME)

Successfully read 'c:\Users\Doodwingster_Rig\OneDrive\Desktop\Project Paleo Maiden\readme.md'
Successfully read 'c:\Users\Doodwingster_Rig\OneDrive\Desktop\Project Paleo Maiden\gdd\gdd.css'
Markdown content converted to HTML.
Successfully created self-contained HTML file at 'c:\Users\Doodwingster_Rig\OneDrive\Desktop\Project Paleo Maiden\gdd.html'
