# In [None]:
from docx import Document

# Function to extract sections and tables based on user selection
def extract_selected_sections_with_tables(docx_file, selected_sections):
    """Collect the text and tables found under the selected headings.

    The document body is walked in order so paragraphs and tables stay
    interleaved exactly as they appear in the file. A paragraph whose style
    name starts with ``Heading`` opens a new section; everything up to the
    next heading belongs to that section.

    Args:
        docx_file: Path or file-like object passed to ``Document``.
        selected_sections: Container of heading texts to keep. Headings are
            matched against the paragraph text exactly.

    Returns:
        A dict mapping each selected heading that was found to a list of
        strings in document order: the non-blank paragraph texts (the
        heading's own text comes first) and the Markdown rendering of each
        table in that section.
    """
    doc = Document(docx_file)
    sections_content = {}
    current_section = ""
    section_included = False

    # doc.paragraphs and doc.tables list the body-level paragraphs/tables in
    # document order, so pulling the next item from each whenever the
    # matching body element is encountered pairs every XML element with its
    # wrapper object. (Indexing them by len(sections_content) did not.)
    paragraphs = iter(doc.paragraphs)
    tables = iter(doc.tables)

    # Iterate through elements in the document (both paragraphs and tables)
    for element in doc.element.body:
        # Tags are namespace-qualified ("{namespace}p"); compare the whole
        # local name so other tags that merely end in "p" are not matched.
        if element.tag.endswith('}p'):  # Check if it's a paragraph
            paragraph = next(paragraphs)
            style = paragraph.style
            style_name = (style.name if style is not None else "") or ""

            # Check if it's a heading (section)
            if style_name.startswith('Heading'):
                current_section = paragraph.text
                section_included = current_section in selected_sections

                if section_included:
                    # setdefault: a heading text that appears twice keeps
                    # accumulating instead of discarding earlier content.
                    sections_content.setdefault(current_section, [])

            # Add paragraph content to the section (includes the heading
            # paragraph itself, which was just handled above)
            if section_included and paragraph.text.strip():
                sections_content[current_section].append(paragraph.text)

        elif element.tag.endswith('}tbl'):  # Check if it's a table
            # Always advance the table iterator, even for skipped sections,
            # so later tables stay aligned with their elements.
            table = next(tables)
            if section_included:
                # Convert the table to Markdown and add it to the current section
                sections_content[current_section].append(table_to_markdown(table))

    return sections_content

# Function to convert a table to Markdown format
def table_to_markdown(table):
    """Render a table as a Markdown pipe table.

    The first row is used as the header row, followed by a ``---`` separator
    row and then one line per remaining row.

    Args:
        table: Object exposing ``rows``; each row exposes ``cells`` and each
            cell exposes a ``text`` string.

    Returns:
        The Markdown table as a single newline-joined string, or an empty
        string when the table has no rows.
    """
    def cell_text(cell):
        # A literal "|" would end the cell early and a newline would end the
        # row, so escape the former and turn the latter into an inline break.
        return cell.text.strip().replace("|", "\\|").replace("\n", "<br>")

    def render(values):
        return "| " + " | ".join(values) + " |"

    rows = list(table.rows)
    if not rows:
        # Nothing to use as a header; avoid indexing into an empty table.
        return ""

    # Process the first row as headers
    headers = [cell_text(cell) for cell in rows[0].cells]
    lines = [render(headers), render(["---"] * len(headers))]

    # Process remaining rows
    lines.extend(render([cell_text(cell) for cell in row.cells]) for row in rows[1:])

    return "\n".join(lines)

# Function to write the collected sections out as a Markdown-flavoured text file
def save_sections_to_txt(file_path, sections):
    """Write every section to ``file_path`` as UTF-8 text.

    Each section is emitted as a ``## <name>`` header line, its content
    entries joined by newlines, and a 40-character ``=`` rule that separates
    it from the next section.

    Args:
        file_path: Destination path; the file is created or overwritten.
        sections: Mapping of section name to a list of strings.
    """
    rule = "=" * 40
    with open(file_path, 'w', encoding='utf-8') as out:
        for title in sections:
            entries = sections[title]
            out.writelines((
                f"## {title}\n",             # Markdown header for the section
                "\n".join(entries) + "\n",   # section body
                "\n" + rule + "\n",          # separator between sections
            ))

# Convenience wrapper: extract the requested sections, then write them to disk
def process_and_save_sections(docx_file, selected_sections, output_txt_file):
    """Extract the selected sections from a DOCX file and save them as text.

    Args:
        docx_file: Source document handed to
            ``extract_selected_sections_with_tables``.
        selected_sections: Heading names to extract.
        output_txt_file: Path of the text file to write.
    """
    save_sections_to_txt(
        output_txt_file,
        extract_selected_sections_with_tables(docx_file, selected_sections),
    )
    print(f"Data has been saved to {output_txt_file}")

# Example usage: runs only when the file is executed as a script.
if __name__ == "__main__":
    # Process and save the selected sections to the output file
    process_and_save_sections(
        docx_file='path_to_your_document.docx',  # Replace with actual DOCX file path
        selected_sections=['Introduction', 'Conclusion'],  # Section names to include
        output_txt_file='output.txt',  # Output file path
    )
