# This notebook reads your current resume and tries to produce a Markdown version of it, so that you can use it with resume_maker.ipynb

## The notebook accepts PDF and Word (.docx) files

## Please put your current resume in the same folder as this notebook
## and write its file name, including the file extension, inside the quotation marks

In [4]:

# File name (with extension) of the resume to convert; the conversion below handles .docx and .pdf.
current_resume_file_name = "My_CV en.docx"

In [5]:
# File name of the generated Markdown; it is written into the same directory as the input resume.
output_file_name = "resume.md"

In [6]:
!pip install python-docx pymupdf markdownify



In [7]:
pip install PyMuPDF


Note: you may need to restart the kernel to use updated packages.


In [8]:
pip install markdownify





In [9]:
# Warning control
import warnings
warnings.filterwarnings('ignore')

In [10]:
import os
import fitz  # PyMuPDF (for PDF files)
from docx import Document
from markdownify import markdownify as md

from docx import Document

## This function converts Word (.docx) documents to Markdown

In [12]:
def docx_to_markdown(file_path):
    """Extract the text of a Word (.docx) document as blank-line-separated blocks.

    The body paragraphs are collected first, followed by the text of every
    table cell (table by table, row by row). Each piece is stripped of
    surrounding whitespace; pieces that are empty after stripping are skipped
    so the result does not contain long runs of blank lines.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        The collected text pieces joined by blank lines, or "" when the
        document could not be read (the error is printed, not raised).
    """
    try:
        doc = Document(file_path)

        # Text of the body paragraphs.
        pieces = [para.text.strip() for para in doc.paragraphs]

        # Collect the text from all tables, if there are any.
        pieces.extend(
            cell.text.strip()
            for table in doc.tables
            for row in table.rows
            for cell in row.cells
        )

        # Join all collected non-empty parts into a single text and return it.
        return "\n\n".join(piece for piece in pieces if piece)

    except Exception as e:
        # Best effort: report the problem and let the caller see "no text".
        print(f"Error extracting DOCX file: {file_path}. Error: {e}")
        return ""



## This function converts PDF documents to Markdown

In [14]:
def pdf_to_markdown(file_path):
    """Extract the text of a PDF file and run it through markdownify.

    The text of every page is extracted with PyMuPDF (sorted reading order)
    and the pages are joined with blank lines before conversion.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The converted text, or "" when the file could not be read (the error
        is printed, not raised). A notice is printed when the PDF yields no
        text at all.
    """
    try:
        # The context manager closes the PDF handle even if extraction fails.
        with fitz.open(file_path) as doc:
            text = "\n\n".join(page.get_text("text", sort=True) for page in doc)
        if not text.strip():
            print(f"No text extracted from PDF file: {file_path}")
        # NOTE(review): markdownify is an HTML-to-Markdown converter while `text`
        # is plain text here; confirm it does not alter characters or line
        # breaks in an unwanted way for your resumes.
        return md(text)
    except Exception as e:
        # Best effort: report the problem and let the caller see "no text".
        print(f"Error extracting PDF file: {file_path}. Error: {e}")
        return ""

## This function calls the right converter depending on whether you gave a Word or a PDF file, and saves the result

In [16]:
def convert_resume_to_markdown(input_file, output_name=None):
    """Convert a DOCX or PDF resume to Markdown and save it next to the input file.

    Args:
        input_file: Path of the resume; the extension (case-insensitive)
            selects the converter.
        output_name: File name for the Markdown output. When omitted, the
            notebook-level ``output_file_name`` variable is used.

    Returns:
        The path of the written Markdown file, or None when no text could be
        extracted or the file could not be written (a message is printed in
        both cases).

    Raises:
        ValueError: If the extension is neither ``.docx`` nor ``.pdf``.
    """
    # Pick the converter from the file extension.
    extension = os.path.splitext(input_file)[-1].lower()

    if extension == ".docx":
        markdown_text = docx_to_markdown(input_file)
    elif extension == ".pdf":
        markdown_text = pdf_to_markdown(input_file)
    else:
        raise ValueError("Unsupported file format. Only DOCX and PDF are supported.")

    # The converters return "" on failure, so whitespace-only means "nothing usable".
    if not markdown_text.strip():
        print("Failed to extract any meaningful text from the file.")
        return None

    # Fall back to the notebook-level setting only when no name was passed in.
    if output_name is None:
        output_name = output_file_name

    # The output lives in the same directory as the input file.
    output_md_file = os.path.join(os.path.dirname(input_file), output_name)

    try:
        with open(output_md_file, "w", encoding="utf-8", errors="replace") as f:
            f.write(markdown_text)
        print(f"Markdown file saved successfully: {output_md_file}")
    except Exception as e:
        print(f"Error saving markdown file: {e}")
        return None

    return output_md_file  # Return the path for further use


## We call the function here

In [18]:

# Run the conversion; the result is the path of the saved Markdown file, or None when extraction or saving failed.
markdown_resume = convert_resume_to_markdown(current_resume_file_name)

Markdown file saved successfully: resume.md
