## Fitz 

In [1]:
import fitz 

def extract_pdf_structure(pdf_path):
    doc = fitz.open(pdf_path)
    
    document_structure = []

    for page_num in range(doc.page_count):
        page_structure = {
            'page_number': page_num + 1,
            'elements': []
        }
        page = doc[page_num]
        
        # Extract text blocks
        for block_num, block in enumerate(page.get_text("blocks")):
            text_block = {
                'type': 'text',
                'block_number': block_num + 1,
                'text': block[4],
                'font': block[0],
                'font_size': block[1],
                'rect': block[2]
            }
            page_structure['elements'].append(text_block)

        # Extract images
        for img_num, img in enumerate(page.get_images(full=True)):
            image_block = {
                'type': 'image',
                'image_number': img_num + 1,
                'image_rect': img[0],
                'image_width': img[1],
                'image_height': img[2],
                'image_colorspace': img[3],
            }
            page_structure['elements'].append(image_block)

        document_structure.append(page_structure)

    doc.close()

    return document_structure

pdf_path = "Your resume path :::::"
structure = extract_pdf_structure(pdf_path)

for page_info in structure:
    print(f"Page {page_info['page_number']}")
    for element_info in page_info['elements']:
        if element_info['type'] == 'text':
            print(f"  Text Block {element_info['block_number']}")
            print(f"    Text: {element_info['text']}")
            print(f"    Font: {element_info['font']}")
            print(f"    Font Size: {element_info['font_size']}")
            print(f"    Rectangle: {element_info['rect']}")
        elif element_info['type'] == 'image':
            print(f"  Image {element_info['image_number']}")
            print(f"    Rectangle: {element_info['image_rect']}")
            print(f"    Width: {element_info['image_width']}")
            print(f"    Height: {element_info['image_height']}")
            print(f"    Colorspace: {element_info['image_colorspace']}")
        print()


Page 1
  Text Block 1
    Text: PROJECTS

    Font: 286.5746154785156
    Font Size: 533.1427001953125
    Rectangle: 363.97869873046875

  Text Block 2
    Text: <image: DeviceRGB, width: 728, height: 800, bpc: 8>
    Font: 537.3345336914062
    Font Size: 61.19999694824219
    Rectangle: 550.0845336914062

  Text Block 3
    Text: SUMMARY

    Font: 264.43792724609375
    Font Size: 158.79612731933594
    Rectangle: 347.55181884765625

  Text Block 4
    Text: PROFESSIONAL EXPERIENCE
EDUCATION

    Font: 57.81295394897461
    Font Size: 245.1464080810547
    Rectangle: 503.86553955078125

  Text Block 5
    Text: SKILLS

    Font: 57.81295394897461
    Font Size: 368.6838684082031
    Rectangle: 110.10291290283203

  Text Block 6
    Text: CERTIFICATIONS

    Font: 55.7723388671875
    Font Size: 525.0294799804688
    Rectangle: 186.49436950683594

  Text Block 7
    Text: speaktoharisudhan@gmail.com

    Font: 364.084716796875
    Font Size: 80.37564849853516
    Rectangle: 526.7014

## PDF Miner