In [1]:
from docx import Document
from docx.shared import Mm, Pt, Cm
from docx.enum.text import WD_LINE_SPACING, WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT, WD_TABLE_ALIGNMENT
import json
import os
from write import write_pdf_linux, write_jpg
from pdf2jpg import pdf2jpg
import pypdfium2 as pdfium

# --- Letter inputs -----------------------------------------------------------
# Image assets placed into the header and the signature block.
logo = 'samples/logo/1.png'
signature = 'samples/signature/1.png'

# Fixed text fragments of the letter.
header = 'Генеральному директору ООО "Реальная компания" А. А. Сидорову'
reference_text = 'Уважаемый Иван Иванов!'
creator_title = 'Генеральный директор'
creator_name = 'Созонов С.В.'

# Body text = first line of intros.txt followed by the first task text of the
# first entry in instructions.json.
with open('samples/intros.txt', 'r', encoding='utf-8') as intro_file:
    intro_line = intro_file.readline()
with open('samples/instructions.json', 'r', encoding='utf-8') as instr_file:
    instr = json.load(instr_file)
main_text = '' + intro_line + instr[0]['task_texts'][0]

document = Document()

# General doc settings: everything based on the 'Normal' style inherits these.
style = document.styles['Normal']
style.font.name = "Times New Roman"  # consts.font_name
style.font.size = Pt(14)  # consts.font_size
style.paragraph_format.line_spacing_rule = WD_LINE_SPACING.ONE_POINT_FIVE  # consts.line_spacing

# Page margins, applied to every section of the document.
page_margins = {
    'top_margin': Mm(20),     # consts.top_margin
    'bottom_margin': Mm(20),  # consts.bottom_margin
    'left_margin': Mm(30),    # consts.left_margin
    'right_margin': Mm(15),   # consts.right_margin
}
for section in document.sections:
    for margin_name, margin_size in page_margins.items():
        setattr(section, margin_name, margin_size)
    
# Header
# One-row, two-column layout table: logo plus a registration-number stub in the
# left cell, the addressee line in the right cell.
table = document.add_table(rows=1, cols=2)
for cell in table.rows[0].cells:
    tc = cell._element.tcPr
    # NOTE(review): python-docx's tcPr element does not appear to define
    # left/top/right/bottom, so these four assignments presumably only set
    # throw-away Python attributes and never reach the saved XML. Confirm, then
    # either drop them or write w:tcBorders explicitly if borders must be hidden.
    tc.left = None
    tc.top = None
    tc.right = None
    tc.bottom = None
    cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.TOP
    # Paragraph alignment takes WD_ALIGN_PARAGRAPH; the original passed
    # WD_TABLE_ALIGNMENT members, which is the enum for whole-table placement.
    cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT

# Left cell: 40 x 40 mm logo, then the registration-number placeholder below it.
logo_cell = table.cell(0, 0)
logo_cell.paragraphs[0].add_run().add_picture(logo, Mm(40), Mm(40))
logo_cell.add_paragraph().add_run('№_____-______')

# Right cell: addressee, right-aligned.
header_cell = table.cell(0, 1)
header_cell.paragraphs[0].add_run(header)
header_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT

# Widths are set cell by cell, walking both columns in parallel.
for c0, c1 in zip(table.columns[0].cells, table.columns[1].cells):
    c0.width = Cm(11.0)
    c1.width = Cm(5.0)

# Reference
# Salutation line, centred. The named enum member replaces the bare literal 1
# the original assigned here.
ref = document.add_paragraph()
ref.alignment = WD_ALIGN_PARAGRAPH.CENTER
ref.add_run(reference_text)

# Main part
# Body paragraph: justified, with a 1.25 cm first-line indent.
main = document.add_paragraph()
main_fmt = main.paragraph_format
main_fmt.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
main_fmt.first_line_indent = Cm(1.25)
main.add_run(main_text)

# Footer
# One-row, three-column layout table: signer's title | signature image | signer's name.
foot_table = document.add_table(rows=1, cols=3)

for cell in foot_table.rows[0].cells:
    tc = cell._element.tcPr
    # NOTE(review): python-docx's tcPr element does not appear to define
    # left/top/right/bottom, so these four assignments presumably only set
    # throw-away Python attributes and never reach the saved XML. Confirm, then
    # either drop them or write w:tcBorders explicitly if borders must be hidden.
    tc.left = None
    tc.top = None
    tc.right = None
    tc.bottom = None
    cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.TOP
    # Paragraph alignment takes WD_ALIGN_PARAGRAPH; the original passed
    # WD_TABLE_ALIGNMENT members, which is the enum for whole-table placement.
    cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT

# Widths are set cell by cell, walking the three columns in parallel.
for c0, c1, c2 in zip(foot_table.columns[0].cells, foot_table.columns[1].cells, foot_table.columns[2].cells):
    c0.width = Cm(10.0)
    c1.width = Cm(3.0)
    c2.width = Cm(3.5)

# Column 0: signer's title, left-aligned.
title_par = foot_table.cell(0, 0).paragraphs[0]
title_par.add_run(creator_title)
title_par.alignment = WD_ALIGN_PARAGRAPH.LEFT

# Column 1: 30 x 20 mm signature image, right-aligned.
signature_par = foot_table.cell(0, 1).paragraphs[0]
signature_par.add_run().add_picture(signature, Mm(30), Mm(20))
signature_par.alignment = WD_ALIGN_PARAGRAPH.RIGHT

# Column 2: signer's name, right-aligned.
name_par = foot_table.cell(0, 2).paragraphs[0]
name_par.add_run(creator_name)
name_par.alignment = WD_ALIGN_PARAGRAPH.RIGHT

out_dir = 'letters_out'

# Saving into a missing folder fails, so create the doc/ folder written here
# together with the pdf/ and jpg/ folders that the following cells write into.
# exist_ok=True keeps re-runs of this cell harmless.
for subdir in ('doc', 'pdf', 'jpg'):
    os.makedirs(os.path.join(out_dir, subdir), exist_ok=True)

document.save(os.path.join(out_dir, 'doc', 'letter.docx'))

# write_pdf_linux('letter.docx', '.', 0)
# write_jpg('.', 0)

# Comtypes

In [2]:
%%time
import sys
import os
import comtypes.client

wdFormatPDF = 17  # Word's WdSaveFormat value for PDF output

# Paths are made absolute before being handed to Word.
in_file = os.path.abspath(os.path.join(out_dir, 'doc', 'letter.docx'))
out_file = os.path.abspath(os.path.join(out_dir, 'pdf', 'comtypes.pdf'))

# Drive Word through COM. The try/finally pairs make sure the document is
# closed and the Word process is shut down even if Open() or SaveAs() raises;
# otherwise a failed run would leave a Word instance running in the background.
word = comtypes.client.CreateObject('Word.Application')
try:
    doc = word.Documents.Open(in_file)
    try:
        doc.SaveAs(out_file, FileFormat=wdFormatPDF)
    finally:
        doc.Close()
finally:
    word.Quit()

# NOTE(review): the recorded run of this cell printed "[WinError 2]" and this
# call returned False, i.e. no JPG was produced there. pdf2jpg presumably needs
# an external executable that was not found — confirm, or rely on the pdfium
# rendering cell that follows.
pdf2jpg.convert_pdf2jpg(out_file, os.path.join(out_dir, 'jpg', 'comtypes.jpg'), pages="ALL")

[WinError 2] Не удается найти указанный файл
CPU times: total: 359 ms
Wall time: 2.97 s


False

In [3]:
# Render every page of the comtypes-produced PDF to an image file.
pdf = pdfium.PdfDocument(os.path.join(out_dir, 'pdf', 'comtypes.pdf'))
try:
    n_pages = len(pdf)

    for page_num in range(n_pages):
        page = pdf.get_page(page_num)
        pil_image = page.render(
            scale=1,
            rotation=0,
            crop=(0, 0, 0, 0),
            # colour=(255, 255, 255, 255),
            # annotations=True,
            # greyscale=False,
            # optimise_mode=pdfium.OptimiseMode.NONE
        ).to_pil()
        # One PNG per page; note that the files go into the 'jpg' folder.
        pil_image.save(os.path.join(out_dir, 'jpg', f'comtypes_{page_num}.png'))
finally:
    # Close the document even when rendering or saving a page fails, so the
    # PDF file handle is not left open.
    pdf.close()

True

# Pywin32

In [4]:
%%time
import sys
import os
import win32com.client

wdFormatPDF = 17  # Word's WdSaveFormat value for PDF output

# Paths are made absolute before being handed to Word.
in_file = os.path.abspath(os.path.join(out_dir, 'doc', 'letter.docx'))
out_file = os.path.abspath(os.path.join(out_dir, 'pdf', 'win32.pdf'))

# Drive Word through COM. The try/finally pairs make sure the document is
# closed and the Word process is shut down even if Open() or SaveAs() raises;
# otherwise a failed run would leave a Word instance running in the background.
word = win32com.client.Dispatch('Word.Application')
try:
    doc = word.Documents.Open(in_file)
    try:
        doc.SaveAs(out_file, FileFormat=wdFormatPDF)
    finally:
        doc.Close()
finally:
    word.Quit()

# pdf2jpg.convert_pdf2jpg(out_file, os.path.join(out_dir, 'jpg', 'win32.jpg'), pages="ALL")

CPU times: total: 15.6 ms
Wall time: 2.51 s


# Aspose.words

Note: Aspose.Words leaves a watermark in the generated PDF.

In [5]:
%%time
import aspose.words as aw

# Open the generated letter with Aspose.Words
aspose_source = os.path.join(out_dir, 'doc', 'letter.docx')
doc = aw.Document(aspose_source)

# Write it out under a .pdf name; kept as the last expression so the cell
# still displays the object returned by save()
aspose_target = os.path.join(out_dir, 'pdf', 'aspose.pdf')
doc.save(aspose_target)

# pdf2jpg.convert_pdf2jpg(os.path.join(out_dir, 'pdf', 'aspose.pdf'), os.path.join(out_dir, 'jpg', 'aspose.jpg'), pages="ALL")

CPU times: total: 1.64 s
Wall time: 2.53 s


<aspose.words.saving.SaveOutputParameters object at 0x000001FBE3419FD0>

# docx2pdf

In [6]:
%%time
from docx2pdf import convert

# Source .docx and target .pdf for the docx2pdf conversion.
docx2pdf_source = os.path.join(out_dir, 'doc', 'letter.docx')
docx2pdf_target = os.path.join(out_dir, 'pdf', 'docx2pdf.pdf')
convert(docx2pdf_source, docx2pdf_target)

  from .autonotebook import tqdm as notebook_tqdm
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.38s/it]

CPU times: total: 15.6 ms
Wall time: 4.21 s





In [7]:
# Render every page of the docx2pdf-produced PDF to an image file.
pdf = pdfium.PdfDocument(os.path.join(out_dir, 'pdf', 'docx2pdf.pdf'))
try:
    n_pages = len(pdf)

    for page_num in range(n_pages):
        page = pdf.get_page(page_num)
        pil_image = page.render(
            scale=1,
            rotation=0,
            crop=(0, 0, 0, 0),
        ).to_pil()
        # One PNG per page; note that the files go into the 'jpg' folder.
        pil_image.save(os.path.join(out_dir, 'jpg', f'docx2pdf_{page_num}.png'))
finally:
    # Close the document even when rendering or saving a page fails, so the
    # PDF file handle is not left open.
    pdf.close()

True

# pdf2image

## Requires Poppler to be installed (Windows builds: https://github.com/oschwartz10612/poppler-windows/releases/)

In [8]:
# need a poppler
from pdf2image import convert_from_path

poppler_bin = r'poppler-23.11.0\Library\bin'  # set your poppler path
docx2pdf_pdf = os.path.join(out_dir, 'pdf', 'docx2pdf.pdf')
images = convert_from_path(docx2pdf_pdf, poppler_path=poppler_bin)

# Write each rendered page out as a JPEG named after its page index.
for i, image in enumerate(images):
    image.save(os.path.join(out_dir, 'jpg', f'docx2pdf_{i}.jpg'), 'JPEG')
