# Day 12 (pypdf and Pillow)

### Practicing Python from the basics

### Extracting metadata of a pdf

In [6]:
# importing the module
from pypdf import PdfReader

# creating the reader object to read pdf
reader = PdfReader("input/Day_9.pdf")

# reading metadata from pdf
# NOTE: pypdf returns None here when the file has no document
# information dictionary at all
meta = reader.metadata

# printing number of pages from pdf
print(len(reader.pages))

# printing the metadata
# a single field can be missing (None), and so can `meta` itself, so each
# field is looked up with a None fallback; plain `meta.author` would raise
# AttributeError on a PDF that carries no metadata
for field in ("author", "creator", "producer", "subject", "title"):
    print(getattr(meta, field, None))

5
None
LaTeX with hyperref
MiKTeX-dvipdfmx (20231124)
None
None


### Reducing PDF size (by lowering the quality of the images on each page)

In [20]:
# importing os module
import os

# size of the source PDF on disk; the file name uses the same capitalisation
# as the cell that reads it, so this also works on case-sensitive filesystems
filesize = os.path.getsize("input/Day_9_1.pdf")

# bytes / 1_000_000 is megabytes, so label the value as MB
print(f"Filesize before compression is :: {filesize/1000000} MB")

Filesize before compression is :: 2.055771 bytes


In [15]:
from pypdf import PdfReader, PdfWriter

# Open the source document and start from an empty writer.
reader = PdfReader("input/Day_9_1.pdf")
writer = PdfWriter()

# Copy every page across first; the images are then replaced on the
# writer's own pages.
for source_page in reader.pages:
    writer.add_page(source_page)

# Replace each embedded image with itself saved at quality=25, refreshing
# the one-line status message after every page.
for output_page in writer.pages:
    for embedded in output_page.images:
        embedded.replace(embedded.image, quality=25)
    print("\r Reducing file size", end="")

print("\nFile Size reduced")

# Write the modified document to disk.
with open("output/after_reduce.pdf", "wb") as pdf_out:
    writer.write(pdf_out)

 Reducing file size
File Size reduced


In [19]:
# size of the rewritten PDF on disk, reported in megabytes (bytes / 1_000_000)
filesize = os.path.getsize("output/after_reduce.pdf")

# "MB" (megabytes) rather than "Mb", which denotes megabits
print(f"Filesize after compression/ reducing is :: {filesize/1000000} MB")

Filesize after compression/ reducing is :: 1.589743 Mb


### Encrypting a PDF (or Adding a password to open the pdf)

In [1]:
# importing a module
from pypdf import PdfReader, PdfWriter

# Copy the whole source document into a writer.
reader = PdfReader("input/image-pdf-sample.pdf")
writer = PdfWriter(clone_from=reader)

# Protect the copy with a password, using the AES-256 algorithm.
# NOTE(review): the password is hard-coded here for demonstration only.
writer.encrypt("welcome@pypdf", algorithm="AES-256")

# Persist the encrypted copy.
with open("output/encrypted-pdf.pdf", "wb") as encrypted_file:
    writer.write(encrypted_file)

print("PDF Encrypted successfully")

PDF Encrypted successfully


### Decrypting the pdf (or Removing the password)

In [3]:
# importing modules
from pypdf import PdfReader, PdfWriter

# creating reader object to read the pdf
reader = PdfReader("output/encrypted-pdf.pdf")

# checking if the file is encrypted or not
if reader.is_encrypted:
    # decrypt() signals a wrong password through a falsy return value
    # rather than an exception, so check it and fail here with a clear
    # message instead of failing later while cloning the document
    if not reader.decrypt("welcome@pypdf"):
        raise ValueError(
            "Could not decrypt output/encrypted-pdf.pdf: wrong password"
        )

# Writing decrypted file to the writer object
writer = PdfWriter(clone_from=reader)

# Saving the new PDF to a file
with open("output/decrypted-pdf.pdf", "wb") as f:
    writer.write(f)

# message
print("PDf Decrypted successfully")

PDf Decrypted successfully


### Converting images to pdf

In [45]:
# importing modules
import os
from PIL import Image

# defining function to create pdf using images
def image_to_pdf(img_folder, out_pdf):
    """Combine the images found in a folder into one multi-page PDF.

    Files are taken in sorted file-name order, one page per image. Only
    files whose extension is .png, .jpg, .jpeg, .gif or .bmp (compared
    case-insensitively) are used; everything else in the folder is ignored.

    Args:
        img_folder: Path of the folder to read images from.
        out_pdf: Path of the PDF file to write.

    Raises:
        ValueError: If the folder contains no file with a supported
            image extension.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')

    # every image opened here is closed again in the `finally` below
    img_list = []
    try:
        # reading all images
        for img_file in sorted(os.listdir(img_folder)):

            # lower-casing the name so that e.g. "IMG_001.JPG" is not skipped
            if img_file.lower().endswith(image_extensions):

                # Image.open keeps the underlying file open until closed
                img_list.append(Image.open(os.path.join(img_folder, img_file)))

        # fail with a clear message instead of an IndexError further down
        if not img_list:
            raise ValueError(f"No image files found in {img_folder}")

        # the first image drives the save; the rest are appended as pages
        first_img, *other_imgs = img_list
        first_img.save(out_pdf, save_all=True, append_images=other_imgs)
    finally:
        # releasing the file handles even when opening or saving failed
        for img in img_list:
            img.close()
        

        
# Where the images are read from and where the combined PDF is written.
img_folder_path = 'imgs'
output_file = 'output/img_to_pdf.pdf'

# Announce the start, run the conversion, then report the output location.
print("converting images to pdf in progress ........")
image_to_pdf(img_folder=img_folder_path, out_pdf=output_file)
print(f"Conversion complete : file saved to {output_file}")

converting images to pdf in progress ........
Conversion complete : file saved to output/img_to_pdf.pdf
