In [2]:
# Install the third-party packages this notebook relies on. The explicit %pip
# magic installs into the running kernel's environment and, unlike a bare
# `pip ...` line, does not depend on IPython's automagic being enabled.
%pip install PyPDF2 docx2pdf pandas matplotlib python-pptx openpyxl

Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
from PyPDF2 import PdfReader, PdfWriter
from docx2pdf import convert as docx_to_pdf
import pandas as pd
import matplotlib.pyplot as plt
import logging

# Configure the root logger at INFO level so the logging.info() progress
# messages emitted by the conversion code are not filtered out.
logging.basicConfig(level=logging.INFO)

def convert_to_pdf(input_path, output_path):
    """Convert one file to PDF and place the result in ``output_path``.

    The conversion strategy is chosen from the file extension, compared
    case-insensitively (so ``REPORT.PDF`` and ``report.pdf`` are treated alike):

    * ``.pdf``  -- every page is re-written into a new PDF with the same
      file name inside ``output_path``.
    * ``.docx`` -- delegated to ``docx2pdf``.
    * ``.xlsx`` -- the sheet returned by ``pandas.read_excel`` is drawn as a
      matplotlib table and saved as ``<name>.pdf``.
    * ``.txt``, ``.md``, ``.pptx`` -- recognised but not implemented; an error
      is logged.
    * anything else -- logged as unsupported.

    Errors are logged instead of raised so that a batch run can continue with
    the next file.

    Args:
        input_path: Path of the file to convert.
        output_path: Directory that receives the PDF; created if missing.

    Returns:
        bool: True when a PDF is available in ``output_path`` for this input,
        False when the file type is unsupported/unimplemented or the
        conversion failed.
    """
    ext = os.path.splitext(input_path)[1]
    kind = ext.lower()  # match extensions regardless of case

    try:
        # Inside the try block so an invalid output path is logged like any
        # other failure; exist_ok avoids the check-then-create race.
        os.makedirs(output_path, exist_ok=True)

        if kind == '.pdf':
            output_file = os.path.join(output_path, os.path.basename(input_path))
            # Opening the destination with 'wb' truncates it immediately. If it
            # is the very file being read (input dir == output dir) that would
            # destroy the source, so leave it alone.
            if os.path.exists(output_file) and os.path.samefile(input_path, output_file):
                logging.info(f"PDF file already in output directory, left untouched: {input_path}")
                return True
            with open(input_path, 'rb') as infile:
                reader = PdfReader(infile)
                writer = PdfWriter()
                for page in reader.pages:
                    writer.add_page(page)
                # Write while the source is still open: the writer pulls page
                # data from the reader's stream.
                with open(output_file, 'wb') as outfile:
                    writer.write(outfile)
            logging.info(f"PDF file copied: {input_path} to {output_file}")
            return True

        if kind == '.docx':
            docx_to_pdf(input_path, output_path)
            logging.info(f"Word document converted to PDF: {input_path} to {output_path}")
            return True

        if kind in ('.txt', '.md'):
            logging.error("Text/Markdown file conversion requires Pandoc, which is not installed.")
            return False

        if kind == '.xlsx':
            base_name = os.path.splitext(os.path.basename(input_path))[0]
            output_file = os.path.join(output_path, base_name + '.pdf')
            df = pd.read_excel(input_path, engine='openpyxl')  # specifying engine
            fig, ax = plt.subplots()
            try:
                ax.axis('tight')
                ax.axis('off')
                ax.table(cellText=df.values, colLabels=df.columns, cellLoc='center', loc='center')
                fig.savefig(output_file, bbox_inches='tight')
            finally:
                # pyplot keeps every figure alive until it is closed; release
                # it so converting many workbooks does not accumulate memory.
                plt.close(fig)
            logging.info(f"Excel file converted to PDF: {input_path} to {output_file}")
            return True

        if kind == '.pptx':
            logging.error("PowerPoint file conversion requires Pandoc, which is not installed.")
            return False

        logging.error(f"Unsupported file type: {ext}")
        return False
    except Exception as e:
        logging.error(f"Failed to convert {input_path}. Error: {e}")
        return False

def _prompt_for_directory(prompt):
    """Ask the user for a directory path and tidy up the typed text.

    Leading/trailing whitespace and one pair of matching surrounding quotes
    (common when a path is pasted) are removed, and a leading ``~`` is
    expanded to the user's home directory.
    """
    raw = input(prompt).strip()
    if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ('"', "'"):
        raw = raw[1:-1].strip()
    return os.path.expanduser(raw)


def main():
    """Interactively convert every file in a directory to PDF.

    Prompts for an input directory and an output directory, then passes each
    regular file directly inside the input directory (sub-directories are not
    descended into) to ``convert_to_pdf`` in sorted name order. Problems with
    the supplied directories are logged and end the run early.
    """
    input_directory = _prompt_for_directory(
        "Enter the path to the directory where the files that need to be converted are located: "
    )
    output_directory = _prompt_for_directory(
        "Enter the path to the directory where the output files should be saved: "
    )

    if not os.path.isdir(input_directory):
        logging.error(f"Input directory does not exist: {input_directory}")
        return

    # An empty answer cannot be created as a directory; stop before converting.
    if not output_directory:
        logging.error("No output directory was provided.")
        return

    # sorted() makes the processing order reproducible; os.listdir's is arbitrary.
    candidates = (os.path.join(input_directory, name) for name in sorted(os.listdir(input_directory)))
    file_paths = [path for path in candidates if os.path.isfile(path)]

    if not file_paths:
        logging.warning(f"No files found in input directory: {input_directory}")
        return

    for input_path in file_paths:
        convert_to_pdf(input_path, output_directory)

    logging.info(f"Finished processing {len(file_paths)} file(s); output directory: {output_directory}")

# Start the interactive conversion only when this code runs as the main
# module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()


Enter the path of the file to convert:  /Volumes/One Touch/Input
Enter the output directory:  /Volumes/One Touch/output


2024-06-05 12:02:43,674 - ERROR - File has no extension: /Volumes/One Touch/Input


File has no extension: /Volumes/One Touch/Input
File has been converted and saved to /Volumes/One Touch/output
