diff --git a/Scripts/Miscellaneous/HTML_to_PDF_converter/README.md b/Scripts/Miscellaneous/HTML_to_PDF_converter/README.md
new file mode 100644
index 000000000..0334b506a
--- /dev/null
+++ b/Scripts/Miscellaneous/HTML_to_PDF_converter/README.md
@@ -0,0 +1,39 @@
+# HTML to PDF Converter
+## A Python script that converts HTML to PDF, taking the input file path / website URL and the output PDF file name as arguments
+
+## Setup Instructions
+
+```
+
+# Go to the root directory of the project and install the requirements by typing :
+    sudo pip3 install -r requirements.txt
+
+# Run the script by typing :
+    python3 app.py -inp <input file path or URL> -out <output file name>
+
+    Example (For web) :
+    python3 app.py -inp https://www.google.com -out test_g.pdf
+
+    Example (For local HTML file):
+    python3 app.py -inp /home/ankdos/ind.html -out test_f.pdf
+
+# The output file will be stored in the ./output_files folder
+
+# NOTE : If you face the problem of "parse() got an unexpected keyword argument 'override_encoding'", then upgrade your html5lib by typing :
+
+    pip3 install --upgrade html5lib
+
+```
+
+## Screenshot taken of cli command :
+
+ ![output](Screenshots/cli.jpg)
+
+## Screenshot of the pdf output :
+
+ ![output](Screenshots/out.jpg)
+
+
+### Author
+
+[Ankur Pandey](https://github.com/ankdos)
\ No newline at end of file
diff --git a/Scripts/Miscellaneous/HTML_to_PDF_converter/Screenshots/cli.jpg b/Scripts/Miscellaneous/HTML_to_PDF_converter/Screenshots/cli.jpg
new file mode 100644
index 000000000..06874ff2c
Binary files /dev/null and b/Scripts/Miscellaneous/HTML_to_PDF_converter/Screenshots/cli.jpg differ
diff --git a/Scripts/Miscellaneous/HTML_to_PDF_converter/Screenshots/out.jpg b/Scripts/Miscellaneous/HTML_to_PDF_converter/Screenshots/out.jpg
new file mode 100644
index 000000000..c9ead145a
Binary files /dev/null and b/Scripts/Miscellaneous/HTML_to_PDF_converter/Screenshots/out.jpg differ
diff --git a/Scripts/Miscellaneous/HTML_to_PDF_converter/app.py b/Scripts/Miscellaneous/HTML_to_PDF_converter/app.py
new file
mode 100644
index 000000000..ea2b6bf61
--- /dev/null
+++ b/Scripts/Miscellaneous/HTML_to_PDF_converter/app.py
@@ -0,0 +1,43 @@
+"""Convert an HTML page (web URL or local file) into a PDF document."""
+import argparse
+import os
+
+import weasyprint
+
+
+class Html2Pdf:
+    """Render one HTML source into a PDF stored under ``output_files/``."""
+
+    def __init__(self, url, output_filename):
+        """Remember what to convert and where to save it.
+
+        Args:
+            url: Web URL or local path of the HTML document.
+            output_filename: Name of the PDF created in ``output_files/``.
+        """
+        self.url = url
+        self.output_filename = output_filename
+
+    def get_pdf(self):
+        """Render ``self.url`` with WeasyPrint and write the PDF to disk."""
+        pdf = weasyprint.HTML(self.url).write_pdf()
+        file_name = os.path.join('output_files', self.output_filename)
+        # Create the target folder on demand so open() does not fail
+        # when the script runs from a directory without output_files/.
+        os.makedirs(os.path.dirname(file_name), exist_ok=True)
+        with open(file_name, 'wb') as file_:
+            file_.write(pdf)
+
+
+if __name__ == '__main__':
+    # Take the inputs from the CLI; both options are mandatory because
+    # argparse would otherwise pass None on to Html2Pdf.
+    parser = argparse.ArgumentParser(
+        description="Convert an HTML file or web page to PDF")
+    parser.add_argument("-inp", "--input", required=True,
+                        help="input file url")
+    parser.add_argument("-out", "--output", required=True,
+                        help="output file name")
+    args = parser.parse_args()
+    obj = Html2Pdf(url=args.input, output_filename=args.output)
+    obj.get_pdf()
\ No newline at end of file
diff --git a/Scripts/Miscellaneous/HTML_to_PDF_converter/output_files/test_f.pdf b/Scripts/Miscellaneous/HTML_to_PDF_converter/output_files/test_f.pdf
new file mode 100644
index 000000000..88792301c
Binary files /dev/null and b/Scripts/Miscellaneous/HTML_to_PDF_converter/output_files/test_f.pdf differ
diff --git a/Scripts/Miscellaneous/HTML_to_PDF_converter/requirements.txt b/Scripts/Miscellaneous/HTML_to_PDF_converter/requirements.txt
new file mode 100644
index 000000000..00505a396
--- /dev/null
+++ b/Scripts/Miscellaneous/HTML_to_PDF_converter/requirements.txt
@@ -0,0 +1,18 @@
+cairocffi==1.1.0
+CairoSVG==2.4.2
+certifi==2020.6.20
+cffi==1.14.3
+chardet==3.0.4
+cssselect2==0.3.0
+defusedxml==0.6.0
+html5lib==1.1
+idna==2.10
+Pillow==8.0.0
+pycparser==2.20
+Pyphen==0.9.5
+requests==2.24.0
+six==1.15.0
+tinycss2==1.0.2
+urllib3==1.25.11
+WeasyPrint==51
+webencodings==0.5.1
\ No newline at end of file