# Algorithm: PDF / word Compressor
### Input: a PDF or Word (.docx) file and a compression factor
### Output: a pdf or word file with the compressed documents

##### compression factor explanation: 
###### 1 takes all text at once
###### 2 takes text between images 
###### 3 takes text for each paragraph or a page

# Import required libraries

In [1]:
# ! pip install docx
# ! pip install python-docx 
# ! python3 -m pip install python-docx
# ! pip install exceptions

In [1]:
import ipywidgets as widgets
import PyPDF2
import docx
from transformers import pipeline
from IPython.display import HTML, display



In [2]:
# Build the summarization pipeline once at module level; the helper
# functions defined later call this shared instance for every document.
summarizer = pipeline(
    task="summarization",
    model="knkarthick/MEETING_SUMMARY",
)

# Helper functions

In [18]:
def compress_pdf(file_name, compression_factor):
    """Summarize a PDF page by page and return the combined summaries.

    Args:
        file_name: The PDF to read. The value is handed unchanged to the
            PyPDF2 reader, so anything that reader accepts (a path, or a
            binary file object) works here.
        compression_factor: Selected compression mode. It is accepted for
            interface compatibility but is not used yet: every page is
            always summarized on its own.

    Returns:
        A string made of one summary per page that contains extractable
        text, each summary preceded by a newline. Returns an empty string
        when no page yields any text.
    """
    # PyPDF2 3.x removed the camelCase API (PdfFileReader / extractText),
    # while older releases lack the new names. Prefer the new name and fall
    # back to the old one so the function works on both.
    reader_cls = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader
    pdf_reader = reader_cls(file_name)

    summaries = []
    for page in pdf_reader.pages:
        extract_text = getattr(page, "extract_text", None) or page.extractText
        text = str(extract_text() or "")
        if not text.strip():
            # Image-only or blank pages have nothing to summarize; feeding
            # an empty string to the model only produces noise.
            continue
        # truncation=True lets the pipeline cut pages that exceed the
        # model's maximum input length instead of raising.
        page_summary = summarizer(text, truncation=True)
        summaries.append("\n" + str(page_summary[0]['summary_text']))

    return "".join(summaries)
            

def compress_word(file_name, compression_factor):
    """Summarize the whole text of a Word document in one pass.

    Args:
        file_name: The .docx document to read. The value is handed
            unchanged to ``docx.Document``, so anything it accepts (a path,
            or a binary file object) works here.
        compression_factor: Selected compression mode. It is accepted for
            interface compatibility but is not used yet: all paragraphs are
            always summarized together.

    Returns:
        The summary preceded by a newline, or an empty string when the
        document contains no text.
    """
    doc = docx.Document(file_name)

    # Join only the non-empty paragraphs so the model input does not start
    # with a stray ". " or contain ". . " runs from blank paragraphs.
    paragraphs = (para.text.strip() for para in doc.paragraphs)
    txt = ". ".join(text for text in paragraphs if text)
    if not txt:
        return ""

    # truncation=True lets the pipeline cut input that exceeds the model's
    # maximum length instead of raising.
    summary = summarizer(txt, truncation=True)[0]['summary_text']
    return "\n" + summary
    
    
# Define a function to handle the button click event
def on_button_clicked(b):
    """Handle a click on the "Compress" button.

    Reads the uploaded file from the ``upload`` widget, summarizes it with
    ``compress_pdf`` or ``compress_word`` depending on its extension, and
    shows the result in the ``output`` widget. Problems (no file selected,
    unsupported extension) are reported inside the ``output`` widget and the
    handler returns without attempting a summary.

    Args:
        b: The button instance that triggered the event (unused).
    """
    import io
    import os
    from html import escape

    # Start from a clean output area on every click.
    output.clear_output()

    # Clicking before choosing a file leaves the upload value empty.
    if not upload.value:
        with output:
            print('Error: No file selected')
        return

    # multiple=False, so the (name -> file info) mapping holds one entry.
    file = next(iter(upload.value.values()))

    # splitext copes with names that contain several dots or none at all;
    # lower() makes "REPORT.PDF" behave like "report.pdf".
    file_ext = os.path.splitext(file['metadata']['name'])[1].lower()
    factor = compression_factor.value

    # Work on the uploaded bytes instead of re-opening the bare file name,
    # which only exists on disk if the file happens to live in the kernel's
    # working directory.
    document = io.BytesIO(file['content'])

    if file_ext == '.pdf':
        out = compress_pdf(document, factor)
    elif file_ext == '.docx':
        out = compress_word(document, factor)
    else:
        with output:
            print('Error: Unsupported file type')
        # Nothing was produced, so there is nothing to render.
        return

    # The summary is plain text derived from the uploaded document; escape
    # it so characters such as "<" or "&" are shown, not parsed as markup.
    output.append_display_data(HTML(escape(out)))

In [20]:
# Import required libraries
import PyPDF2
import docx
import ipywidgets as widgets
from IPython.display import HTML, display

# Custom CSS for the GUI elements, kept as a single <style> block so it can
# be rendered as HTML in the notebook output.
# NOTE(review): the selectors (.widget-label, .widget-upload,
# .widget-dropdown, .widget-button) are presumably the class names that
# ipywidgets puts on its DOM elements -- confirm against the installed
# ipywidgets version.
style = """
<style>
    .widget-label {
        font-size: 18px;
        font-weight: bold;
        color: #333;
    }
    .widget-upload {
        font-size: 16px;
        padding: 10px 20px;
        border: 2px solid #333;
        border-radius: 5px;
        transition: all 0.3s;
    }
    .widget-upload:hover {
        border-color: #999;
        cursor: pointer;
    }
    .widget-dropdown {
        font-size: 16px;
        color: #333;
    }
    .widget-button {
        font-size: 16px;
        font-weight: bold;
        padding: 10px 20px;
        background-color: #333;
        color: #fff;
        border-radius: 5px;
        transition: all 0.3s;
    }
    .widget-button:hover {
        background-color: #999;
        cursor: pointer;
    }
    
</style>
"""

# File picker restricted to the two supported document types; only a single
# file can be selected at a time.
upload = widgets.FileUpload(
    accept='.pdf,.docx',  # Accept only PDF and Word files
    multiple=False,  # Allow only one file to be uploaded
    description='Select file:',
    layout=widgets.Layout(width='50%')
)

# Dropdown for the compression factor. Each option is a (label, value) pair;
# the integer value is what the click handler reads.
compression_factor = widgets.Dropdown(
    options=[('All text at once', 1), ('Text between images', 2), ('Text for each paragraph or page', 3)],
    value=1,  # Set the default value to 1
    description='Compression factor:',
    disabled=False,
    # The default description width is too narrow for this label and would
    # truncate it; 'initial' lets the label take the width it needs.
    style={'description_width': 'initial'}
)

# Button that starts the compression when clicked.
button = widgets.Button(
    description='Compress',
    disabled=False,
    button_style='success',  # Use a green color for the button
    tooltip='Click to compress the selected file',
    icon='check'
)

# Output area in which the summary (or an error message) is shown.
output = widgets.Output()


    
    
# Attach the button click event to the on_button_clicked function
button.on_click(on_button_clicked)

# Stack the GUI elements vertically: picker, dropdown, button, result area.
box = widgets.VBox([upload, compression_factor, button, output])

# Add the custom styles. A bare HTML(style) expression is only rendered when
# it is the last expression of a cell, so it must be displayed explicitly
# for the CSS to reach the page.
display(HTML(style))

display(box)

VBox(children=(FileUpload(value={}, accept='.pdf,.docx', description='Select file:', layout=Layout(width='50%'…