In [1]:
%%writefile figg.py

# Import necessary libraries
import streamlit as st
import requests
from bs4 import BeautifulSoup
from transformers import BartForConditionalGeneration, BartTokenizer
import re
from fpdf import FPDF
from PyPDF2 import PdfReader
from txtai.pipeline import Summary
import os
import uuid

# Directory that generated PDFs are written to. The fallback is a
# machine-specific Windows path, so deployments on other machines can point
# the app elsewhere by setting the SUMMARY_OUTPUT_DIR environment variable.
DEFAULT_OUTPUT_DIR = os.environ.get(
    "SUMMARY_OUTPUT_DIR", "C:\\Users\\ACER\\Downloads\\stramlit\\"
)

# Function to scrape Wikipedia content
def scrape_wikipedia(url, timeout=10):
    """Download *url* and return its HTML parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout by default, so without one a stalled server
            would block the app indefinitely.

    Returns:
        A ``BeautifulSoup`` object built with the ``html.parser`` backend.

    Raises:
        requests.RequestException: On connection failures, timeouts, or a
            4xx/5xx response.
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors instead of parsing (and later summarizing) an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')

# Function to extract headings and contents from HTML
def extract_headings_and_contents(soup):
    """Collect cleaned heading and paragraph text from a parsed page.

    Args:
        soup: Parsed document exposing ``find_all``.

    Returns:
        A ``(headings, contents)`` tuple of lists. ``headings`` holds the
        cleaned text of every ``h1`` to ``h6`` tag and ``contents`` the
        cleaned text of every ``p`` tag, each in the order ``find_all``
        returns them. The two lists are built independently of each other.
    """
    heading_tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    headings = [
        clean_text(tag.text.strip()) for tag in soup.find_all(heading_tags)
    ]
    contents = [clean_text(tag.text) for tag in soup.find_all('p')]
    return headings, contents

# Function to clean text
def clean_text(text):
    """Remove digits and punctuation from *text* and normalize its whitespace.

    Digits and every character that is neither a word character nor
    whitespace are deleted; underscores count as word characters and are
    kept. Runs of whitespace are then collapsed to single spaces and the
    result is trimmed.

    Args:
        text: Raw string to clean.

    Returns:
        The cleaned string (possibly empty).
    """
    without_symbols = re.sub(r'\d|[^\w\s]', '', text)
    # Collapse whitespace only after the deletions: removing a token such as
    # " , " would otherwise leave a double space behind.
    return re.sub(r'\s+', ' ', without_symbols).strip()

# Function to summarize text using BART model
def _load_bart(model_name):
    """Load and return the ``(tokenizer, model)`` pair for *model_name*."""
    tokenizer = BartTokenizer.from_pretrained(model_name)
    model = BartForConditionalGeneration.from_pretrained(model_name)
    return tokenizer, model


def summarize_with_bart(contents):
    """Summarize each string in *contents* with ``facebook/bart-large-cnn``.

    Args:
        contents: Iterable of text passages.

    Returns:
        A list with one summary per input passage, in the same order. Empty
        or whitespace-only passages yield ``""`` without invoking the model,
        so the result stays index-aligned with the input.
    """
    model_name = "facebook/bart-large-cnn"
    tokenizer = model = None
    summaries = []

    for content in contents:
        # Nothing to summarize; keep the slot so indices still line up.
        if not content or not content.strip():
            summaries.append("")
            continue

        if model is None:
            # Load lazily, and only once per call. When the installed
            # Streamlit offers st.cache_resource, wrap the loader with it so
            # the loaded objects can be reused across script reruns instead
            # of being reloaded on every button click.
            cache_resource = getattr(st, "cache_resource", None)
            loader = cache_resource(_load_bart) if cache_resource else _load_bart
            tokenizer, model = loader(model_name)

        # Inputs longer than 1024 tokens are truncated by the tokenizer.
        inputs = tokenizer([content], max_length=1024, return_tensors="pt", truncation=True)
        summary_ids = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=150,
            num_beams=4,
            length_penalty=2.0,
            early_stopping=True,
        )
        summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))

    return summaries

# Function to display word count reduction
def display_word_count_reduction(before_count, after_count):
    """Write the before/after word counts and the percentage reduction to the page.

    The figures shown are exactly the counts passed in; no offset is applied.

    Args:
        before_count: Number of words before summarization.
        after_count: Number of words after summarization.
    """
    if before_count > 0:
        reduction = (before_count - after_count) / before_count * 100
    else:
        # No input words: report 0% rather than dividing by zero.
        reduction = 0.0
    st.write(f"Word count before summarization: {before_count}")
    st.write(f"Word count after summarization: {after_count}")
    st.write(f"Percentage reduction in word count: {reduction:.2f}%")

# Function to create PDF for web content
def create_web_pdf(headings, summaries, save_path):
    """Write paired headings and summaries to a PDF under ``DEFAULT_OUTPUT_DIR``.

    Headings and summaries are paired positionally with ``zip``, so surplus
    items in the longer list are ignored. Word-count statistics are reported
    on the page before the PDF is written.

    Args:
        headings: Heading strings, rendered in bold red, centered.
        summaries: Summary strings, rendered in black below each heading.
        save_path: File name, joined onto ``DEFAULT_OUTPUT_DIR``.
    """
    def _latin1(text):
        # The built-in Arial font only covers Latin-1; substitute '?' for
        # anything else so one exotic character cannot abort the whole PDF.
        return text.encode('latin-1', 'replace').decode('latin-1')

    pairs = list(zip(headings, summaries))

    # NOTE(review): the original page text is not passed to this function, so
    # the "before" figure is heading words plus summary words, not the size
    # of the source article. Confirm whether callers should supply it.
    word_count_after = sum(len(summary.split()) for _, summary in pairs)
    word_count_before = word_count_after + sum(len(heading.split()) for heading, _ in pairs)
    display_word_count_reduction(word_count_before, word_count_after)

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for heading, summary in pairs:
        pdf.set_text_color(255, 0, 0)  # Red heading
        pdf.set_font("Arial", size=14, style='B')  # Size 14, bold
        pdf.cell(200, 10, txt=_latin1(heading), ln=True, align="C")
        pdf.set_text_color(0, 0, 0)  # Back to black for the body
        pdf.set_font("Arial", size=12)  # Back to body size
        pdf.multi_cell(0, 10, txt=_latin1(summary))

    pdf_output_path = os.path.join(DEFAULT_OUTPUT_DIR, save_path)
    # Create the target directory on first use instead of failing on write.
    output_dir = os.path.dirname(pdf_output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    pdf.output(pdf_output_path)
    st.success(f"PDF file '{pdf_output_path}' for web content has been created.")

# Function to create PDF for text input
def create_text_pdf(input_text, input_heading, input_summary, save_path):
    """Write a single heading and summary to a PDF under ``DEFAULT_OUTPUT_DIR``.

    Word-count statistics comparing *input_text* with *input_summary* are
    reported on the page before the PDF is written.

    Args:
        input_text: Original text; used only for the word-count comparison.
        input_heading: Heading string, rendered in bold red, centered.
        input_summary: Summary string, rendered in black below the heading.
        save_path: File name, joined onto ``DEFAULT_OUTPUT_DIR``.
    """
    def _latin1(text):
        # The built-in Arial font only covers Latin-1; substitute '?' for
        # anything else so one exotic character cannot abort the whole PDF.
        return text.encode('latin-1', 'replace').decode('latin-1')

    display_word_count_reduction(len(input_text.split()), len(input_summary.split()))

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    pdf.set_text_color(255, 0, 0)  # Red heading
    pdf.set_font("Arial", size=14, style='B')  # Size 14, bold
    pdf.cell(200, 10, txt=_latin1(input_heading), ln=True, align="C")
    pdf.set_text_color(0, 0, 0)  # Back to black for the body
    pdf.set_font("Arial", size=12)  # Back to body size
    pdf.multi_cell(0, 10, txt=_latin1(input_summary))

    pdf_output_path = os.path.join(DEFAULT_OUTPUT_DIR, save_path)
    # Create the target directory on first use instead of failing on write.
    output_dir = os.path.dirname(pdf_output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    # Let FPDF write the file itself, as create_web_pdf does: the return type
    # of output(dest='S') differs between FPDF implementations, so calling
    # .encode() on it is not portable.
    pdf.output(pdf_output_path)

    st.success(f"PDF file '{pdf_output_path}' for text input has been created.")

# Function to summarize text using txtai
def summarize_text(input_text):
    """Summarize *input_text* with a newly constructed txtai ``Summary`` pipeline.

    Args:
        input_text: Text to summarize.

    Returns:
        Whatever the ``Summary`` pipeline returns for *input_text*.
    """
    pipeline = Summary()
    return pipeline(input_text)

# Function to summarize PDF from file
def summarize_pdf_from_file(file_path):
    """Extract the text of the PDF at *file_path* and return its summary.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        The result of ``summarize_text`` applied to the concatenated page text.
    """
    with open(file_path, "rb") as f:
        reader = PdfReader(f)
        # Join pages with a newline so the last word of one page does not
        # fuse with the first word of the next; a page that yields no text
        # contributes an empty string.
        text = "\n".join(page.extract_text() or "" for page in reader.pages)
    # The file is no longer needed once the text has been extracted.
    return summarize_text(text)

# Function to handle text input summarization
def summarize_text_input():
    """Render the free-text form and, on submit, show and export a BART summary.

    Displays an error instead of summarizing when the text area is empty or
    contains only whitespace. Any exception raised while summarizing or
    writing the PDF is reported on the page rather than propagated.
    """
    input_text = st.text_area("Enter the text:")
    if not st.button("Summarize"):
        return

    # Whitespace-only input is truthy but has nothing to summarize.
    if not input_text or not input_text.strip():
        st.error("Please enter some text to summarize.")
        return

    try:
        input_heading = "Summary"
        input_summary = summarize_with_bart([input_text])[0]
        st.subheader(input_heading)
        st.write(input_summary)

        create_text_pdf(input_text, input_heading, input_summary, f"{uuid.uuid4()}.pdf")
    except Exception as e:
        st.error(f"An error occurred: {e}")

# Function to handle PDF input summarization
def summarize_pdf_input():
    """Render the PDF upload form and, on submit, show and export a summary.

    The uploaded file is read in memory, so no temporary file is written to
    the working directory. Any exception raised while reading, summarizing,
    or writing the PDF is reported on the page rather than propagated.
    """
    uploaded_file = st.file_uploader("Upload PDF file", type=["pdf"])
    # The button is only rendered once a file has been uploaded.
    if uploaded_file is None or not st.button("Summarize"):
        return

    try:
        # PdfReader accepts a file-like object, so the upload can be parsed
        # directly; this avoids a shared "temp.pdf" that concurrent sessions
        # could overwrite or delete from under each other.
        reader = PdfReader(uploaded_file)
        original_text = "\n".join(page.extract_text() or "" for page in reader.pages)
        if not original_text.strip():
            st.error("No extractable text was found in the uploaded PDF.")
            return

        summarized_text = summarize_text(original_text)
        st.write("Summarized text:")
        st.write(summarized_text)

        # Pass the extracted text as the "before" text so the reported
        # word-count reduction compares the document with its summary.
        create_text_pdf(original_text, "Summary", summarized_text, f"{uuid.uuid4()}.pdf")
    except Exception as e:
        st.error(f"An error occurred: {e}")

# Function to handle Wikipedia page summarization
def summarize_wikipedia():
    """Render the URL form and, on submit, show and export per-paragraph summaries.

    Fetches the page, extracts headings and paragraphs, summarizes the
    paragraphs with BART, displays each heading/summary pair, and writes the
    pairs to a PDF. Any exception raised along the way is reported on the
    page rather than propagated.
    """
    wikipedia_url = st.text_input("Enter the Wikipedia URL:")
    if not st.button("Summarize"):
        return
    if not wikipedia_url:
        st.error("Please enter a Wikipedia URL.")
        return

    try:
        page = scrape_wikipedia(wikipedia_url)
        headings, contents = extract_headings_and_contents(page)
        summaries_wikipedia = summarize_with_bart(contents)

        # Headings and summaries are paired by position.
        for title, body in zip(headings, summaries_wikipedia):
            st.subheader(title)
            st.write(body)

        pdf_name = f"{uuid.uuid4()}.pdf"
        create_web_pdf(headings, summaries_wikipedia, pdf_name)
    except Exception as e:
        st.error(f"An error occurred: {e}")

# Main function to run the Streamlit app
def main():
    """Render the page header and hand off to the selected summarization mode."""
    title_css = """
    <style>
        .title {
            text-align: center;
        }
    </style>
    """
    st.markdown(title_css, unsafe_allow_html=True)

    st.header("Ramachandra college of Engineering")
    st.subheader("Department of Artificial Intelligence and Data Science")
    st.title("Text Summarization App")

    # Radio label -> handler; insertion order defines the order of the choices.
    handlers = {
        "Summarize Wikipedia Page": summarize_wikipedia,
        "Summarize Text": summarize_text_input,
        "Summarize PDF": summarize_pdf_input,
    }
    option = st.radio("Choose an option", tuple(handlers))

    handler = handlers.get(option)
    if handler is not None:
        handler()

if __name__ == "__main__":
    main()


Overwriting figg.py
