<a href="https://colab.research.google.com/github/Chae-sang-jun/2024-2_Ajou_Softcon_Self_Project/blob/main/Intelligent_Document_Summary_Softcon_%EC%A0%9C%EC%B6%9C%EC%9A%A9_202020679_%EC%B1%84%EC%83%81%EC%A4%80.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required dependencies
!pip install gradio==4.44.1 transformers openai deep-translator langdetect PyPDF2 git+https://github.com/openai/whisper.git

# Import libraries
import gradio as gr
from transformers import pipeline
import whisper
from deep_translator import GoogleTranslator
from langdetect import detect
from PyPDF2 import PdfReader

# Initialize tools (module-level singletons shared by every request handler).
# Both translators auto-detect the source language; they differ only in target.
translator = GoogleTranslator(source="auto", target="en")
translator_ko = GoogleTranslator(source="auto", target="ko")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
# Pin the QA checkpoint explicitly instead of relying on the pipeline's
# implicit default, so the model does not silently change between
# transformers releases. This is the checkpoint the default resolved to.
qa_model = pipeline(
    "question-answering",
    model="distilbert/distilbert-base-cased-distilled-squad",
)
whisper_model = whisper.load_model("base")

# Utility Functions
def preprocess_text(text):
    """Return *text* with leading and trailing whitespace removed."""
    cleaned = text.strip()
    return cleaned

def save_to_file(content, filename):
    """Write *content* to *filename* as UTF-8 and report the outcome.

    Returns a human-readable status string in both cases: a confirmation
    naming the file on success, or the error text on failure. No exception
    is propagated to the caller.
    """
    try:
        with open(filename, mode="w", encoding="utf-8") as out:
            out.write(content)
    except Exception as err:
        return f"Error saving file: {err}"
    else:
        return f"File saved as {filename}"

def split_text(text, max_length):
    """Split *text* into chunks of at most *max_length* characters.

    Each chunk is cut at the last sentence boundary (". ") found within the
    first *max_length* characters, keeping the period with the chunk. When no
    boundary exists in that window the text is hard-cut at *max_length*.
    Whitespace around each cut is stripped from the remainder.

    Note that the limit is measured in characters (``len``), not model tokens.

    Args:
        text: The string to split.
        max_length: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of non-empty chunks in original order. Empty input yields an
        empty list.

    Raises:
        ValueError: If *max_length* is not positive.
    """
    if max_length <= 0:
        # A non-positive limit could never make progress through the text.
        raise ValueError("max_length must be a positive integer")

    chunks = []
    while len(text) > max_length:
        # The whole ". " must lie inside the first max_length characters.
        boundary = text.rfind(". ", 0, max_length)
        # Keep the period with the chunk; otherwise hard-cut exactly at the
        # limit so the chunk never exceeds max_length.
        cut = boundary + 1 if boundary != -1 else max_length
        chunks.append(text[:cut])
        text = text[cut:].strip()
    if text:
        # Skip an empty remainder so callers never receive a blank chunk.
        chunks.append(text)
    return chunks

def summarize_long_text(text):
    """Summarize *text* piece by piece and join the partial summaries.

    The text is first divided with ``split_text`` so that each piece stays
    within the size limit below, every piece is summarized independently, and
    the resulting summaries are concatenated with single spaces.
    """
    # Size limit handed to split_text, which measures it in characters.
    max_input_length = 1024
    partial_summaries = []
    for piece in split_text(text, max_input_length):
        result = summarizer(piece, max_length=150, min_length=50, do_sample=False)
        partial_summaries.append(result[0]["summary_text"])
    return " ".join(partial_summaries)

def extract_text_from_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: An object whose ``name`` attribute is the path of the PDF
            (as provided by the upload component).

    Returns:
        The concatenated page text with surrounding whitespace stripped, or
        a string starting with "Error reading PDF file:" if anything fails.
        No exception is propagated to the caller.
    """
    try:
        reader = PdfReader(file.name)
        # Build the result with a single join instead of repeated "+=".
        # "or ''" lets a page that yields no text contribute nothing rather
        # than aborting the whole extraction.
        text = "".join(page.extract_text() or "" for page in reader.pages)
        return preprocess_text(text)
    except Exception as e:
        return f"Error reading PDF file: {e}"

def text_summarization_workflow(file):
    """Read an uploaded PDF or text file and summarize its contents.

    Args:
        file: The uploaded file object (its ``name`` attribute is the path),
            or ``None`` when the upload has been cleared.

    Returns:
        A 3-tuple ``(original_text, summarized_text, error_message)``. On
        success the error message is empty; on failure the two text fields
        are empty and the message describes the problem. A cleared upload
        returns three empty strings.
    """
    # The upload component's change event also fires when the file is
    # removed; there is nothing to process in that case.
    if file is None:
        return "", "", ""
    try:
        # Compare case-insensitively so "REPORT.PDF" is not read as plain text.
        if file.name.lower().endswith(".pdf"):
            original_text = extract_text_from_pdf(file)
            # extract_text_from_pdf reports failures as a message string;
            # surface it as an error instead of summarizing the message.
            if original_text.startswith("Error reading PDF file:"):
                return "", "", original_text
        else:
            # Handle plain text files
            with open(file.name, "r", encoding="utf-8") as f:
                original_text = f.read()

        original_text = preprocess_text(original_text)
        if not original_text:
            # Nothing to feed the summarizer (e.g. an empty document).
            return "", "", "Error processing text: no text found in file"
        summarized_text = summarize_long_text(original_text)
        return original_text, summarized_text, ""
    except Exception as e:
        return "", "", f"Error processing text: {e}"

def stt_summarization_workflow(audio):
    """Transcribe an uploaded audio file and summarize the transcript.

    Korean transcripts are translated to English before summarization and
    the summary is translated back to Korean; any other detected language is
    summarized directly.

    Args:
        audio: Path of the audio file, or ``None``/empty when the upload has
            been cleared.

    Returns:
        A 3-tuple ``(transcribed_text, summarized_text, error_message)``. On
        success the error message is empty; on failure the two text fields
        are empty and the message describes the problem. A cleared upload
        returns three empty strings.
    """
    # The audio component's change event also fires when the clip is
    # removed; there is nothing to transcribe in that case.
    if not audio:
        return "", "", ""
    try:
        # Transcribe audio using Whisper
        transcription_result = whisper_model.transcribe(audio)
        stt_text = transcription_result["text"]

        # Language detection needs actual text; report an empty transcript
        # explicitly instead of surfacing an opaque detector error.
        if not stt_text.strip():
            return stt_text, "", "No speech was detected in the audio."

        # Detect language of the transcribed text
        detected_lang = detect(stt_text)

        if detected_lang != "ko":
            # Not Korean: summarize the transcript as-is.
            return stt_text, summarize_long_text(stt_text), ""

        # Korean: translate to English, summarize, then translate back.
        stt_text_en = translator.translate(stt_text, target="en")
        summarized_text_en = summarize_long_text(stt_text_en)
        summarized_text_ko = translator_ko.translate(summarized_text_en, target="ko")
        return stt_text, summarized_text_ko, ""
    except Exception as e:
        return "", "", f"Error processing audio: {e}"

def generate_chat_response(context, question):
    """Answer *question* from *context* with the question-answering pipeline.

    A question detected as Korean is translated to English before being sent
    to the model, and the answer is translated back to Korean. Any other
    question is passed to the model unchanged.

    Returns the answer text, or a string starting with
    "Error generating response:" if any step fails.
    """
    try:
        is_korean = detect(question) == "ko"

        # The model is queried in English when the user asked in Korean.
        query = translator.translate(question, target="en") if is_korean else question
        answer = qa_model(question=query, context=context)['answer']

        if is_korean:
            # Reply in the language the question was asked in.
            return translator_ko.translate(answer, target="ko")
        return answer
    except Exception as err:
        return f"Error generating response: {err}"

# Gradio Interface
def create_interface():
    """Build the Gradio Blocks application and return it (not yet launched).

    The UI has a start row with two workflow buttons and three initially
    hidden sections: text summarization, speech-to-text + summarization, and
    a question-answering chatbot whose context is the summary produced by
    whichever workflow the user came from.

    Each workflow section has a status box. It receives the third value
    returned by the workflow handlers (their error message) and the status
    string returned by ``save_to_file``, so failures and save confirmations
    are visible to the user.
    """
    with gr.Blocks() as app:
        # Initial Screen
        with gr.Row():
            gr.Markdown("## Welcome! Choose a Workflow:")
            btn_text_summary = gr.Button("Text Summarization")
            btn_stt_summary = gr.Button("Speech to Text + Summarization")

        # Text Summarization Workflow
        with gr.Row(visible=False) as text_summary_section:
            gr.Markdown("### Text Summarization Workflow")
            text_file = gr.File(label="Upload Text File (PDF or TXT)")
            original_text_display = gr.Textbox(label="Original Text", lines=10)
            summarized_text_display = gr.Textbox(label="Summarized Text", lines=10)
            text_status_display = gr.Textbox(label="Status", lines=2, interactive=False)
            btn_save_summary = gr.Button("Save Summarized Text")
            btn_proceed_chatbot = gr.Button("Proceed to Chatbot")

        # Speech-to-Text and Summarization Workflow
        with gr.Row(visible=False) as stt_summary_section:
            gr.Markdown("### Speech to Text + Summarization Workflow")
            audio_file = gr.Audio(label="Upload Audio File", type="filepath")
            stt_text_display = gr.Textbox(label="Transcribed Text", lines=10)
            summarized_text_display_stt = gr.Textbox(label="Summarized Text", lines=10)
            stt_status_display = gr.Textbox(label="Status", lines=2, interactive=False)
            btn_save_stt = gr.Button("Save Transcribed Text")
            btn_save_summary_stt = gr.Button("Save Summarized Text")
            btn_proceed_chatbot_stt = gr.Button("Proceed to Chatbot")

        # Chatbot Section
        with gr.Row(visible=False) as chatbot_section:
            gr.Markdown("### Chatbot Interaction (한국어/영어 지원)")
            chatbot_context_display = gr.Textbox(label="Chatbot Context", lines=10, interactive=False)
            chatbot_user_input = gr.Textbox(label="Ask a Question (English or Korean)", placeholder="Enter your question...")
            chatbot_response_display = gr.Textbox(label="Chatbot Response", lines=5)
            btn_chatbot_submit = gr.Button("Submit Question")

        # The three mutually exclusive sections, in the order the navigation
        # callbacks return their visibility updates.
        sections = [text_summary_section, stt_summary_section, chatbot_section]

        # Navigation Logic: each callback shows exactly one section.
        def show_text_summary():
            return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)

        def show_stt_summary():
            return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)

        def show_chatbot(context):
            # Also forwards the summary into the chatbot's context box.
            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), context

        btn_text_summary.click(show_text_summary, outputs=sections)
        btn_stt_summary.click(show_stt_summary, outputs=sections)
        btn_proceed_chatbot.click(
            show_chatbot,
            inputs=[summarized_text_display],
            outputs=sections + [chatbot_context_display],
        )
        btn_proceed_chatbot_stt.click(
            show_chatbot,
            inputs=[summarized_text_display_stt],
            outputs=sections + [chatbot_context_display],
        )

        # Workflow Logic
        # The workflow handlers return (text, summary, error); all three
        # values are wired so the error message reaches the status box.
        text_file.change(
            text_summarization_workflow,
            inputs=[text_file],
            outputs=[original_text_display, summarized_text_display, text_status_display],
        )
        btn_save_summary.click(
            lambda text: save_to_file(text, "summary.txt"),
            inputs=[summarized_text_display],
            outputs=[text_status_display],
        )

        audio_file.change(
            stt_summarization_workflow,
            inputs=[audio_file],
            outputs=[stt_text_display, summarized_text_display_stt, stt_status_display],
        )
        btn_save_stt.click(
            lambda text: save_to_file(text, "stt_text.txt"),
            inputs=[stt_text_display],
            outputs=[stt_status_display],
        )
        btn_save_summary_stt.click(
            lambda text: save_to_file(text, "summarized_stt.txt"),
            inputs=[summarized_text_display_stt],
            outputs=[stt_status_display],
        )

        btn_chatbot_submit.click(
            generate_chat_response,
            inputs=[chatbot_context_display, chatbot_user_input],
            outputs=[chatbot_response_display],
        )

    return app

# Launch the application.
# share=True requests a temporary public URL for the app; debug=True keeps
# the notebook cell running so errors and logs remain visible.
create_interface().launch(share=True, debug=True)


Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-79jz2z3f
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-79jz2z3f
  Resolved https://github.com/openai/whisper.git to commit 173ff7dd1d9fb1c4fddea0d41d704cfefeb8908c
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
  Preparing metadata (pyproject.toml) ... done


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 564e9b5 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.
  checkpoint = torch.load(fp, map_location=device)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://030d81386297c6362a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://030d81386297c6362a.gradio.live


