In [None]:
# Boilerplate: This block goes into every notebook.
# It sets up the environment, installs the requirements, and checks for the required environment variables.

import os
from IPython.display import clear_output

# --- Setup state shared by the helper functions in this cell ---

# Environment variables that must be defined before the notebook can run.
REQUIRED_ENV_VARS = ["OPENAI_API_KEY"]

# Flipped to True once the pip install has succeeded, so a re-run skips the install.
requirements_installed = False

# Retry bookkeeping for the install step: attempts used so far, and the cap.
retries = 0
max_retries = 3


def install_requirements():
    """Install the packages listed in requirements.txt, retrying on failure.

    Does nothing (beyond printing a notice) when the module-level flag
    ``requirements_installed`` is already True. Otherwise runs
    ``pip install -r requirements.txt`` and, on a non-zero exit status, retries
    until the module-level ``retries`` counter reaches ``max_retries``.

    Side effects:
        Sets ``requirements_installed`` to True on success and increments
        ``retries`` once per retry.

    Raises:
        SystemExit: With exit code 1 when the install still fails after all
            retries have been used.
    """
    # Both names are rebound below; without declaring them global, the
    # assignment to `retries` would make it a local and reading it would raise
    # UnboundLocalError on the first failed install.
    global requirements_installed, retries

    if requirements_installed:
        print("Requirements already installed.")
        return

    while True:
        print("Installing requirements...")
        install_status = os.system("pip install -r requirements.txt")
        if install_status == 0:
            print("Requirements installed successfully.")
            requirements_installed = True
            return

        print("Failed to install requirements.")
        if retries >= max_retries:
            # Raise SystemExit directly rather than calling the interactive
            # `exit` helper, which is meant for the interpreter shell.
            raise SystemExit(1)
        print("Retrying...")
        retries += 1


from dotenv import load_dotenv
import os


def setup_env():
    """Load environment variables and verify every required one has a value.

    Calls ``load_dotenv()`` and then checks each name in ``REQUIRED_ENV_VARS``
    in order, printing a confirmation for each variable that is set.

    Raises:
        SystemExit: With exit code 1 at the first required variable that is
            missing or blank.
    """
    load_dotenv()

    for env_var in REQUIRED_ENV_VARS:
        value = os.getenv(env_var)
        # A blank value is as unusable as a missing one, so reject both here
        # instead of letting the failure surface later at the first API call.
        if value is None or not value.strip():
            print(f"Please set the {env_var} environment variable.")
            # Raise SystemExit directly rather than calling the interactive
            # `exit` helper, which is meant for the interpreter shell.
            raise SystemExit(1)
        print(f"{env_var} is set.")


# Run the setup steps in order: install dependencies, then validate the environment.
# NOTE(review): `from dotenv import load_dotenv` earlier in this cell executes
# before install_requirements() is called, so on an environment where
# python-dotenv is not yet installed the cell would fail at that import —
# consider moving the import below the install call.
install_requirements()
setup_env()
# Clear the output printed by the steps above so only the final status line remains.
clear_output()
print("🚀 Setup complete. Continue to the next cell.")

In [33]:
import traceback
from openai import OpenAI
import os
from uuid import uuid4

# Default chat model name.
# NOTE(review): not referenced anywhere else in the visible notebook;
# SelfLearningSummarizer.__init__ hard-codes the same string as its default.
DEFAULT_OPENAI_MODEL = "gpt-4o-mini"

# System message sent by SelfLearningSummarizer.get_summary.
# NOTE(review): this text says "You do not generate the summary directly",
# while SIMPLE_SUMMARIZATION_PROMPT (the user message sent alongside it) asks
# the model to summarize the text — confirm the two are meant to be combined.
SIMPLE_SUMMARIZATION_SYSTEM_PROMPT = """
    You are SummarizerGPT, an advanced AI system specialized in text summarization. Your core function is to process and analyze various types of text input, preparing the groundwork for generating high-quality summaries. Your capabilities include:

    1. Text Analysis: Quickly assess the structure, style, and content of any given text.
    2. Context Recognition: Identify the domain, target audience, and purpose of the text.
    3. Language Processing: Understand and process text in multiple languages and dialects.
    4. Semantic Comprehension: Grasp complex ideas, abstract concepts, and subtle nuances in the text.
    5. Information Hierarchy: Recognize the relative importance of different pieces of information within the text.
    6. Cross-referencing: Identify and connect related ideas across different parts of the text.
    7. Bias Detection: Recognize potential biases or slants in the original text.
    8. Data Extraction: Pull out key statistics, dates, names, and other crucial data points.
    9. Tone Analysis: Understand the emotional tone and rhetorical style of the text.
    10. Multi-format Handling: Process various text formats including plain text, HTML, PDF extracts, and more.

    You do not generate the summary directly. Instead, you prepare a comprehensive analysis of the text, which will be used by the summarization module to create the final output. Your analysis should include:

    - Text type and structure
    - Main topic and key themes
    - Target audience and purpose
    - Important data points and statistics
    - Identified biases or controversial points
    - Tone and style characteristics
    - Any unique or standout elements in the text

    Await the input text, and be ready to provide this detailed analysis to support the summarization process.
"""

# User-message template for SelfLearningSummarizer.get_summary.
# Filled with str.format(); placeholders: {text} (the source text) and
# {format} ("plain_text" or "markdown", as validated by get_summary).
SIMPLE_SUMMARIZATION_PROMPT = """
    1. Analyze the input:
    - Determine the text type (article, research paper, conversation, etc.)
    - Identify the main topic and key themes
    - Assess the length and complexity of the content

    2. Generate the summary:
    - Provide a concise yet informative summary
    - Maintain the original tone and style where appropriate
    - Ensure factual accuracy and avoid introducing new information
    - Use clear, coherent language suitable for a general audience

    3. Structure the summary:
    - Begin with a brief overview of the main topic
    - Organize key points logically, using paragraphs or bullet points as appropriate
    - Conclude with the most significant takeaway or implication

    4. Adapt to specific requirements:
    - If a word/character limit is specified, adhere to it strictly
    - If the text contains technical terms, provide brief explanations
    - For multi-section documents, summarize each section separately, then provide an overall summary

    5. Handle edge cases:
    - For very short texts, provide a condensed version without losing essential information
    - For extremely long or complex texts, focus on the most crucial points and indicate that it's a high-level summary
    - If the text contains conflicting viewpoints, present them objectively without bias

    6. Enhance readability:
    - Use transition words to improve flow between ideas
    - Employ varied sentence structures to maintain engagement
    - Highlight key terms or concepts using bold text when appropriate

    7. Quality check:
    - Ensure the summary is self-contained and understandable without the original text
    - Verify that no critical information is omitted
    - Check for consistency in tense, voice, and perspective

    8. Metadata (if applicable):
    - Include the original title, author, and date of publication
    - Mention the word count of the original text and the summary

    Now, summarize the following text, adhering to the above guidelines.

    Text: '{text}'
    Respond in the format '{format}' STRICTLY.
    IF THE FORMAT IS 'plain_text', THEN RESPOND IN PLAIN TEXT ONLY, NOT MARKDOWN.
    IF THE FORMAT IS 'markdown'. DIRECTLY GIVE THE MARKDOWN. DON'T WRAP IT IN ```markdown``` tags.
"""

# System message sent on every turn of SelfLearningSummarizer.iterative_refinement.
ITERATIVE_REFINEMENT_SYSTEM_PROMPT = """
    You are a Refinement AI specializing in improving text quality. Your task is to refine the given text based on the given instructions.
"""
# User-message template for one refinement turn in
# SelfLearningSummarizer.iterative_refinement. Filled with str.format();
# placeholders: {source_text}, {summary} (the current rolling summary) and {format}.
# NOTE(review): steps 4 and 5 ask for a change summary and a self-rating, but
# the closing instruction asks for only the revised summary — confirm intended.
ITERATIVE_REFINEMENT_PROMPT = """
   You are a Refinement AI specializing in improving text quality. Your task is to refine the given text in a single iteration. Follow these steps:

    1. Analyze the input:
    - Identify the source text and the summary
    - Assess strengths and weaknesses in content, structure, and style of the summary 

    2. Prioritize improvements:
    - Focus on 2-3 key areas that will have the most significant impact that could be made in the summary
    - Consider clarity, coherence, conciseness, and effectiveness

    3. Refine the text:
    - Make targeted improvements in the summary based on your analysis
    - Maintain the original intent and core message in the source text
    - Ensure changes enhance overall quality without introducing new issues

    4. Provide a summary of changes:
    - Briefly explain the key modifications made in the revised summary 
    - Justify your refinement decisions with clear reasoning

    5. Self-evaluate:
    - Rate the improvement on a scale of 1-10
    - Briefly explain your rating

    Source Text: '{source_text}'

    Summary to be refined: '{summary}'
    
    Respond only with the final revised summary after all improvements are made. 

    Respond in the format '{format}' STRICTLY.
    IF THE FORMAT IS 'plain_text', THEN RESPOND IN PLAIN TEXT ONLY, NOT MARKDOWN.
    IF THE FORMAT IS 'markdown'. DIRECTLY GIVE THE MARKDOWN. DON'T WRAP IT IN ```markdown``` tags.
"""


class SelfLearningSummarizer:
    """Drafts, iteratively refines, and compares text summaries via the OpenAI chat API.

    The public summarization methods are best-effort: any exception is caught,
    a message and the traceback are printed, and a fallback value is returned
    instead of raising.
    """

    def __init__(self, model="gpt-4o-mini"):
        """Create the OpenAI client.

        Args:
            model: Name of the chat model used for every request.
        """
        # The API key is read from the environment (None when it is unset).
        self.llm = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        self.model = model

    def _chat(self, system: str, prompt: str) -> str:
        """Send one system + user message pair and return the first choice's content."""
        messages = [
            {"role": "system", "content": system},
            {"role": "user", "content": prompt},
        ]
        response = self.llm.chat.completions.create(
            messages=messages, model=self.model
        )
        return response.choices[0].message.content

    def get_summary(self, source_text: str, format="plain_text") -> str:
        """Generate a summary of ``source_text``.

        Args:
            source_text: The text to summarize.
            format: Output format requested from the model; "plain_text" or "markdown".

        Returns:
            The model's summary, or "" when the format is invalid or the
            request fails (the error is printed, not raised).
        """
        try:
            if format not in ["plain_text", "markdown"]:
                raise ValueError("Invalid format. Use 'plain_text' or 'markdown'.")
            prompt = SIMPLE_SUMMARIZATION_PROMPT.format(text=source_text, format=format)
            return self._chat(SIMPLE_SUMMARIZATION_SYSTEM_PROMPT, prompt)
        except Exception:
            # Report the text that failed; the handler must not itself raise,
            # otherwise the best-effort "" fallback is never reached.
            print(f"Failed to generate summary for {source_text}")
            traceback.print_exc()
            return ""

    def iterative_refinement(
        self, source_text: str, summary: str, turns=3, format="plain_text"
    ) -> str:
        """Refine ``summary`` over ``turns`` successive model calls.

        Each turn feeds the current rolling summary back to the model together
        with the source text and replaces it with the model's reply.

        Args:
            source_text: The original text the summary is based on.
            summary: The initial summary to refine.
            turns: Number of refinement rounds to run.
            format: Output format requested from the model.

        Returns:
            The summary after the last completed turn. If a turn fails, the
            error is printed and the most recent successful summary (or the
            initial one) is returned.
        """
        session_id = str(uuid4())
        print(f"Iterative Refinement ({session_id}): Session ID: {session_id}")
        current_summary = summary
        completed_turns = 0
        try:
            for current_turn in range(1, turns + 1):
                print(f"Iterative Refinement ({session_id}): Turn {current_turn}.")
                prompt = ITERATIVE_REFINEMENT_PROMPT.format(
                    source_text=source_text, summary=current_summary, format=format
                )
                current_summary = self._chat(ITERATIVE_REFINEMENT_SYSTEM_PROMPT, prompt)
                # Record completion only after the reply arrived, so the logs
                # name the turn that actually finished.
                completed_turns = current_turn
                print(
                    f"Iterative Refinement ({session_id}): Turn {current_turn} completed. Updated rolling summary."
                )
            return current_summary
        except Exception:
            print(
                f"Iterative Refinement ({session_id}): Failed to complete all turns for {source_text} and {summary}."
            )
            print(
                f"Iterative Refinement ({session_id}): Turns completed: {completed_turns}"
            )
            traceback.print_exc()
            return current_summary

    def compare_summaries(self, source_text: str, summary1: str, summary2: str) -> str:
        """Ask the model to score two summaries of ``source_text`` against each other.

        Args:
            source_text: The original text both summaries are based on.
            summary1: The initial summary.
            summary2: The refined summary.

        Returns:
            The model's feedback (requested as a markdown table), or "" when
            the request fails (the error is printed, not raised).
        """
        try:
            print(f"Comparing summaries for {source_text}.")
            system = "You are a Comparison AI specializing in evaluating text quality. Your task is to compare two summaries and provide feedback on their quality."
            prompt = f"""
            Compare the two summaries below and provide feedback on their quality. 
            Provide score comparison for both summaries, the old summary score and the new summary score.
            This will help us compare the two summaries on various parameters.
            Refer to the source text when making your evaluation. \n\n 
            Source Text: {source_text}
            Initial Summary: {summary1} 
            Refined Summary: {summary2}
            STRICTLY PROVIDE YOUR RESPONSE AS MARKDOWN TABLE WITH SCORES AND JUSTIFICATIONS.
            """
            return self._chat(system, prompt)
        except Exception:
            print(f"Failed to compare summaries for {source_text}")
            traceback.print_exc()
            return ""

    def get(self):
        """Return the next item from ``self.q``.

        NOTE(review): ``self.q`` is never assigned anywhere in this class, so
        as written this raises AttributeError unless a caller sets it first.
        Looks like a leftover from a queue-based version — confirm, then
        remove it or wire up the queue.
        """
        return self.q.get()

    def empty(self):
        """Return whether ``self.q`` is empty.

        NOTE(review): same caveat as ``get`` — ``self.q`` is never assigned in
        this class.
        """
        return self.q.empty()

In [None]:
# Let's get the summary and test our prompts which seem to be solid.
# This cell defines `text`, `format` and `summary`, which later cells read.

summarizer = SelfLearningSummarizer()

# Credits: Arpit Bhayani
# Post Link: https://www.linkedin.com/posts/arpitbhayani_asliengineering-careergrowth-activity-7280566114894430208-tjB2?utm_source=share&utm_medium=member_desktop

# Sample input text used by every summarization cell in this notebook.
text = """
When working on a new project, we engineers almost always start with the most fascinating part. But, while it's exciting for us, it's not always what's best for the project.

The easiest way to become an effective lead/manager is to break down the project into tasks and prioritize the most important items. So, it is always a good idea that before the work begins, step back and ask

1. what is the most critical piece?
2. which items are highest risk and need early attention?
3. which deliverables provide the most immediate value?

We naturally gravitate towards easily doable, less impactful, and tangential parts of the project. This happens because of a lack of a broader context. So, if you are leading a project, make sure,

1. define a clear roadmap and align it with business outcomes
2. define milestones and priorities

A good leader doesn’t micromanage but ensures that the team starts on the right foot. Check-in periodically to ensure the alignment while giving engineers ownership of their tasks.

Prioritization is what separates effective leads from those simply managing tasks. As a lead, you are not just there to oversee execution but to set the direction.
"""
# NOTE(review): this name shadows the built-in `format()` for the rest of the
# notebook session; later cells read it, so a rename must be done everywhere.
format = "plain_text"

# get_summary returns "" (after printing the error) when the request fails.
summary = summarizer.get_summary(text, format=format)
print(summary)

In [None]:
# Let's try a markdown response now
# Relies on `text` defined in the previous cell.
from IPython.display import Markdown, display

summarizer = SelfLearningSummarizer()
# Rebinds the notebook-level `format`; later cells pick up this value.
format = "markdown"

summary = summarizer.get_summary(text, format=format)
# Render the model's markdown under a heading instead of printing raw text.
markdown_summary = Markdown(f"## Summary\n{summary}")
display(markdown_summary)

In [None]:
# Generate a fresh summary, then refine it over `turns` rounds.
# NOTE(review): this cell depends on state from earlier cells — `text`,
# `format` (whatever value was assigned last), and the `Markdown`/`display`
# imports — so it must be run after them.
turns = 3
summarizer = SelfLearningSummarizer()

# Baseline summary; the comparison cell later reads it as the initial summary.
summary = summarizer.get_summary(text, format=format)

refined_summary = summarizer.iterative_refinement(
    text, summary, turns=turns, format=format
)

markdown_summary = Markdown(f"## Refined Summary (turns={turns})\n{refined_summary}")

display(markdown_summary)

In [None]:
# Compare the initial and refined summaries and render the model's feedback.
# Relies on `text`, `summary` and `refined_summary` from the earlier cells.
# `display` is imported explicitly because this cell calls it below.
from IPython.display import Markdown, clear_output, display

summarizer = SelfLearningSummarizer()
comparison = summarizer.compare_summaries(text, summary, refined_summary)
# NOTE(review): this clears everything compare_summaries printed, including
# its error message and traceback when the request fails — in that case only
# an empty "Comparison" heading is shown.
clear_output()
markdown = Markdown(f"## Comparison\n{comparison}")
display(markdown)