In [2]:
import openai
import os
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Load environment variables first, then create the OpenAI client from the
# OPENAI_API_KEY variable.
load_dotenv()
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [3]:
def get_webpage_summary(url, chunk_size=4000, timeout=30):
    """
    Extract text from URL and generate a summary using OpenAI API.

    The text of every <p> element is joined, cut into pieces of at most
    ``chunk_size`` characters, and each piece is summarized on its own. When
    there is more than one piece, the partial summaries are merged by one
    further model call.

    Args:
        url: Address of the webpage to summarize.
        chunk_size: Maximum number of characters sent per summarization call.
        timeout: Seconds to wait for the webpage before giving up.

    Returns:
        The summary text. Exceptions raised while fetching or summarizing are
        caught and reported as a string beginning with "An error occurred: ".
    """

    def _complete(user_prompt):
        # Single place for the chat-completion settings shared by the
        # per-chunk requests and the consolidation request.
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a precise and concise text summarizer."},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=300,
            temperature=0.5,
        )
        # The message content is optional in the API response; treat a
        # missing value as empty text instead of failing on .strip().
        return (completion.choices[0].message.content or "").strip()

    try:
        # Reject blank input before any network access.
        if not url or not url.strip():
            return "An error occurred: no URL provided"
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        # Fetch and parse webpage content. requests has no default timeout,
        # so one is passed explicitly; raise_for_status keeps 4xx/5xx error
        # pages from being summarized as if they were the real content.
        page = requests.get(url.strip(), timeout=timeout)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, "html.parser")
        text = " ".join(p.get_text() for p in soup.find_all("p"))

        # Without paragraph text there is nothing to summarize; say so rather
        # than returning an empty string.
        if not text.strip():
            return "An error occurred: no paragraph text found on the page"

        # Split text into chunks of at most chunk_size characters
        chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

        # Summarization prompt
        summarization_prompt = """
        Please provide a concise summary of the following text. The summary should:
        - Capture the main ideas and key points
        - Be well-organized and coherent
        - Maintain the original meaning and context
        - Exclude any redundant or unnecessary information
        
        Text to summarize:
        """

        # Process chunks and get summaries
        summaries = []
        for i, chunk in enumerate(chunks, 1):
            print(f"Processing part {i} of {len(chunks)}...")
            summaries.append(_complete(f"{summarization_prompt}\n{chunk}"))

        # Combine summaries; a single chunk needs no consolidation pass
        final_summary = "\n\n".join(summaries)
        if len(chunks) > 1:
            final_summary = _complete(
                f"Please consolidate these summaries into a single coherent summary:\n{final_summary}"
            )

        return final_summary

    except Exception as e:
        return f"An error occurred: {str(e)}"

In [5]:
# Ask for a URL, summarize that page, and show the result under a banner.
input_url = input("Enter the URL to summarize: ")
print("Processing webpage...")
summary = get_webpage_summary(input_url)
print("\n--- Summary ---\n", summary, sep="\n")

Enter the URL to summarize:  https://en.wikipedia.org/wiki/ChatGPT


Processing webpage...
Processing part 1 of 13...
Processing part 2 of 13...
Processing part 3 of 13...
Processing part 4 of 13...
Processing part 5 of 13...
Processing part 6 of 13...
Processing part 7 of 13...
Processing part 8 of 13...
Processing part 9 of 13...
Processing part 10 of 13...
Processing part 11 of 13...
Processing part 12 of 13...
Processing part 13 of 13...

--- Summary ---

ChatGPT, an AI chatbot developed by OpenAI based on the GPT-4o model, has gained over 100 million users by January 2023, contributing to OpenAI's valuation of $86 billion. It uses supervised and reinforcement learning and has partnerships with companies like Microsoft and Apple. While versatile in tasks like writing, music composition, and idea generation, ChatGPT may produce incorrect responses and exhibit algorithmic bias. It faced criticisms for generating misinformation and has been banned on some platforms. OpenAI introduced premium services like ChatGPT Plus and API access for premium users. 

# Gradio

In [6]:
import openai
import os
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
import gradio as gr

# Load environment variables first, then create the OpenAI client from the
# OPENAI_API_KEY variable.
load_dotenv()
client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def get_webpage_summary(url, chunk_size=4000, timeout=30):
    """
    Extract text from URL and generate a summary using OpenAI API.

    The text of every <p> element is joined, cut into pieces of at most
    ``chunk_size`` characters, and each piece is summarized on its own. When
    there is more than one piece, the partial summaries are merged by one
    further model call.

    Args:
        url: Address of the webpage to summarize.
        chunk_size: Maximum number of characters sent per summarization call.
        timeout: Seconds to wait for the webpage before giving up.

    Returns:
        The summary text. Exceptions raised while fetching or summarizing are
        caught and reported as a string beginning with "An error occurred: ".
    """

    def _complete(user_prompt):
        # Single place for the chat-completion settings shared by the
        # per-chunk requests and the consolidation request.
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a precise and concise text summarizer."},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=300,
            temperature=0.5,
        )
        # The message content is optional in the API response; treat a
        # missing value as empty text instead of failing on .strip().
        return (completion.choices[0].message.content or "").strip()

    try:
        # Reject blank input before any network access.
        if not url or not url.strip():
            return "An error occurred: no URL provided"
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        # Fetch and parse webpage content. requests has no default timeout,
        # so one is passed explicitly; raise_for_status keeps 4xx/5xx error
        # pages from being summarized as if they were the real content.
        page = requests.get(url.strip(), timeout=timeout)
        page.raise_for_status()
        soup = BeautifulSoup(page.content, "html.parser")
        text = " ".join(p.get_text() for p in soup.find_all("p"))

        # Without paragraph text there is nothing to summarize; say so rather
        # than returning an empty string.
        if not text.strip():
            return "An error occurred: no paragraph text found on the page"

        # Split text into chunks of at most chunk_size characters
        chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

        # Summarization prompt
        summarization_prompt = """
        Please provide a concise summary of the following text. The summary should:
        - Capture the main ideas and key points
        - Be well-organized and coherent
        - Maintain the original meaning and context
        - Exclude any redundant or unnecessary information
        
        Text to summarize:
        """

        # Process chunks and get summaries
        summaries = []
        for i, chunk in enumerate(chunks, 1):
            print(f"Processing part {i} of {len(chunks)}...")
            summaries.append(_complete(f"{summarization_prompt}\n{chunk}"))

        # Combine summaries; a single chunk needs no consolidation pass
        final_summary = "\n\n".join(summaries)
        if len(chunks) > 1:
            final_summary = _complete(
                f"Please consolidate these summaries into a single coherent summary:\n{final_summary}"
            )

        return final_summary

    except Exception as e:
        return f"An error occurred: {str(e)}"

def gradio_interface(url):
    """
    Wrapper function for Gradio interface.

    Args:
        url: Text entered in the URL box.

    Returns:
        "Please enter a URL" when the input is empty or only whitespace,
        otherwise whatever get_webpage_summary returns for the trimmed URL.
    """
    # Whitespace-only input is as unusable as no input at all; trim first so
    # both cases are caught here and no request is attempted for them.
    cleaned_url = url.strip() if isinstance(url, str) else url
    if not cleaned_url:
        return "Please enter a URL"

    print("Processing webpage...")
    return get_webpage_summary(cleaned_url)

# Build the Gradio UI: a single URL text field in, a multi-line summary out.
url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")
summary_output = gr.Textbox(label="Summary", lines=10)
example_urls = [
    ["https://en.wikipedia.org/wiki/Artificial_intelligence"],
    ["https://www.bbc.com/news"],
]

demo = gr.Interface(
    fn=gradio_interface,
    inputs=url_input,
    outputs=summary_output,
    title="Webpage Summarizer",
    description="Enter a URL to get a concise summary of the webpage content.",
    examples=example_urls,
)

# Start the app when this code is run as the main module.
# NOTE(review): share=True asks Gradio for a public link, so anyone holding
# that link can trigger page fetches and OpenAI calls from this machine --
# confirm this exposure is intended before sharing the link.
if __name__ == "__main__":
    demo.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://c09b0bfa1a092ff233.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Processing webpage...
Processing part 1 of 4...
Processing part 2 of 4...
Processing part 3 of 4...
Processing part 4 of 4...
