# Medium Blog Summarizer - With OpenAI API

In [None]:
# imports
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI


In [None]:
# Load environment variables in a file called .env

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key
# The whitespace check runs before the prefix check: a key with leading
# whitespace would otherwise be misreported as having the wrong prefix.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


In [None]:
# Create the OpenAI client that the summarizer function below uses for chat completions.
# No key is passed explicitly; presumably the client reads OPENAI_API_KEY from the
# environment populated by load_dotenv above - confirm against the OpenAI library docs.
openai = OpenAI()

In [None]:
# A class to represent a Webpage
# Some websites need you to use proper headers when fetching them:
# the browser-like User-Agent below is sent with every request the Website class makes.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A web page fetched over HTTP and reduced to its title and visible text.

    Attributes:
        url: the address the page was fetched from
        title: the text of the page's <title>, or "No title found"
        text: the page text with script, style, img and input elements removed
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download
            timeout: seconds to wait for the server before requests gives up
        """
        self.url = url
        # Without a timeout, requests waits indefinitely on an unresponsive server
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None for an empty <title> or one with nested tags,
        # so fall back to the placeholder in that case as well
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        # html.parser does not synthesize a <body>; use the whole document
        # when the page has none instead of failing on None
        root = soup.body if soup.body is not None else soup
        for irrelevant in root(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = root.get_text(separator="\n", strip=True)

In [None]:

def user_prompt_for_blog(website):
    """
    Build the user prompt for a blog page.

    The prompt names the blog by website.title, asks for a markdown summary
    (including any paper and github links), and ends with website.text.
    """
    heading = f"You are looking at a blog titled {website.title}"
    # Adjacent literals are joined into the single instruction sentence block
    instructions = (
        "The contents of this blog is as follows; "
        "please provide a summary of this blog in markdown. "
        "If it includes references any research papers and github links, "
        "please provide their links also."
    )
    return heading + "\n" + instructions + "\n\n" + website.text


def summarize_medium_blog(url, system_prompt, model="gpt-4o-mini"):
    """
    Fetch the blog at url and return the model's summary of it.

    The page is wrapped in a Website, turned into a user prompt, and sent
    together with system_prompt to the chat completions endpoint of the
    module-level openai client. The content of the first choice is returned.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for_blog(page)},
    ]
    completion = openai.chat.completions.create(model=model, messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content




In [None]:
# System prompt
# Adjacent literals are concatenated; every sentence ends with an explicit space
# so the sentences no longer run together ("post.Please", "emphasizes.Please").
system_prompt_guru = (
    "You are a scientific assistant. You have to help summarize a blog post. "
    "Please tell about the most important findings and anything the author emphasizes. "
    "Please provide all the important links mentioned. "
    "Give the output in markdown."
)

# Blog URL
guru_blog = "https://gurudeep1998.medium.com/w-net-a-deep-model-for-fully-unsupervised-image-segmentation-reproduction-2651540eaed6"
# Alternative blog to try (keep it a plain URL string: summarize_medium_blog builds the Website itself)
# guru_blog = "https://medium.com/write-a-catalyst/you-are-fired-now-80458d77205a"

In [None]:
# Summarize the blog with the chosen system prompt and render the returned markdown in the notebook
display(Markdown(summarize_medium_blog(guru_blog, system_prompt_guru, "gpt-4o-mini")))