https://newsletter.theaiedge.io/p/deep-dive-how-to-automate-writing

# Getting the top news

# The NewsAPI API

In [1]:
import os
from newsapi import NewsApiClient

NEWS_API_JSON_API_KEY = os.environ.get("NEWS_API_JSON_API_KEY")

# TODO Error when creating an account
newsapi = NewsApiClient(api_key=NEWS_API_JSON_API_KEY)

everything = newsapi.get_everything(
    q="artifical intelligence",
    from_param="2023-06-19",
    sort_by="relevancy",
    language="en",
)

everything["articles"][0]

NewsAPIException: {'status': 'error', 'code': 'apiKeyMissing', 'message': 'Your API key is missing. Append this to the URL with the apiKey param, or use the x-api-key HTTP header.'}

In [2]:
# Temporary workaround
# load fake_news data from a json file fake_news.json
import json

fake_news = json.load(open("fake_news.json"))

everything = fake_news

title_list = "\n".join([
    article["title"] for article in everything["articles"]
])

title_list

"Amazon plans to invest another $15 billion in India by 2030\nThis week in AI: Big tech bets billions on machine learning tools\nHow one software engineer is turning Silicon Valley Bank's collapse story into a musical\nDeal Dive: Caraway shows what else digital health can do\nHacker responsible for 2020 Twitter breach sentenced to prison\nWhat happens if regulators nix the $20B Adobe-Figma deal?\nOceanGate fires a whistleblower, hackers threaten to leak Reddit data, and Marvel embraces AI art\nWhy smart AI regulation is vital for innovation and U.S. leadership\nTechCrunch+ roundup: AI + travel, fusion investor survey, why you'll never get funding\nKeeping tabs on dry powder and university spinouts"

# Selecting the top news

Use GPT to identify the most 3 most interesting articles to focus on.

In [3]:
import os
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

top_news_tempalte = """
extract from the following list the 10 most important news about machine learning. Return the answer as a python list. 

For example: ["text 1", "text 2", "text 3"]

{list}
"""

TOP_NEWS_PROMPT = PromptTemplate(
    input_variables=["list"],
    template=top_news_tempalte,
)

llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY)
chain = LLMChain(
    llm=llm,
    prompt=TOP_NEWS_PROMPT,
)


In [4]:
import ast

# We send the prompt to the LLM
top_str = chain.run(title_list)
print(top_str)

# And we convert the result as a python list
top_list = ast.literal_eval(top_str)
top_list

["This week in AI: Big tech bets billions on machine learning tools", "Hacker responsible for 2020 Twitter breach sentenced to prison", "Why smart AI regulation is vital for innovation and U.S. leadership", "TechCrunch+ roundup: AI + travel, fusion investor survey, why you'll never get funding", "Keeping tabs on dry powder and university spinouts", "Amazon plans to invest another $15 billion in India by 2030", "What happens if regulators nix the $20B Adobe-Figma deal?", "OceanGate fires a whistleblower, hackers threaten to leak Reddit data, and Marvel embraces AI art", "Deal Dive: Caraway shows what else digital health can do"]


['This week in AI: Big tech bets billions on machine learning tools',
 'Hacker responsible for 2020 Twitter breach sentenced to prison',
 'Why smart AI regulation is vital for innovation and U.S. leadership',
 "TechCrunch+ roundup: AI + travel, fusion investor survey, why you'll never get funding",
 'Keeping tabs on dry powder and university spinouts',
 'Amazon plans to invest another $15 billion in India by 2030',
 'What happens if regulators nix the $20B Adobe-Figma deal?',
 'OceanGate fires a whistleblower, hackers threaten to leak Reddit data, and Marvel embraces AI art',
 'Deal Dive: Caraway shows what else digital health can do']

In [5]:
# Match titles with the related URLs
top_news = [
    {
        "title": article["title"],
        "url": article["url"],
    }
    for article in everything["articles"]
    if article["title"] in top_list
]

top_news

[{'title': 'Amazon plans to invest another $15 billion in India by 2030',
  'url': 'https://techcrunch.com/2023/06/23/amazon-investment-india-jassy-modi/'},
 {'title': 'This week in AI: Big tech bets billions on machine learning tools',
  'url': 'https://techcrunch.com/2023/06/24/this-week-in-ai-big-tech-bets-billions-on-machine-learning-tools/'},
 {'title': 'Deal Dive: Caraway shows what else digital health can do',
  'url': 'https://techcrunch.com/2023/06/24/caraway-digital-health-fundraising/'},
 {'title': 'Hacker responsible for 2020 Twitter breach sentenced to prison',
  'url': 'https://techcrunch.com/2023/06/23/twitter-hacker-sentenced-prison/'},
 {'title': 'What happens if regulators nix the $20B Adobe-Figma deal?',
  'url': 'https://techcrunch.com/2023/06/23/adobe-figma-deal-regulators/'},
 {'title': 'OceanGate fires a whistleblower, hackers threaten to leak Reddit data, and Marvel embraces AI art',
  'url': 'https://techcrunch.com/2023/06/24/oceangate-fires-a-whistleblower-hac

# Summarizing the news

## Extracting the news text

In [6]:
from bs4 import BeautifulSoup
from urllib.request import urlopen

def get_text_from_articles(url):
    page = urlopen(url)
    html = page.read().decode("utf-8")
    soup = BeautifulSoup(html, "html.parser")

    return soup.get_text()

for news_dict in top_news:
    try:
        article_content = get_text_from_articles(news_dict["url"])
        news_dict["content"] = article_content
    except:
        news_dict["content"] = None

## Summarizing the content

Stuff strategy
Combine strategy
Refine strategy

In [10]:
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain

def get_summary(text):
    OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

    combine_template = """
    Your job is to produce a concise summary of news article
    Provide the important points and explain why they matter
    The summary is for a blog so it has to be exciting to read

    Write a concise summary of the following:

    {text}

    CONCISE SUMMARY:
    """

    combine_prompt = PromptTemplate(
        template=combine_template,
        input_variables=["text"],
    )

    chart_text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=0,
    )
    
    # Split the content into multiple chunks
    documents = chart_text_splitter.create_documents([text])
    
    # Load the chain with ChatGPT
    llm = ChatOpenAI(openai_api_key=OPENAI_API_KEY)
    chain = load_summarize_chain(
        llm=llm,
        chain_type="map_reduce",
        combine_prompt=combine_prompt,
    )

    summary = chain.run(documents)
    
    return summary

for news_dict in top_news:
    try:
        summary = get_summary(news_dict["content"])
        news_dict["summary"] = summary
    except Exception as e:
        news_dict["summary"] = None

print(top_news[0]["summary"])

Created a chunk of size 2868, which is longer than the specified 1000
Created a chunk of size 2609, which is longer than the specified 1000
Created a chunk of size 3233, which is longer than the specified 1000
Created a chunk of size 1873, which is longer than the specified 1000
Created a chunk of size 2280, which is longer than the specified 1000
Created a chunk of size 3814, which is longer than the specified 1000
Created a chunk of size 2757, which is longer than the specified 1000
Created a chunk of size 5546, which is longer than the specified 1000
Created a chunk of size 1384, which is longer than the specified 1000
Created a chunk of size 1297, which is longer than the specified 1000


Amazon CEO Andy Jassy plans to invest an additional $15bn in India by 2030, with most of the funds earmarked for expanding Amazon Web Services. This doubles the company's investment in the country, and follows a meeting with Indian Prime Minister Narendra Modi. Amazon and other major tech companies such as Walmart and Google are making significant investments in India, which could boost the country's tech industry and create jobs.


In [13]:
for news_dict in top_news:
    # Save each news summary to a markdown file with the title of the article
    with open(f"summaries/{news_dict['title']}.md", "w") as f:
        f.write(news_dict["summary"])


# Generating images with Stable Diffusion

## The Stable Diffusion API

In [8]:
import os
import requests
import json

STABLEDIFFUSION_API_KEY = os.environ.get("STABLEDIFFUSION_API_KEY")

url = "https://stablediffusionapi.com/api/v3/text2img"

data = {
    "key": STABLEDIFFUSION_API_KEY,
    "width": "1024",
    "height": "512",
    "prompt": "A man on the mooon",
}

headers = {
    "Content-Type": "application/json",
}

response = requests.post(url, json=data, headers=headers)

response.json()

{'status': 'success',
 'generationTime': 1.4018619060516357,
 'id': 23877262,
 'output': ['https://cdn.stablediffusionapi.com/generations/a83ed746-77b8-4c46-aa0d-04fb1bfa6bca-0.png'],
 'meta': {'H': 512,
  'W': 1024,
  'enable_attention_slicing': 'true',
  'file_prefix': 'a83ed746-77b8-4c46-aa0d-04fb1bfa6bca',
  'guidance_scale': 7,
  'model': 'runwayml/stable-diffusion-v1-5',
  'n_samples': 1,
  'negative_prompt': '',
  'outdir': 'out',
  'prompt': 'A man on the mooon',
  'revision': 'fp16',
  'safetychecker': 'no',
  'seed': 2870945865,
  'steps': 20,
  'vae': 'stabilityai/sd-vae-ft-mse'}}

In [9]:
# Download the image
image_url = response.json()["output"][0]
output_path = "man_on_the_moon.png"

img_data = requests.get(image_url).content

with open(output_path, "wb") as f:
    f.write(img_data)


## Generating images for the summaries

In [15]:
def submit_post(url, data):
    headers = {
        "Content-Type": "application/json",
    }

    return requests.post(url, json=data, headers=headers)

def save_image(image_url, output_path):
    img_data = requests.get(image_url).content
    with open(output_path, "wb") as f:
        f.write(img_data)

for news in top_news:
    data["prompt"] = news["summary"]
    file_name = "./summaries/" + news["title"] + ".png"

    try:
        response = submit_post(url, data)
        save_image(response.json()["output"][0], file_name)
    except Exception as e:
        print(e)


list index out of range
list index out of range


# Generating a title and abstract

## The abstract

In [18]:
abstract_template = """
Use the following article summaries to generate a 2 or 3 sentences abstract for a blog.

Those articles are the most impactful news of the past week May 22nd 2023 to May 30th 2023.

{summaries}
"""

ABSTRACT_PROMPT = PromptTemplate(
    input_variables=["summaries"],
    template=abstract_template,
)

llm = ChatOpenAI()
chain = LLMChain(
    llm=llm,
    prompt=ABSTRACT_PROMPT,
)

summaries = "\n\n".join([
    news["title"] + "\n" + news["summary"]
    for news in top_news
])

abstract = chain.run(summaries)

# Save abstract in summaries/abstract.md
with open("summaries/abstract.md", "w") as f:
    f.write(abstract)

## The title

For the title, a simple chain with work

In [19]:
title_template = """
Use the following article summaries to generate a title for a blog about tech crunch news.
Those articles are the most impactful news of the past week from May 22nd 2023 to May 30th 2023.
The title need to be appealing such that people are excited to read the blog.

{summaries}
"""

TITLE_PROMPT = PromptTemplate(
    input_variables=["summaries"],
    template=title_template,
)

llm = ChatOpenAI()
chain = LLMChain(
    llm=llm,
    prompt=TITLE_PROMPT,
)

title = chain.run(summaries)

# Save title in summaries/title.md
with open("summaries/title.md", "w") as f:
    f.write(title)