<a href="https://colab.research.google.com/github/AbhishekRP2002/Artificial-Intelligence-Deep-Learning-Machine-Learning-Tutorials/blob/master/HN_Q%26A_using_SearchAPI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
!pip install openai



In [7]:
# Dependencies
from datetime import date, timedelta  # date handling for fetching recent news
from IPython import display  # for pretty printing
import json  # for parsing the JSON api responses and model outputs
from numpy import dot  # for cosine similarity
import openai  # for using GPT and getting embeddings
import os  # for loading environment variables
import requests  # for making the API requests
from tqdm.notebook import tqdm  # for printing progress bars

# Load environment variables
news_api_key = os.getenv("NEWS_API_KEY")

GPT_MODEL = "gpt-3.5-turbo"


# Helper functions
def json_gpt(input: str):
    completion = openai.ChatCompletion.create(
        model=GPT_MODEL,
        messages=[
            {"role": "system", "content": "Your are an intelligent AI Programmer. Your goal is to Output only valid JSON"},
            {"role": "user", "content": input},
        ],
        temperature=0.5,
    )

    text = completion.choices[0].message.content
    parsed = json.loads(text)

    return parsed


def embeddings(input: list[str]) -> list[list[str]]:
    response = openai.Embedding.create(model="text-embedding-ada-002", input=input)
    return [data.embedding for data in response.data]

In [8]:
# User asks a question
USER_QUESTION = "How can I use language models for my AI start up ?"

In [9]:
QUERIES_INPUT = f"""
You have access to a search API that returns recent news articles and information.
Generate an array of search queries that are relevant to this question.
Use a variation of related keywords for the queries, trying to be as general as possible.
Include as many queries as you can think of, including and excluding terms.
For example, include queries like ['keyword_1 keyword_2', 'keyword_1', 'keyword_2'].
Be creative. The more queries you include, the more likely you are to find relevant results.

User question: {USER_QUESTION}

Format: {{"queries": ["query_1", "query_2", "query_3"]}}
"""

queries = json_gpt(QUERIES_INPUT)["queries"]

# Let's include the original question as well for good measure
queries.append(USER_QUESTION)

queries

['language models AI start up',
 'using language models for AI start up',
 'language models for AI startups',
 'incorporating language models in AI start ups',
 'applying language models to AI start ups',
 'language models and AI start ups',
 'utilizing language models in AI start ups',
 'leveraging language models for AI start ups',
 'language models for AI companies',
 'using language models for AI companies',
 'language models and AI companies',
 'incorporating language models in AI companies',
 'applying language models to AI companies',
 'utilizing language models in AI companies',
 'leveraging language models for AI companies',
 'language models for AI businesses',
 'using language models for AI businesses',
 'language models and AI businesses',
 'incorporating language models in AI businesses',
 'applying language models to AI businesses',
 'utilizing language models in AI businesses',
 'leveraging language models for AI businesses',
 'How can I use language models for my AI sta

In [10]:
import requests

def search_hacker_news(query: str, num_articles: int = 50, sort_by: str = "relevance", page: int = 0) -> dict:
    base_url = "http://hn.algolia.com/api/v1/search"
    params = {
        "query": query,
        "page": page,
    }

    if sort_by == "relevance":
        params["tags"] = "story"
    elif sort_by == "date":
        params["tags"] = "story"
        params["numericFilters"] = "created_at_i>0"
        params["sortBy"] = "created_at"

    response = requests.get(base_url, params=params)

    return response.json()


In [11]:
articles = []

for query in tqdm(queries):
    result = search_hacker_news(query, num_articles=50, sort_by="relevance")
    if "hits" in result:
        articles += result["hits"]
    else:
        raise Exception("No hits found in the response.")

# Remove duplicates based on the "objectID" field
unique_articles = {article["objectID"]: article for article in articles}.values()

print("Total number of articles:", len(unique_articles))
print("Top 5 articles of query 1:", "\n")

for article in list(unique_articles)[:5]:
    print("Title:", article["title"])
    print("URL:", article["url"])
    print("Points:", article["points"])
    print("Number of Comments:", article["num_comments"])
    print()

  0%|          | 0/23 [00:00<?, ?it/s]

Total number of articles: 50
Top 5 articles of query 1: 

Title: Ask HN: Fastest way to get funding for a startup with the queue of early users?
URL: None
Points: 2
Number of Comments: 1

Title: Ask HN: How do I thrive in an AI dominated future?
URL: None
Points: 32
Number of Comments: 55

Title: Ask HN: Are Large Language Models Like GPT-3 a Hype?
URL: None
Points: 3
Number of Comments: 6

Title: Ask HN: Why doesn't Apple take AI more seriously?
URL: None
Points: 2
Number of Comments: 2

Title: Show HN: Retinello – An AI-Driven Learning Platform with Custom Context
URL: https://retinello.com/
Points: 1
Number of Comments: 0



In [12]:
HA_INPUT = f"""
Generate a hypothetical answer to the user's question. This answer will be used to rank search results.
Pretend you have all the information you need to answer, but don't use any actual facts. Instead, use placeholders
like NAME did something, or NAME said something at PLACE.

User question: {USER_QUESTION}

Format: {{"hypotheticalAnswer": "hypothetical answer text"}}
"""

hypothetical_answer = json_gpt(HA_INPUT)["hypotheticalAnswer"]

hypothetical_answer


'You can use language models for your AI start up by incorporating them into your natural language processing algorithms. This will enable your AI system to understand and generate human-like text, enhancing its ability to communicate with users and provide more accurate and personalized responses. Additionally, language models can be used for tasks such as sentiment analysis, text summarization, and language translation, which can further enhance the capabilities of your AI start up.'

In [13]:
hypothetical_answer_embedding = embeddings(hypothetical_answer)[0]
article_embeddings = embeddings(
    [
        f"{article['title']} {article['comment_text']} {article['story_text']}"
        for article in articles
    ]
)

# Calculate cosine similarity
cosine_similarities = []
for article_embedding in article_embeddings:
    cosine_similarities.append(dot(hypothetical_answer_embedding, article_embedding))

cosine_similarities[0:5]


[0.7837597776086492,
 0.8381212926361241,
 0.8348917042393856,
 0.853983530121543,
 0.7875939764093149,
 0.7634125568767332,
 0.8046866502043402,
 0.7914060086584567,
 0.7904447591200037,
 0.8214736297243437]

In [18]:
scored_articles = zip(articles, cosine_similarities)

# Sort articles by cosine similarity
sorted_articles = sorted(scored_articles, key=lambda x: x[1], reverse=True)

# Print top 5 articles
print("Top 5 articles:", "\n")

for article, score in sorted_articles[0:5]:
    print("Title:", article["title"])
    print("Description:", article["story_text"])
    print("Comment:", article["comment_text"])
    print("Score:", score)
    print()


Top 5 articles: 

Title: Ask HN: What are you building with the current wave of AI and LLMs?
Description: With the recent advancements in AI and large language models (LLMs) like GPT-4, I&#x27;m curious to learn what kind of projects you all have been working on utilizing these powerful tools.<p>It&#x27;s clear that we&#x27;re in the midst of a transformative period for AI, and the possibilities for its applications seem endless.<p>Personally, I&#x27;ve been experimenting with GPT-4 to create a platform that generates content for niche blogs, which has shown promising results so far. The ability of LLMs to understand context and generate relevant, high-quality content has made the process of content creation far more efficient.<p>Here are a few areas I&#x27;ve seen people leveraging AI and LLMs:<p>1. Personalized learning platforms: Utilizing AI to create tailored educational content for students based on their strengths and weaknesses.<p>2. Sentiment analysis: Companies analyzing cust

In [19]:
formatted_top_results = [
    {
        "title": article["title"],
        "description": article["story_text"],
        "comment": article["comment_text"],
    }
    for article, _score in sorted_articles[0:]
]

ANSWER_INPUT = f"""
Generate an answer to the user's question based on the given search results.
TOP_RESULTS: {formatted_top_results}
USER_QUESTION: {USER_QUESTION}

Include as much information as possible in the answer. Reference the relevant search result urls as markdown links.
"""

completion = openai.ChatCompletion.create(
    model=GPT_MODEL,
    messages=[{"role": "user", "content": ANSWER_INPUT}],
    temperature=0.5,
    stream=True,
)

text = ""
for chunk in completion:
    text += chunk.choices[0].delta.get("content", "")
    display.clear_output(wait=True)
    display.display(display.Markdown(text))

RateLimitError: ignored