In [14]:
import requests
import os
import datetime
import time
import json
import openai

from newsapi import NewsApiClient

def LLM_Query(system_prompt, user_query, temperature=0.9, top_p=1):
    """Run a single system + user exchange against the ``gpt-4`` chat model.

    Args:
        system_prompt: Text sent as the ``system`` message.
        user_query: Text sent as the ``user`` message.
        temperature: Sampling temperature forwarded to the API call.
        top_p: Nucleus-sampling value forwarded to the API call.

    Returns:
        A ``(content, total_tokens)`` tuple: the message content of the first
        choice and the ``total_tokens`` figure from the response's ``usage``.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_query},
    ]
    # NOTE(review): `openai.ChatCompletion` looks like the pre-1.0 SDK
    # interface — confirm the installed openai version still provides it.
    completion = openai.ChatCompletion.create(
        model="gpt-4",
        messages=conversation,
        temperature=temperature,
        top_p=top_p,
    )
    # The raw API response is echoed to stdout before anything is extracted.
    print(completion)
    first_choice = completion.choices[0]
    return first_choice["message"]["content"], completion["usage"]["total_tokens"]

# newsdata.io endpoints (news and archive).
newsdata_api = "https://newsdata.io/api/1/news"
newsdata_archive_api = "https://newsdata.io/api/1/archive"

# Comma-separated filter values. They end up in the request's query string,
# so they must not contain spaces or empty entries.
categories = "business,politics,technology"
newsdata_source = "wsj,bloomberg"
full_content_source = "businessinsider_us,cnn,bbc"

def read_api_keys(key_file):
    """Load and return the JSON document stored at ``key_file``.

    Args:
        key_file: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.
    """
    with open(key_file) as handle:
        return json.load(handle)

def GetRequestEmbedding(customer_query):
    """Embed ``customer_query`` with the ``text-embedding-3-small`` model.

    Args:
        customer_query: Value passed as the ``input`` of the embedding request.

    Returns:
        The ``embedding`` field of the first entry in the response's ``data``.
    """
    # NOTE(review): `openai.Embedding` looks like the pre-1.0 SDK interface —
    # confirm the installed openai version still provides it.
    result = openai.Embedding.create(
        model="text-embedding-3-small",
        input=customer_query,
    )
    first_entry = result["data"][0]
    return first_entry["embedding"]

"""
    News Info needed:
    - Title
    - Content (or description, depending on the API)
    - URL
    - Source
    - Date
"""

# Read news from News IO source
# TODO: Needs to refactor this function
def read_news_io_api(api_key, categories, domain, date_from):
    """Fetch articles published after ``date_from`` from the news endpoint.

    Requests the URL in ``newsdata_api`` page by page and keeps only the
    articles whose ``pubDate`` is strictly later than ``date_from``.

    Args:
        api_key: API key sent as the ``apikey`` query parameter.
        categories: Comma-separated category filter.
        domain: Comma-separated domain filter.
        date_from: Naive ``datetime.datetime`` lower bound (exclusive); it is
            compared directly with the parsed ``pubDate`` values.

    Returns:
        A list of dicts with the keys ``title``, ``content`` (taken from the
        article's ``description``), ``url``, ``source`` and ``date`` (a
        ``datetime.datetime``), or ``None`` when the first request does not
        report ``status == "success"``.
    """
    # Spaces and empty entries are stripped from both filters before sending.
    params = {
        "apikey": api_key,
        "category": _clean_csv(categories),
        "domain": _clean_csv(domain),
        "full_content": 1,
    }
    formalized_articles = []
    results_seen = 0
    is_first_page = True

    while True:
        # Let requests build and escape the query string instead of
        # concatenating it by hand.
        response = requests.get(
            newsdata_api, params=params, timeout=_REQUEST_TIMEOUT_SECONDS
        ).json()

        if response.get("status") != "success":
            if is_first_page:
                print("Failed to read news from News IO API")
                return None
            # A later page failed: return what has been collected so far.
            break
        is_first_page = False

        page_results = response.get("results") or []
        results_seen += len(page_results)

        fresh_on_page = 0
        for article in page_results:
            published = datetime.datetime.strptime(article["pubDate"], _PUB_DATE_FORMAT)
            if published > date_from:
                fresh_on_page += 1
                formalized_articles.append({
                    "title": article["title"],
                    "content": article["description"],
                    "url": article["link"],
                    "source": article["source_id"],
                    "date": published,
                })

        next_page = response.get("nextPage")
        # Stop when the page held nothing newer than date_from (presumably
        # results arrive newest first — confirm), when the API reports no
        # further page, or when every reported result has been read.
        if (
            fresh_on_page == 0
            or next_page is None
            or results_seen >= response.get("totalResults", 0)
        ):
            break
        params["page"] = next_page

    return formalized_articles


# Helpers for read_news_io_api.

# Format of the ``pubDate`` strings parsed by read_news_io_api.
_PUB_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

# Seconds to wait for the API before requests gives up.
_REQUEST_TIMEOUT_SECONDS = 30


def _clean_csv(value):
    """Return a comma-separated string without surrounding spaces or empty entries."""
    return ",".join(part.strip() for part in value.split(",") if part.strip())

In [3]:
# Load the API keys from ~/.api_keys.json. expanduser resolves the home
# directory even when the HOME environment variable is not set (e.g. Windows).
api_keys = read_api_keys(os.path.join(os.path.expanduser("~"), ".api_keys.json"))
news_data_api_key = api_keys["news_data_api"]

# Lower bound for the article filter: 24 hours before now.
# NOTE(review): this is a naive local-time datetime that read_news_io_api
# compares with the API's pubDate strings — confirm both use the same timezone.
date = datetime.datetime.now() - datetime.timedelta(days=1)

In [5]:
# Never echo the full key: cell outputs are saved with the notebook.
# Show only its first four characters to confirm that it loaded.
api_keys["news_data_api"][:4] + "<redacted>"

'pub_<redacted>'

In [105]:
# read_news_io_api returns a single value (a list of article dicts, or None on
# failure), so it must not be unpacked into two names.
data = read_news_io_api(news_data_api_key, categories, full_content_source, date)

NameError: name 'full_content_source' is not defined

In [104]:
# Ad-hoc request against a single domain to inspect the raw response payload.
# `domain` is now actually sent; "guardian" is the value the request used.
domain = "guardian"
response = requests.get(
    newsdata_api,
    params={
        "apikey": news_data_api_key,
        "category": categories,
        "domain": domain,
        "full_content": 1,
    },
    timeout=30,
).json()
response

{'status': 'success',
 'totalResults': 149,
 'results': [{'article_id': 'adbca84f05d4dc9bdcf65eba293b162b',
   'title': 'Restating significance of Oronsaye panel report',
   'link': 'https://guardian.ng/opinion/restating-significance-of-oronsaye-panel-report/',
   'keywords': ['columnists',
    'opinion',
    'asuu',
    'oronsaye panel report',
    'steve oronsaye'],
   'creator': ['Guardian Nigeria'],
   'video_url': None,
   'description': 'As I wrote here on Sunday May 10, 2020, the report of the 2012 Presidential Committee on Restructuring and Rationalisation of Federal Government Parastatals, Commissions and Agencies also known as the Oronsaye Panel Report.. The post Restating significance of Oronsaye panel report appeared first on The Guardian Nigeria News - Nigeria and World News.',
   'content': "6 As I wrote here on Sunday May 10, 2020, the report of the 2012 Presidential Committee on Restructuring and Rationalisation of Federal Government Parastatals, Commissions and Agencie

In [90]:
# Display whatever `response` currently holds.
response

{'status': 'success', 'totalResults': 0, 'results': [], 'nextPage': None}