In [None]:
import requests
import bs4
from dotenv import load_dotenv
import os
import psycopg
from datetime import datetime as dt

# Load settings through python-dotenv (presumably from a local .env file — confirm)
# so that the os.getenv lookups for DBCONN and HF_API_KEY further down can resolve.
load_dotenv()

In [None]:
# Open the PostgreSQL connection and the cursor shared by the cells below.
# DBCONN holds the connection string passed to psycopg.connect.
dbconn = os.getenv("DBCONN")
if not dbconn:
    # Fail here with a clear message instead of handing None to psycopg.connect.
    raise RuntimeError("DBCONN is not set; add it to the environment or the .env file")

conn = psycopg.connect(dbconn)
cur = conn.cursor()

In [None]:
# Make sure the target table exists; IF NOT EXISTS keeps this cell safe to re-run.
create_table_sql = '''
    CREATE TABLE IF NOT EXISTS utoday_news(
        title VARCHAR(255),
        author VARCHAR(255),
        link VARCHAR(255),
        date DATE
    );
'''
cur.execute(create_table_sql)

In [None]:
# Commit the open transaction so the statements executed on `cur` so far are persisted.
conn.commit()

In [None]:
# End the schema-setup session.
cur.close()
conn.close()

# The cells below keep using `cur` and `conn` (inserts, ALTER TABLE, updates),
# so open a fresh connection and cursor; otherwise a top-to-bottom run fails
# on the first statement issued against the closed cursor.
conn = psycopg.connect(dbconn)
cur = conn.cursor()

In [None]:
# Download the u.today search results page for "bitcoin".
# The timeout stops the cell from hanging forever on a stalled connection.
response = requests.get("https://u.today/search/node?keys=bitcoin", timeout=30)
# Stop on HTTP errors instead of silently parsing an error page into zero items.
response.raise_for_status()

# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed (and warns), so results can differ between machines.
document = bs4.BeautifulSoup(response.text, "html.parser")

# One element per search hit.
news_items = document.select("div.search-result > .news__item")

In [None]:
# Sanity check: number of elements matched by the search-result selector.
len(news_items)

In [None]:
# Collect one [title, author, date-string, link] list per search hit.
# The field order matches the column list of the INSERT statement that consumes it.
add_to_db = []

for item in news_items:
    try:
        title = item.select_one("div.news__item-title").get_text()
        # Keep only the text before the first newline of the author link.
        author = item.select_one("a.humble.humble--author").get_text().split("\n")[0]
        # Keep only the part before " - " (the date portion of the text).
        published = item.select_one("div.humble").get_text().split(" - ")[0]
        link = item.select_one("a.news__item-body")["href"]
    except (AttributeError, TypeError, KeyError):
        # select_one() returns None for a missing element, so .get_text() raises
        # AttributeError and ["href"] raises TypeError; a tag without an href
        # raises KeyError. Anything else (e.g. KeyboardInterrupt) now propagates.
        print("value missing")
        continue
    add_to_db.append([title, author, published, link])

In [None]:
# Insert every scraped row. The date string is parsed into a new value instead
# of being written back into add_to_db, so re-running this cell no longer fails
# with a TypeError from calling strptime on an already-converted datetime.
# The rows stay in the open transaction until conn.commit() is called.
for title, author, published, link in add_to_db:
    # e.g. "Jan 05, 2024" -> date(2024, 1, 5); the target column is DATE.
    published_on = dt.strptime(published, "%b %d, %Y").date()
    cur.execute('''
        INSERT INTO utoday_news (title, author, date, link)
        VALUES (%s, %s, %s, %s);
    ''', (title, author, published_on, link))

print("finished")

In [None]:
# Add the column that stores each headline's sentiment label.
# IF NOT EXISTS makes the cell re-runnable: a plain ADD fails once the column
# exists, and that error leaves the transaction aborted until it is rolled back.
cur.execute('''
    ALTER TABLE utoday_news
    ADD COLUMN IF NOT EXISTS sentiment VARCHAR(255);
''')

In [None]:
# Read back every stored title, newest first, as a flat list of strings.
cur.execute('''
    SELECT title FROM utoday_news ORDER BY date DESC;
''')

# Each fetched row is a one-element tuple; unpack it to get the bare title.
titles = [title for (title,) in cur.fetchall()]

In [None]:
# Send all titles to the Hugging Face Inference API in a single request.
hf_api_key = os.getenv("HF_API_KEY")
if not hf_api_key:
    # Without this check the header below would literally read "Bearer None".
    raise RuntimeError("HF_API_KEY is not set; add it to the environment or the .env file")

url = "https://api-inference.huggingface.co/models/mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"

payload = { "inputs": titles }
headers = { "Authorization": f"Bearer {hf_api_key}" }
response = requests.post(url, headers=headers, json=payload, timeout=60)
print("first response", response)

if response.status_code == 503:
    # Retry once with x-wait-for-model set (presumably this makes the API hold
    # the request until the model is loaded — confirm). Longer timeout because
    # that wait can take a while.
    headers["x-wait-for-model"] = "true"
    response = requests.post(url, headers=headers, json=payload, timeout=300)
    print("second response", response)

# Surface auth/quota/server errors here rather than as a confusing failure
# when the next cell tries to index into the JSON body.
response.raise_for_status()

In [None]:
import json

# Decode the API reply and keep its first element as the sentiment results.
api_result = response.json()
if not isinstance(api_result, list) or not api_result:
    # Anything other than a non-empty list would otherwise make the [0] lookup
    # below fail with an opaque KeyError/IndexError.
    raise RuntimeError(f"Unexpected sentiment API response: {api_result!r}")

# NOTE(review): assumes element 0 holds one {"label": ...} entry per title —
# confirm against the printed output and the length check in the next cell.
sentiment_results = api_result[0]
print(json.dumps(sentiment_results, indent=2))

In [None]:
# Compare the number of results with the number of titles sent; the update
# cell below only writes labels when the two counts are equal.
len(sentiment_results), len(titles)

In [None]:
# Peek at the "label" field of the first result entry.
sentiment_results[0]["label"]

In [None]:
# Write each label back to the row(s) with the matching title.
# Titles and results are paired by position, so the update only runs when both
# lists have the same length.
if len(sentiment_results) == len(titles):
    for title, result in zip(titles, sentiment_results):
        cur.execute('''
            UPDATE utoday_news
            SET sentiment = %s
            WHERE title = %s;
        ''', (result["label"], title))
    outcome = "finished"
else:
    # Previously a mismatch skipped every update but still printed "finished".
    outcome = (
        f"sentiment update skipped: {len(sentiment_results)} results "
        f"for {len(titles)} titles"
    )

# Commit in both cases, as before, so work pending in the transaction is persisted.
conn.commit()
print(outcome)

In [None]:
# Show the single row that sorts first by date (ascending order).
cur.execute('''
    SELECT * FROM utoday_news ORDER BY date LIMIT 1;
''').fetchall()

In [None]:
# Display every row currently in the table.
select_all_sql = '''
    SELECT * FROM utoday_news;
'''
cur.execute(select_all_sql).fetchall()

In [None]:
# Abandon the current transaction (for example after a failed statement has
# left it aborted). Uses the connection API rather than a raw ROLLBACK statement.
# Any uncommitted inserts or updates are discarded.
conn.rollback()

In [None]:
# Remove rows whose author is an empty string, then persist the deletion.
delete_authorless_sql = '''
    DELETE FROM utoday_news WHERE author = '';
'''
cur.execute(delete_authorless_sql)
conn.commit()