In [1]:
import pandas as pd 
import numpy as np
import requests
import os 

from azure.storage.blob import BlobServiceClient

from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import TextAnalyticsClient

from constants import BLOB_KEY, COGNITIVE_ENDPOINT, COGNITIVE_CREDENTIALS, HUGGINGFACE_KEY

In [3]:
# Log in to the Blob Service Client.
# The shared account key is passed through `credential`, which is the
# documented constructor parameter for authentication in azure-storage-blob v12.
# NOTE(review): `account_key=` is not a documented keyword of this constructor,
# so the key was presumably ignored before — confirm against the installed SDK.
account_url = "https://mlstorageleo.blob.core.windows.net"
blob_service_client = BlobServiceClient(account_url, credential=BLOB_KEY)

In [3]:
# Connect to the container that holds the stock-news JSON files.
container_client = blob_service_client.get_container_client(container="stock-news-json")

# Local download folder, built with os.path.join so the separator matches the
# host OS (the former ".\data" literal relied on an invalid "\d" escape and
# only worked on Windows).
download_dir = os.path.join(".", "data")
os.makedirs(download_dir, exist_ok=True)

# List and download all currently available blobs.
blob_list = container_client.list_blobs()
for blob in blob_list:
    download_file_path = os.path.join(download_dir, str(blob.name))
    # A blob name may contain "/" segments; make sure the matching local
    # sub-folder exists before opening the file for writing.
    os.makedirs(os.path.dirname(download_file_path), exist_ok=True)
    with open(file=download_file_path, mode="wb") as download_file:
        download_file.write(container_client.download_blob(blob.name).readall())

In [7]:
# Load one downloaded news file; the path is assembled with os.path.join so it
# resolves on any OS rather than only with Windows backslashes.
df = pd.read_json(os.path.join(".", "data", "TXN-1686603553.838044.json"))

# Plain Python list of the article bodies, in row order, for the text APIs below.
texts = df["texts"].tolist()

In [3]:
# Preview only the first rows instead of rendering the entire frame.
df.head()

Unnamed: 0,name,description,datePublished,url,texts
0,What Does Texas Instruments Incorporated&#39;s...,The share price seems sensible at the moment a...,2023-06-09T14:00:00.0000000Z,https://finance.yahoo.com/news/does-texas-inst...,Let's talk about the popular Texas Instruments...
1,$100 Invested In Texas Instruments 15 Years Ag...,Texas Instruments (NASDAQ:TXN) has outperforme...,2023-06-07T17:00:25.0000000Z,https://www.msn.com/en-us/money/savingandinves...,Texas Instruments (NASDAQ:TXN) has outperforme...
2,Texas Instruments Incorporated (<b>TXN</b>) An...,Texas Instruments Incorporated (NASDAQ:TXN) pr...,2023-06-10T14:38:00.0000000Z,https://stocksregister.com/2023/06/10/texas-in...,Texas Instruments Incorporated (NASDAQ:TXN) pr...
3,Texas Instruments (<b>TXN</b>) Gains But Lags ...,Texas Instruments (TXN) closed at $170.36 in t...,2023-06-06T21:50:00.0000000Z,https://finance.yahoo.com/news/texas-instrumen...,Texas Instruments (TXN) Gains But Lags Market:...


In [8]:
# Build a single credential from the configured key and hand that same object
# to the client. Previously `credential` wrapped the literal placeholder
# "<api_key>" and was never used, while a second credential was created inline.
credential = AzureKeyCredential(COGNITIVE_CREDENTIALS)
text_analytics_client = TextAnalyticsClient(
    endpoint=COGNITIVE_ENDPOINT,
    credential=credential,
)

In [9]:
# Run Azure sentiment analysis (with opinion mining) over every article text.
result = text_analytics_client.analyze_sentiment(
    texts,
    show_opinion_mining=True,
)

# Keep only the entries that are not flagged as errors.
docs = list(filter(lambda item: not item.is_error, result))

In [17]:
# Walk the inputs and the unfiltered results side by side so each text is
# printed with its own sentiment. Indexing `texts` by position in the
# error-filtered `docs` list would shift every text after a failed document.
# Assumes `result` is in the same order as `texts` — per the SDK docs, not
# visible in this notebook.
for source_text, doc in zip(texts, result):
    if doc.is_error:
        # Failed documents carry no sentiment; skip them as before.
        continue
    print(f"Document text: {source_text}")
    print(f"Overall sentiment: {doc.sentiment}")
    print()

Document text: Let's talk about the popular Texas Instruments Incorporated (NASDAQ:TXN). The company's shares saw significant share price movement during recent months on the NASDAQGS, rising to highs of US$186 and falling to the lows of US$161. Some share price movements can give investors a better opportunity to enter into the stock, and potentially buy at a lower price. A question to answer is whether Texas Instruments' current trading price of US$172 reflective of the actual value of the large-cap? Or is it currently undervalued, providing us with the opportunity to buy? Let’s take a look at Texas Instruments’s outlook and value
Overall sentiment: negative

Document text: Texas Instruments (NASDAQ:TXN) has outperformed the market over the past 15 years by 4.45% on an annualized basis producing an average annual return of 12.47%. Currently, Texas Instruments has a market capitalization of $156.33 billion. Buying $100 In TXN: If an investor had bought $100 of TXN stock 15 years ago, 

In [4]:
# compare it to the HuggingFace model
import requests

def distilbert_sentiment_classifier(text: str, api_key: str, timeout: float = 30.0) -> str:
    """Classify ``text`` with the hosted KernAI/stock-news-destilbert model.

    Sends ``text`` to the HuggingFace inference endpoint and returns the
    ``"label"`` of the first prediction found in the JSON response.

    Args:
        text: The text to classify.
        api_key: HuggingFace API token, sent as a Bearer token.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error (new, optional; defaults to 30 seconds).

    Returns:
        The predicted label string on success. If the response holds no
        usable prediction (error payload, empty list, non-JSON body, ...),
        a string starting with ``"This didn't work, got: "`` followed by
        what was received.
    """
    headers = {"Authorization": f"Bearer {api_key}"}
    response = requests.post(
        "https://api-inference.huggingface.co/models/KernAI/stock-news-destilbert",
        headers=headers,
        json={"inputs": text},
        timeout=timeout,
    )

    try:
        json_response = response.json()
    except ValueError:
        # Body was not valid JSON; report it the same way as other failures.
        return f"This didn't work, got: {response.text}"

    # Unwrap nested lists by following the first element. Stopping on anything
    # that is not a non-empty list avoids the IndexError on [] and the endless
    # loop on string payloads that an unconditional `[0]` would cause.
    while isinstance(json_response, list) and json_response:
        json_response = json_response[0]

    if not isinstance(json_response, dict) or "label" not in json_response:
        return f"This didn't work, got: {json_response}"
    return json_response["label"]

In [14]:
# Label every article text with the hosted DistilBERT classifier, keeping the
# results in the same order as `texts`.
distilbert_sentiments = list(
    map(
        lambda article: distilbert_sentiment_classifier(article, HUGGINGFACE_KEY),
        texts,
    )
)

In [15]:
# Display the DistilBERT labels collected from the classifier calls.
# Author's qualitative take: the distilbert model is clearly better.
# NOTE(review): no labelled ground truth is visible in this notebook to back
# that up — treat it as an impression, not a measured result.
distilbert_sentiments

['neutral', 'positive', 'negative', 'positive']

In [17]:
# Ask Azure for the named entities in every article text.
entity_result = text_analytics_client.recognize_entities(texts)

# Discard entries flagged as errors so only usable results remain.
entity_result = list(filter(lambda item: not item.is_error, entity_result))

In [19]:
# Print every recognised entity with its category, document by document.
# The enumerate index was never used, and the loop variable holds a result
# object rather than text, so it is named accordingly.
for doc_entities in entity_result:
    for entity in doc_entities.entities:
        print(f"Entity '{entity.text}' has category '{entity.category}'")

Entity 'Texas Instruments Incorporated' has category 'Organization'
Entity 'TXN' has category 'Organization'
Entity 'cent' has category 'Quantity'
Entity 'NASDAQGS' has category 'Organization'
Entity 'US$186' has category 'Quantity'
Entity 'US$161' has category 'Quantity'
Entity 'investors' has category 'PersonType'
Entity 'Texas Instruments' has category 'Organization'
Entity 'trading' has category 'Skill'
Entity 'US$172' has category 'Quantity'
Entity 'Texas Instruments' has category 'Organization'
Entity 'Texas Instruments' has category 'Organization'
Entity 'TXN' has category 'Organization'
Entity 'past 15 years' has category 'DateTime'
Entity '4.45%' has category 'Quantity'
Entity 'annual' has category 'DateTime'
Entity '12.47%' has category 'Quantity'
Entity 'Texas Instruments' has category 'Organization'
Entity '$156.33 billion' has category 'Quantity'
Entity '$100' has category 'Quantity'
Entity '100 In' has category 'Quantity'
Entity 'TXN' has category 'Organization'
Entity 'i