In [1]:
import os
import torch
from transformers import pipeline

In [2]:
# Ticker symbols processed by the driver loop; each one is looked up in the
# prompt dictionary inside load_prompt, so it must be a key there.
TICKERS = ["SPY", "SMH", "IBB", "GDX", "IYR", "KBE"] 

In [3]:
def get_device(visible_devices="7"):
    """Pick the torch device to run inference on.

    Optionally restricts which GPUs CUDA may see by writing
    ``CUDA_VISIBLE_DEVICES``, then returns index 0 of the ``cuda`` device
    type when CUDA is available and of the ``cpu`` device type otherwise.

    Args:
        visible_devices: Value written to the ``CUDA_VISIBLE_DEVICES``
            environment variable (converted with ``str``). Defaults to
            ``"7"``, the value that used to be hard-coded here. Pass ``None``
            to leave the environment untouched and respect whatever the
            caller's shell or scheduler already configured.

    Returns:
        torch.device: ``torch.device("cuda", 0)`` if
        ``torch.cuda.is_available()`` is true, else ``torch.device("cpu", 0)``.
    """
    if visible_devices is not None:
        # NOTE(review): CUDA reads this variable when it initialises, so this
        # presumably has to run before any other CUDA call in the process --
        # confirm if this function is ever called late in a session.
        os.environ["CUDA_VISIBLE_DEVICES"] = str(visible_devices)

    use_cuda = torch.cuda.is_available()
    # Index 0 is relative to the devices left visible by the mask above.
    device = torch.device("cuda" if use_cuda else "cpu", 0)
    return device

In [4]:
def load_prompt(ticker):
    """Return the descriptive prompt text for an ETF ticker.

    Each prompt is assembled from adjacent string literals so that sentences
    are separated by exactly one space. (Backslash continuations inside a
    single literal would embed the source indentation in the returned text.)

    Args:
        ticker: Ticker symbol; must be one of the keys of ``prompt_dict``
            below ("SPY", "SMH", "IBB", "GDX", "IYR", "KBE").

    Returns:
        str: The prompt text for ``ticker``.

    Raises:
        KeyError: If ``ticker`` has no prompt defined.
    """
    prompt_dict = {
        "SPY": (
            "The dataset contains historical closing prices of the SPY (SPDR S&P 500 ETF Trust). "
            "The SPDR S&P 500 ETF Trust is an exchange-traded fund which trades on the NYSE Arca under the symbol SPY. "
            "The ETF is designed to track the S&P 500 index by holding a portfolio comprising all 500 companies on the index. "
            "The S&P 500 is widely regarded as the best gauge of overall performance in large-capitalized US equities, "
            "and is comprised of 500 American companies representing a wide range of diverse market sectors."
        ),
        "SMH": (
            "The dataset contains historical closing prices of the SMH (VanEck Semiconductor ETF). "
            "The VanEck Semiconductor ETF intended to track the overall performance of companies involved in semiconductor production and equipment. "
            "The investment seeks to replicate as closely as possible, before fees and expenses, the price and yield performance of the MVIS® US Listed Semiconductor 25 Index. "
            "The fund normally invests at least 80 percent of its total assets in securities that comprise the fund's benchmark index. "
            "The index includes common stocks and depositary receipts of U.S. exchange-listed companies in the semiconductor industry."
        ),
        "IBB": (
            "The dataset contains historical closing prices of the IBB (iShares Biotechnology ETF). "
            "The investment seeks to track the investment results of the NYSE Biotechnology Index composed of U.S.-listed equities in the biotechnology sector. "
            "The fund generally will invest at least 80 percent of its assets in the component securities of its index and in investments "
            "that have economic characteristics that are substantially identical to the component securities of its index and may invest up to 20 percent of its assets in certain futures, options and swap contracts, cash and cash equivalents."
        ),
        "GDX": (
            "The dataset contains historical closing prices of the GDX (VanEck Gold Miners ETF). "
            "The investment seeks to replicate as closely as possible, before fees and expenses, the price and yield performance of the NYSE® Arca Gold Miners Index®. "
            "The fund normally invests at least 80 percent of its total assets in common stocks and depositary receipts of companies involved in the gold mining industry. "
            "The index is a modified market-capitalization weighted index primarily comprised of publicly traded companies involved in the mining for gold and silver."
        ),
        "IYR": (
            "The dataset contains historical closing prices of the IYR (iShares US Real Estate ETF). "
            "The investment seeks to track the investment results of the Dow Jones U.S. Real Estate Capped Index. "
            "The fund seeks to track the investment results of the Dow Jones U.S. Real Estate Capped Index, which measures the performance of the real estate sector of the U.S. equity market, as defined by the index provider. "
            "It generally invests at least 80 percent of its assets in the component securities of its underlying index and in investments that have economic characteristics that are substantially identical to the component securities of its underlying index."
        ),
        "KBE": (
            "The dataset contains historical closing prices of the KBE (SPDR® S&P Bank ETF). "
            "The investment seeks to provide investment results that, before fees and expenses, correspond generally to the total return performance of the S&P Banks Select Industry Index. "
            "The fund generally invests substantially all, but at least 80%, of its total assets in the securities comprising the index. "
            "The index represents the banks segment of the S&P Total Market Index (“S&P TMI”). The S&P TMI is designed to track the broad U.S. equity market. "
            "It may invest in equity securities that are not included in the index, cash and cash equivalents or money market instruments, such as repurchase agreements and money market funds."
        ),
    }

    try:
        prompt_prefix = prompt_dict[ticker]
    except KeyError:
        # Same exception type as a plain dict lookup, but say what is supported.
        raise KeyError(
            f"No prompt defined for ticker {ticker!r}; "
            f"expected one of {sorted(prompt_dict)}"
        ) from None
    return prompt_prefix

In [5]:
# Pipelines already built by load_sentiment_analyzer, keyed by the ``device``
# value they were requested for.
_SENTIMENT_ANALYZERS = {}


def load_sentiment_analyzer(device):
    """Return the sentiment-analysis pipeline for ``device``, building it once.

    The pipeline is created with the ``textattack/bert-base-uncased-SST-2``
    model. The first call for a given ``device`` constructs it; later calls
    with the same ``device`` return the same object, so callers that invoke
    this once per input do not reload the model each time.

    Args:
        device: Passed through unchanged as the ``device`` argument of
            ``transformers.pipeline``. It is also used as the cache key, so it
            must be hashable.

    Returns:
        The object returned by ``pipeline("sentiment-analysis", ...)``.
    """
    if device not in _SENTIMENT_ANALYZERS:
        _SENTIMENT_ANALYZERS[device] = pipeline(
            "sentiment-analysis",
            model="textattack/bert-base-uncased-SST-2",
            device=device,
        )
    return _SENTIMENT_ANALYZERS[device]

In [6]:
def analyze_sentiment(device, ticker, text):
    """Run the sentiment analyzer on ``text`` and print a report for ``ticker``.

    Args:
        device: Forwarded to ``load_sentiment_analyzer`` to obtain the analyzer.
        ticker: Label printed on the first line of each report.
        text: Input handed to the analyzer.

    Returns:
        Whatever the analyzer returns for ``text``. It is iterated here as a
        sequence of mappings that each provide ``'label'`` and ``'score'``.
    """
    predictions = load_sentiment_analyzer(device)(text)
    for prediction in predictions:
        # One three-line report per prediction, each line newline-terminated.
        print(
            f"Ticker: {ticker}",
            f"Label: {prediction['label']} (Positive or Negative)",
            f"Score: {prediction['score']:.4f} (Confidence Level)",
            sep="\n",
        )
    return predictions

In [7]:
# Resolve the inference device once and reuse it for every ticker.
device = get_device()

# Score each ticker's descriptive prompt. analyze_sentiment prints the label
# and score itself; its return value is not kept here.
for ticker in TICKERS:
    sentence = load_prompt(ticker)
    analyze_sentiment(device, ticker, sentence)

config.json:   0%|          | 0.00/477 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Ticker: SPY
Label: LABEL_1 (Positive or Negative)
Score: 0.9841 (Confidence Level)
Ticker: SMH
Label: LABEL_1 (Positive or Negative)
Score: 0.6934 (Confidence Level)
Ticker: IBB
Label: LABEL_1 (Positive or Negative)
Score: 0.8039 (Confidence Level)
Ticker: GDX
Label: LABEL_1 (Positive or Negative)
Score: 0.7384 (Confidence Level)
Ticker: IYR
Label: LABEL_1 (Positive or Negative)
Score: 0.8493 (Confidence Level)
Ticker: KBE
Label: LABEL_1 (Positive or Negative)
Score: 0.7929 (Confidence Level)
