In [1]:
!pip install pmdarima, transformers, yfinance

ERROR: Invalid requirement: 'pmdarima,'

[notice] A new release of pip is available: 24.0 -> 24.3.1
[notice] To update, run: C:\Users\ANv\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [2]:
# Import libraries
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSequenceClassification
from sklearn.linear_model import LinearRegression
from pmdarima import auto_arima
import warnings
import yfinance as yf
import torch
import pandas as pd
import requests
import numpy as np
from datetime import datetime, timedelta
import re
import os

  from .autonotebook import tqdm as notebook_tqdm


Stocks based on Market Reputation and Company Size can be divided to:
a) Blue-Chip Stocks

    Definition: Stocks of large, well-established, and financially stable companies with a history of reliable performance.
    Examples: Apple, Microsoft, Coca-Cola.
    Key Features:
        Considered low-risk investments.
        Often pay consistent dividends.
        Suitable for long-term, stable growth.

b) Penny Stocks

    Definition: Stocks of small companies that trade at very low prices, usually below $5 per share.
    Key Features:
        High-risk and speculative.
        Low market capitalization.
        Often traded over-the-counter (OTC).

c) Growth Stocks

    Definition: Stocks of companies expected to grow at a rate higher than the market average.
    Key Features:
        Usually reinvest earnings into growth (rarely pay dividends).
        High potential for capital appreciation.
        Often more volatile.

d) Value Stocks

    Definition: Stocks that are considered undervalued compared to their intrinsic value.
    Key Features:
        Low price-to-earnings (P/E) ratios.
        May pay dividends.
        Attractive to long-term investors.

Since penny stocks are highly volatile and succeptable to market manipulation and value stocks are based on subjective opinion, for evaluation we use Blue chip and Growth stocks

In [3]:
blue_chip_stocks = ["AAPL","MSFT","KO","PG","JNJ","DIS","WMT","JPM","MCD","GE"]
Growth_stocks = ["TSLA","AMZN","NVDA","GOOG","META","NFLX","SHOP","SQ","CRM","UBER"]
print("Number of tickers:", len(blue_chip_stocks)+len(Growth_stocks))

Number of tickers: 20


### Fetch Ticker News

In [4]:
def extract_company_name(company_string):
    """
    Extracts the main company name from a given string by removing common suffixes.

    Parameters:
        company_string (str): The full company name string.

    Returns:
        str: The cleaned company name.
    """
    # Define a regex pattern to match common suffixes
    pattern = r",?\s+(Inc\.|Incorporated|Corp\.|Corporation|Ltd\.|Limited|LLC|LLP|P\.L\.C\.|Co\.|Company|Group|Holdings)$"
    
    # Remove the suffix from the company name
    company_name = re.sub(pattern, "", company_string, flags=re.IGNORECASE)
    
    return company_name.strip()

def get_news_sentiment(API_KEY='C2ARQRXUKFAUTVP1', tickers=None, topics=None, limit=None, sort_by='LATEST',
start_date=None
):
    """
    Fetch news and sentiment data from Alpha Vantage.

    Parameters:
        tickers (str): Comma-separated tickers (e.g., "AAPL,MSFT").
        topics (str): Topics of interest (e.g., "technology,finance").
        limit (int): Number of news articles to retrieve.
        sort_by (str): Sort order: 'LATEST', 'RELEVANCE', or 'EARLIEST'.

    Returns:
        list: News articles with metadata.
    """
    url = "https://www.alphavantage.co/query"
    params = {
        "function": "NEWS_SENTIMENT",
        "apikey": API_KEY,
        "tickers": tickers,
        "topics": topics,
        "limit": limit,
        "sort_by": sort_by,
        "time_from": start_date,
    }

    response = requests.get(url, params=params)

    if response.status_code == 200:
        data = response.json()

        if "feed" in data:
            return data["feed"]
        else:
            print("No news data available.")
            return []
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return []

def get_raw_news(ticker, interval="1y"):
    """
    Get raw news data for a specific ticker and interval and saves it in raw_data folder.

    Parameters:
        interval (str): Specifies how long ago the news should be fetched. Should be one of ("1w", "1m", "1y").
        ticker (str): Name of ticker

    Returns:
        list: None.
    """

    if interval == "1w":
        start_date = datetime.today() - timedelta(days=7)
        start_date = start_date.strftime('%Y%m%dT%H%M')
    elif interval == "1m":
        start_date = datetime.today() - timedelta(days=31)
        start_date = start_date.strftime('%Y%m%dT%H%M')
    elif interval == "1y":
        start_date = datetime.today() - timedelta(days=365)
        start_date = start_date.strftime('%Y%m%dT%H%M')

    else:
        start_date = None

    news = get_news_sentiment(tickers=ticker, start_date=start_date)
    data = {
        "title": [],
        "published": [],
        "source": [],
        "summary": [],
        "sentiment_label": [],
        "sentiment_score": [],
        "relevance_score": []
    }

    if news: # If the query recieved any news
        
        for article in news:
            
            ticker_sentiment = article['ticker_sentiment']
            for i, sentiment in enumerate(ticker_sentiment):
                ticker_idx = i
                data['title'].append(article['title'])
                data['published'].append(article['time_published'])
                data['summary'].append(article['summary'])
                data['source'].append(article['source'])
                data['sentiment_label'].append(ticker_sentiment[ticker_idx]['ticker_sentiment_label'])
                data['sentiment_score'].append(ticker_sentiment[ticker_idx]['ticker_sentiment_score'])
                data['relevance_score'].append(ticker_sentiment[ticker_idx]['relevance_score'])

    # Convert to pandas dataframe and save as csv
    ticker_df = pd.DataFrame(data)
    # Check if the folder exists
    folder_name = "raw_data"
    if not os.path.exists(folder_name):
        # Create the folder
        os.makedirs(folder_name)

    # File name and path
    file_name = f"{str(ticker).lower()}_raw.csv"
    file_path = os.path.join(folder_name, file_name)
    ticker_df.to_csv(file_path)

def filter_news(ticker, relevance_score_threshold=0):

    """
    Filter a DataFrame from a CSV file if it exists in the 'raw_data' folder. Also removes duplicates and keeps the one with highest relevance score.
    
    Parameters:
        ticker (str): Ticker symbol of the stock.
        relevance_score_threshold (float): What is the minimum relevance score to be able to qualify as news.
    
    Returns:
        pd.DataFrame: Filtered DataFrame.
    
    Raises:
        FileNotFoundError: If the raw data CSV file does not exist.
    """
    file_path = f"raw_data/{str(ticker).lower()}_raw.csv"

    if not os.path.exists(file_path):
        raise FileNotFoundError(f"The file '{file_path}' does not exist in the 'raw_data' folder.")
    
    # Fetch stock information
    stock = yf.Ticker(ticker)
    summary = stock.info

    comp_name = extract_company_name(summary['longName'])

    # Load the CSV into a DataFrame
    df = pd.read_csv(file_path)

    # Drop duplicates by keeping the one with the highest relevance score
    df_unique = df.loc[df.groupby("title")["relevance_score"].idxmax()]

    # Reset index for clean presentation
    df_unique = df_unique.reset_index(drop=True)

    # Apply filtering
    new_df = df[df['relevance_score'] > relevance_score_threshold]

    return new_df, comp_name

### Generate Company Summary

In [5]:
def get_arima_forecast(data):
    warnings.filterwarnings("ignore") # Suppress warnings
    
    arima_model = auto_arima(pd.Series(data['Close']))
    forecast = arima_model.predict(n_periods=len(data)//4) 

    if forecast.iloc[-1] > forecast.iloc[0]:
        arima_forecast = "increase"
    elif forecast.iloc[-1] < forecast.iloc[0]:
        arima_forecast = "decrease"
    else:
        arima_forecast = None

    if arima_forecast:
        return f" An ARIMA model fit on the data predicts that the stock's price will {arima_forecast}."

    return None

def detect_trend(data, ticker):
    """
    Detects and categorizes the trend of a stock over the past year using a generalized method.

    Parameters:
        ticker (str): Stock ticker symbol.

    Returns:
        str: Natural language summary of the trend.
    """

    # Prepare data for regression
    data['Days'] = (data.index - data.index[0]).days
    X = data['Days'].values.reshape(-1, 1)
    y = data['Close'].values

    # Fit a linear regression model for the entire dataset
    model = LinearRegression()
    model.fit(X, y)
    overall_slope = model.coef_[0]

    # Dynamically calculate slopes for all possible sub-periods
    n = len(y)
    window_sizes = range(5, max(5, n // 4))  # Use a range of window sizes (5 to 25% of the dataset length)
    slopes = []

    for window in window_sizes:
        for i in range(n - window):
            X_window = X[i:i + window]
            y_window = y[i:i + window]
            model.fit(X_window, y_window)
            slopes.append(model.coef_[0])

    # Calculate percentiles dynamically
    slope_percentiles = np.percentile(slopes, [25, 50, 75])  # Quartiles

    # Dynamically categorize trend based on the overall slope
    if overall_slope > slope_percentiles[2]:  # Above 75th percentile
        trend_category = "a strong upward trend"
    elif slope_percentiles[1] <= overall_slope <= slope_percentiles[2]:  # Between median and 75th percentile
        trend_category = "a moderate upward trend"
    elif slope_percentiles[0] <= overall_slope < slope_percentiles[1]:  # Between 25th percentile and median
        trend_category = "a stable or sideways trend"
    else:  # Below 25th percentile
        trend_category = "a downward trend"

    # Create a natural language summary
    summary = (
        f"Over the past year, {ticker} exhibited {trend_category}. "
        f"The overall trend slope was approximately {overall_slope:.4f}."
    )

    return summary


def stock_info_to_text(ticker):
    """
    Converts Yahoo Finance stock information into qualitative text with generalized thresholds.

    Parameters:
        ticker (str): Stock ticker symbol.

    Returns:
        str: A qualitative text summary of the stock.
    """
    stock = yf.Ticker(ticker)
    info = stock.info

    if not info:
        return f"No information available for {ticker}."

    # Extract relevant information
    name = info.get("longName", ticker)
    sector = info.get("sector", "N/A")
    industry = info.get("industry", "N/A")
    market_cap = info.get("marketCap", None)
    pe_ratio = info.get("trailingPE", None)
    dividend_yield = info.get("dividendYield", None)
    fifty_two_week_high = info.get("fiftyTwoWeekHigh", None)
    fifty_two_week_low = info.get("fiftyTwoWeekLow", None)
    current_price = info.get("regularMarketPrice", None)

    # Generalized thresholds for market cap
    if market_cap:
        if market_cap > 50e9:
            market_cap_text = "a large-cap company"
        elif market_cap > 2e9:
            market_cap_text = "a mid-cap company"
        else:
            market_cap_text = "a small-cap company"
    else:
        market_cap_text = "an unknown-cap company"

    # Generalized thresholds for PE ratio using percentiles
    pe_description = "N/A"
    if pe_ratio:
        pe_percentiles = np.percentile([10, 15, 25, 40], [25, 50, 75])  # Hypothetical PE ranges
        if pe_ratio < pe_percentiles[0]:
            pe_description = "undervalued"
        elif pe_ratio < pe_percentiles[1]:
            pe_description = "fairly valued"
        elif pe_ratio < pe_percentiles[2]:
            pe_description = "slightly overvalued"
        else:
            pe_description = "highly overvalued"

    pe_ratio_text = (
        f"The stock has a price-to-earnings (PE) ratio of {pe_ratio:.2f}, indicating it is {pe_description}."
        if pe_ratio else
        "The PE ratio is unavailable."
    )

    # Generalized thresholds for dividend yield using percentiles
    if dividend_yield:
        if dividend_yield > 0.03:  # Example threshold for high yield
            dividend_text = f"The stock offers a high dividend yield of {dividend_yield * 100:.2f}%, appealing to income investors."
        else:
            dividend_text = f"The stock offers a dividend yield of {dividend_yield * 100:.2f}%, which is modest."
    else:
        dividend_text = "The stock does not currently pay dividends."

    
    # Fetch historical data
    end_date = datetime.today()
    start_date = end_date - timedelta(days=365)
    data = stock.history(start=start_date, end=end_date)
    beta_value = stock.info.get("beta")
    
    trend=detect_trend(data, ticker)

    # Interpret beta for volatility
    if beta_value > 1.2:
        volatility_description = "highly volatile compared to the market"
    elif 0.8 <= beta_value <= 1.2:
        volatility_description = "as volatile as the market"
    else:
        volatility_description = "less volatile than the market"

    if data.empty:
        return f"No price data available for {ticker}."

    # Calculate metrics
    start_price = data['Close'].iloc[0]
    end_price = data['Close'].iloc[-1]
    annual_return = ((end_price - start_price) / start_price) * 100
    daily_returns = data['Close'].pct_change()
    max_price = data['Close'].max()
    min_price = data['Close'].min()
    max_drawdown = ((min_price - max_price) / max_price) * 100

    if current_price is None:
        current_price = data['Close'].iloc[-1]

    price_range_mid = (max_price + min_price) / 2
    price_range = max_price - min_price
    if current_price > (price_range_mid + price_range / 4):
        price_range_text = "near its yearly high"
    elif current_price < (price_range_mid - price_range / 4):
        price_range_text = "near its yearly low"
    else:
        price_range_text = "in the mid-range of its yearly performance"
    price_range_text = (
        f"Over the last year, the stock traded between ${fifty_two_week_low:.2f} and ${fifty_two_week_high:.2f}. "
        f"The current price is ${current_price:.2f}, which is {price_range_text}."
    )

    # Statistical thresholds (quartiles)
    return_percentiles = np.percentile(daily_returns.dropna(), [25, 50, 75])

    # Qualitative analysis of annual return
    if annual_return > return_percentiles[2]:
        performance = "strong growth"
    elif return_percentiles[1] <= annual_return <= return_percentiles[2]:
        performance = "moderate growth"
    elif return_percentiles[0] <= annual_return < return_percentiles[1]:
        performance = "stable performance"
    else:
        performance = "a decline"

    # Construct the summary
    summary = (
        f"{name} operates in the {sector} sector and the {industry} industry. It is {market_cap_text}. "
        f"{pe_ratio_text} {dividend_text} {price_range_text}"
        f"Over the past year, {ticker} showed {performance}, "
        f"with an annual return of approximately {annual_return:.2f}%. "
        f"The stock was {volatility_description}, "
        f"with a maximum price of ${max_price:.2f} and a minimum price of ${min_price:.2f}. "
        f"The largest drop during the year was {abs(max_drawdown):.2f}%, indicating the stock's maximum drawdown. "
    )

    # Add trend if available
    if trend != "no trend":
        summary += f"{trend}"

    # Add ARIMA forecast if available
    arima_forecast = get_arima_forecast(data)
    if arima_forecast:
        summary += arima_forecast

    return summary

### News Sentiment Analysis Using FinBERT

In [6]:
def in_range(number, range_tuple):
    """
    Checks if a number is within a range defined by a tuple.

    Args:
        number (float): The number to check.
        range_tuple (tuple): A tuple containing two numbers (start, end) defining the range.

    Returns:
        bool: True if the number is within the range, False otherwise.
    """
    start, end = range_tuple
    return start <= number <= end

def compute_weighted_average(sentiment_scores, relevance_scores):
    """
    Computes the weighted average of sentiment scores using relevance scores as weights.
    
    Args:
        sentiments (list): List of sentiment labels ("Positive", "Neutral", "Negative").
        relevance_scores (list): List of relevance scores (weights) for each sentiment.
        
    Returns:
        float: The weighted average sentiment score.
    """
    
    # Compute weighted average
    weighted_sum = sum(score * weight for score, weight in zip(sentiment_scores, relevance_scores))
    total_weights = sum(relevance_scores)
    
    return weighted_sum / total_weights if total_weights > 0 else 0

# Function to analyze sentiment
def analyze_sentiment(news_df, model, tokenizer):
    """
    Analyzes the sentiment of a list of texts using FinBERT.
    Args:
        news_df (DataFrame): Pandas DataFrame with all relevant news articles and their summaries.
    Returns:
        sentiment (str): The aggregated news sentiment (Positive, Slightly Positive, Neutral, Slightly Negative, Negative).
    """
    sentiments = []
    text_summaries = list(news_df['summary'])
    relevance_scores = list(news_df['relevance_score'])

    for text in text_summaries:
        # Tokenize input text
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        
        # Get predictions from the model
        outputs = model(**inputs)
    
        predictions = torch.softmax(outputs.logits, dim=1)  # Convert logits to probabilities
 
        # Get the label with the highest probability
        label = torch.argmax(predictions, dim=1).item()
        
        # Map label to sentiment
        if label == 0:
            sentiments.append(-1)
        elif label == 1:
            sentiments.append(0)
        elif label == 2:
            sentiments.append(1)

    weighted_average = compute_weighted_average(sentiments, relevance_scores)

    thresholds = {
        "Negative": (-1, -0.6),
        "Slightly Negative": (-0.6, -0.2),
        "Neutral": (-0.2, 0.2),
        "Slightly Positive": (0.2, 0.6),
        "Positive": (0.6, 1),
    }

    # Dynamically categorize trend based on the overall slope
    for key, value in thresholds.items():
        if in_range(weighted_average,value):
            sentiment = key
            break
    
    return sentiment

### Generate System Prompt

In [7]:
def get_system_prompt(ticker, model, tokenizer, include_rec=False):

    # Check if ticker news exists in raw_news folder otherwise fetch raw news
    file_path = f"raw_data/{str(ticker).lower()}_raw.csv"

    if not os.path.exists(file_path):
        get_raw_news(ticker, interval="1y")
    
    news_df, comp_name = filter_news(ticker, relevance_score_threshold=0.7)
    sentiment = analyze_sentiment(news_df, model, tokenizer)
    company_summary = stock_info_to_text(ticker)

    recommendations = yf.Ticker(ticker).get_recommendations()

    sentiments = list(recommendations.keys())
    
    rec_first = recommendations.iloc[0, 1:]
    weights=dict()
    for key in rec_first.keys():
        weights[key] = rec_first[key]/rec_first.sum()
    
    weights=list(weights.values())

    mean_value=np.average(a=np.linspace(1,len(weights),len(weights)),weights=weights)
    recommendation = sentiments[round(mean_value)]
    print("Analyst recommendation:",recommendation)
    # Start with the company summary
    prompt = (
        "You are a financial analyst. Your task is to analyze the given company summary, the sentiment of the latest news articles and analyst recommendation to decide whether the company's stock should be classified as 'Strong Buy', 'Buy', 'Hold', 'Sell' or 'Strong Sell'. "
        "Provide a clear decision and a detailed explanation based on the information provided.\n\n"
        f"The company name is {comp_name} and the ticker is {ticker}\n\n"
        f"Company Summary: {company_summary}\n\n" 

    )   
    if include_rec:
        f"The majority of analysts give the sentiment: {recommendation}.\n\n"

    prompt += f"The sentiment of the latest news articles was: {sentiment}.\n\n"
    prompt += "Your response should include:\n1. A decision: 'Strong Buy', 'Buy', 'Hold', 'Sell' or 'Strong Sell'.\n2. A detailed explanation justifying your decision, citing specific points from the company summary and the articles."
    
    return prompt

### Llama pipeline

In [8]:
def run_llama(model, tokenizer, ticker, system_prompt):

    # Tokenize input
    inputs = tokenizer(system_prompt, return_tensors="pt").to("cuda")

    # Generate response
    output = model.generate(
        **inputs,
        max_length=2048,
        temperature=0.7,
        top_p=0.9,
        do_sample=True
    )

    # Decode and print the response
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    response = response[len(system_prompt):]

    return response

### Evaluate response

In [9]:
def evaluate(response,ticker):
    
    weights=dict()
    recommendations = yf.Ticker(ticker).get_recommendations()
    
    rec_first = recommendations.iloc[0, 1:]
    
    for key in rec_first.keys():
        weights[key] = rec_first[key]/rec_first.sum()
    
    weights=list(weights.values())

    mean_value=np.average(a=np.linspace(1,len(weights),len(weights)),weights=weights)

    floor_value = np.floor(mean_value)
    ceil_value = np.ceil(mean_value)
    majority_value = np.argmax(weights)+1

    # Find all start indices of the substring
    possible_sentiments=['Strong Buy', 'Buy', 'Hold', 'Sell' ,'Strong Sell']
    sentiment_location=[]
    for substring in possible_sentiments:
        matches = [match.start() for match in re.finditer(re.escape(substring), response, re.IGNORECASE)]
        if len(matches) == 0:
            sentiment_location.append(1000000000)
        else:
            sentiment_location.append(min(matches))
                 
    model_value = np.argmin(sentiment_location)+1
    print("Model Sentiment:", possible_sentiments[model_value-1])

    if model_value == majority_value:
        correct = True
    
    elif model_value in [floor_value, ceil_value]:
        correct = True
    
    else:
        correct = False

    return correct

In [10]:
# Load Llama 3.2 model and tokenizer
model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Replace with the model you have downloaded or hosted
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)

Loading checkpoint shards: 100%|██████████| 2/2 [00:10<00:00,  5.29s/it]


In [11]:
# Load FinBERT model and tokenizer
sentiment_model_name = "yiyanghkust/finbert-tone"
sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_name)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_name)

In [12]:
# Sanity check; get single ticker response
ticker = "TSLA"
system_prompt = get_system_prompt(ticker, sentiment_model, sentiment_tokenizer, include_rec=True)
response = run_llama(model, tokenizer, ticker, system_prompt)
print(response)

Analyst recommendation: hold


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


 
3. A recommendation for the investor.

**Based on the analysis, my decision is: Sell**

My detailed explanation is as follows:

Firstly, the company's stock price is highly overvalued with a price-to-earnings (PE) ratio of 116.21. This high valuation indicates that the stock may be due for a correction, and selling now may be a prudent decision. Furthermore, the stock does not pay dividends, which may not provide any income for investors.

Secondly, the stock has shown strong growth over the past year, but this growth is not sustainable in the long term. The stock's volatility is also a concern, with a maximum drop of 70.40% during the year. This volatility indicates that the stock may be susceptible to further price swings, which could negatively impact investor returns.

Thirdly, the sentiment of the latest news articles is slightly negative, which suggests that investors may be losing confidence in the company. This negative sentiment could lead to a decline in the stock price, ma

### Evaluate accuracy against analyst recommendations

In [13]:
ticker_groups=[blue_chip_stocks,Growth_stocks]
ticker_group_names=["blue_chip_stocks","growth_stocks"]
for t, ticker_group in enumerate(ticker_groups): # Run for each ticker group
    corrects = 0
    for ticker in ticker_group:
        print("Ticker:", ticker)
        ticker_corrects = []
        system_prompt = get_system_prompt(ticker, sentiment_model, sentiment_tokenizer)
        for i in range(3): # Run three times for each ticker and get majority of corrects
            print("Iteration",i)
            response = run_llama(model, tokenizer, ticker, system_prompt)
            correct = evaluate(response, ticker)
            print("Correct:",correct)
            #if not correct:
            #    print("Response:", response)
            ticker_corrects.append(correct)

        majority = True if ticker_corrects.count(True) > ticker_corrects.count(False) else False
        
        if majority:
            corrects += 1
    
    print(f"Accuracy for ticker group {ticker_group_names[t]}:",round(corrects/len(ticker_groups[t]),3)*100,"%")

Ticker: AAPL


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 2
Model Sentiment: Hold
Correct: True
Ticker: MSFT


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: False
Iteration 2
Model Sentiment: Hold
Correct: False
Ticker: KO


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Buy
Correct: True
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 2
Model Sentiment: Strong Sell
Correct: False
Ticker: PG


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 2
Model Sentiment: Hold
Correct: True
Ticker: JNJ


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 2
Model Sentiment: Sell
Correct: False
Ticker: DIS


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 2
Model Sentiment: Sell
Correct: False
Ticker: WMT


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 2
Model Sentiment: Sell
Correct: False
Ticker: JPM
No news data available.


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 2
Model Sentiment: Hold
Correct: True
Ticker: MCD


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Buy
Correct: True
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 2
Model Sentiment: Buy
Correct: True
Ticker: GE


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: False
Iteration 2
Model Sentiment: Strong Buy
Correct: True
Accuracy for ticker group blue_chip_stocks: 40.0 %
Ticker: TSLA


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: hold
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Strong Buy
Correct: False
Iteration 2
Model Sentiment: Sell
Correct: False
Ticker: AMZN


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 2
Model Sentiment: Strong Sell
Correct: False
Ticker: NVDA


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Buy
Correct: True
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: False
Iteration 2
Model Sentiment: Hold
Correct: False
Ticker: GOOG


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: False
Iteration 2
Model Sentiment: Hold
Correct: False
Ticker: META


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Strong Sell
Correct: False
Iteration 2
Model Sentiment: Sell
Correct: False
Ticker: NFLX


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Strong Sell
Correct: False
Iteration 2
Model Sentiment: Hold
Correct: True
Ticker: SHOP


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 2
Model Sentiment: Strong Buy
Correct: False
Ticker: SQ


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 2
Model Sentiment: Sell
Correct: False
Ticker: CRM


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Hold
Correct: True
Iteration 2
Model Sentiment: Hold
Correct: True
Ticker: UBER


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Analyst recommendation: buy
Iteration 0


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 1


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Model Sentiment: Sell
Correct: False
Iteration 2
Model Sentiment: Hold
Correct: False
Accuracy for ticker group growth_stocks: 10.0 %


In [14]:
print(corrects/len(ticker_groups[0]))

0.1
