In [10]:
import time

import nltk
import numpy as np
import pandas as pd
import talib as ta
import yfinance as yf
from nltk.sentiment.vader import SentimentIntensityAnalyzer

from MongoDB_Connection import start_db

In [None]:
# Connect to MongoDB
# `start_db` comes from the project's MongoDB_Connection module; the handle it
# returns is used by the later cells as `db.Finance_Data`.
# NOTE(review): connection settings live inside start_db() — confirm they are
# not hard-coded credentials.
db = start_db()
# Echo the handle so a failed or unexpected connection is visible right away.
print(db)

In [None]:
# Ticker symbols whose price history is loaded for training.
# Kept as a whitespace-separated block so symbols can be added or removed
# without touching quotes and commas; .split() yields the list in this order.
tickers = """
    NVDA QBTS TSLA LCID F PLTR SOUN RGTI INFA AAL
    MARA BTG INTC CLF PDD JWN DJT AAPL OKLO SOFI
    X NIO WRD ITUB SMCI NU RIOT RIVN SMR AMD
    GOOGL ACHR GME UEC BBD ABEV AMZN AMCR RIG BAC
    IONQ HOOD AGNC WBD PFE CLSK CNH CCL IAG GRAB
    T KGC HBAN HIMS ERIC AUR MRVL PSLV GOOG VALE
    NGD SNAP CX RKLB BB UNH HOLX HL CSX NOK VOD
    PONY LYG MSFT HPE TEM KMI UAA ADT UBER GAP
    HAL WMB LYFT HLN CMCSA CDE TEVA GGB LUMN VTRS
    NCLH NXE PCG WMT MU LUV SLB PTEN CPNG
""".split()

In [None]:
# Entering documents into database based on ticker symbols above. More attributes will
# be added once the database is loaded with the initial data.
#
# The cell is safe to re-run: a ticker that already has documents in
# Finance_Data is skipped instead of having its 10-year history inserted again.
# NOTE: a ticker whose insert failed part-way still counts as "already loaded";
# delete its documents before re-running if that happens.

# Seconds to wait after each yfinance request to stay under the rate limit.
REQUEST_PAUSE_SECONDS = 3

for ticker in tickers:
    called_api = False
    try:
        # Cheap existence check (limit=1) so a re-run does not duplicate data.
        if db.Finance_Data.count_documents({"Ticker": ticker}, limit=1):
            print(f"Skipping {ticker}: documents already exist in Finance_Data.")
            continue

        # Gather 10 year historical data
        called_api = True
        data = yf.download(ticker, period="10y", interval="1d", progress=False)

        if data.empty:
            print(f"No data returned for {ticker}; nothing inserted.")
            continue

        # Move 'Date' from index to column and tag every row with its symbol
        data = data.reset_index()
        data["Ticker"] = ticker

        # Flatten MultiIndex columns if they exist so every field name is a plain
        # string; empty levels are dropped, e.g. ("Close", "NVDA") -> "Close_NVDA"
        # and ("Date", "") -> "Date".
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = ["_".join(filter(None, col)).strip() for col in data.columns]

        # Store missing values as null instead of NaN so later cells can detect
        # them with a plain None / null check.
        data = data.replace({np.nan: None})

        attributes = data.to_dict("records")

        # Insert into MongoDB
        db.Finance_Data.insert_many(attributes)
        print(f"Inserted {len(attributes)} attributes for {ticker}.")
    except Exception as e:
        # Best effort: report the failure and carry on with the next ticker.
        print(f"{ticker} had an error being inserted: {e}")
    finally:
        # Pause after every request, including failed ones, to prevent hitting
        # the rate limit; skipped tickers made no request and need no pause.
        if called_api:
            time.sleep(REQUEST_PAUSE_SECONDS)


In [None]:
# Restructuring the document fields in the database. The ingestion cell stores
# the price fields as "<Field>_<ticker>" (e.g. "Close_NVDA"). They are renamed to
# the bare field name so the subsequent steps can use the same field names
# across all tickers.

rename_fields = ["Open", "High", "Low", "Close", "Volume"]
# NOTE: this rebinds `tickers` to the symbols actually present in the collection.
tickers = db.Finance_Data.distinct("Ticker")

for ticker in tickers:
    # Map every suffixed field of this ticker to its uniform name.
    renames = {f"{field}_{ticker}": field for field in rename_fields}

    # One update per ticker instead of one per field: restrict it to this
    # ticker's documents that still carry at least one suffixed field, so
    # already-renamed documents are left untouched on a re-run. $rename itself
    # ignores source fields that a matched document does not have.
    db.Finance_Data.update_many(
        {
            "Ticker": ticker,
            "$or": [{old_field: {"$exists": True}} for old_field in renames],
        },
        {"$rename": renames},
    )
    print(f"Renamed fields for {ticker}")


In [None]:
"""
The next few sections will be for adding the additional columns to the database documents.
The first one will add a "Success" column. This will be based on whether the stock's "Close" value is greater than the "Open" value.
This is the only one that will require a manual calculation to be performed.
Next, the "Sentiment Score" column will be added. News articles related to the ticker symbol in question are fetched through yfinance and scored with the VADER sentiment analyzer from the nltk package, which produces a value from -1 to +1.
The columns for SMA, EMA, RSI and MACD will use a group of values from the ticker symbol in order to make the calculations.
This action will be performed using the TA-lib package.
"""


In [27]:
# Adding the 'Success' column
#
# Success is 1 when a document's "Close" is greater than its "Open", else 0.
# Documents where either value is missing or null are left without the field.
#
# The flag is computed inside MongoDB with a single pipeline-style update
# instead of fetching every document and sending one update per document.
# Requires MongoDB 4.2+ (updates with an aggregation pipeline).

success_result = db.Finance_Data.update_many(
    # {"$ne": None} matches only documents where the field exists and is not null.
    {"Open": {"$ne": None}, "Close": {"$ne": None}},
    [
        {
            "$set": {
                "Success": {"$cond": [{"$gt": ["$Close", "$Open"]}, 1, 0]}
            }
        }
    ],
)
print(f"Evaluated Success for {success_result.matched_count} documents.")


In [None]:
# Adding the 'Sentiment Score' column
#
# DON'T USE UNTIL THE CODE IS IMPLEMENTED SPECIFICALLY FOR THE DATABASE.
#
# Prototype: scores the news summaries yfinance returns for one hard-coded
# ticker with NLTK's VADER analyzer and prints the result. Nothing is written
# to MongoDB. Relies on nltk, yfinance and SentimentIntensityAnalyzer being
# imported in the import cell at the top of the notebook.

# Download VADER lexicon (only needs to be done once)
nltk.download('vader_lexicon')

# Set up the stock ticker symbol
ticker_symbol = "AAPL"
ticker = yf.Ticker(ticker_symbol)

# Fetch news articles for the stock (treat a missing result as "no news")
news_items = ticker.news or []

# Initialize VADER Sentiment Analyzer
sia = SentimentIntensityAnalyzer()
sentiment_scores = {}

# Analyze sentiment for each news summary
for item in news_items:
    # `or` also covers keys that are present but None, which would otherwise
    # crash .get() / polarity_scores().
    content = item.get("content") or {}
    headline = content.get("title") or "No Title Available"
    summary = content.get("summary") or "No Summary Available"

    sentiment = sia.polarity_scores(summary)
    # NOTE: keyed by summary text, so articles sharing a summary (including the
    # placeholder) overwrite one another.
    sentiment_scores[summary] = sentiment

    print(f"Headline: {headline}")
    print(f"Sentiment: {sentiment}")
    print("-" * 50)

# Read the market data once and reuse it below.
market_data = ticker.info or {}

# Build summary dictionary for the stock
aapl_summary = {
    "Symbol": ticker_symbol,
    "Name": market_data.get("longName", "Unknown Company"),
    "Market Data": market_data,
    "Sentiment Scores": sentiment_scores
}

print(aapl_summary)
