In [5]:
import yfinance as yf
import numpy as np
from MongoDB_Connection import start_db
import pandas as pd
import time

In [2]:
# Connect to MongoDB
# start_db is the project-local helper imported from MongoDB_Connection; every
# later cell reads and writes through the `db` handle it returns.
# NOTE(review): the recorded output suggests this is a pymongo Database for
# 'C964_Database' on localhost:27017 -- confirm against MongoDB_Connection.
db = start_db()
# Echo the handle so the connection target is visible in the cell output.
print(db)

['C964_Database', 'admin', 'config', 'local']
Entry exists
Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'C964_Database')


In [3]:
# Ticker symbols used for training. Kept as one whitespace-separated block
# (ten symbols per row) and split into a plain list of strings.
tickers = """
    NVDA QBTS TSLA LCID F PLTR SOUN RGTI INFA AAL
    MARA BTG INTC CLF PDD JWN DJT AAPL OKLO SOFI
    X NIO WRD ITUB SMCI NU RIOT RIVN SMR AMD
    GOOGL ACHR GME UEC BBD ABEV AMZN AMCR RIG BAC
    IONQ HOOD AGNC WBD PFE CLSK CNH CCL IAG GRAB
    T KGC HBAN HIMS ERIC AUR MRVL PSLV GOOG VALE
    NGD SNAP CX RKLB BB UNH HOLX HL CSX NOK
    VOD PONY LYG MSFT HPE TEM KMI UAA ADT UBER
    GAP HAL WMB LYFT HLN CMCSA CDE TEVA GGB LUMN
    VTRS NCLH NXE PCG WMT MU LUV SLB PTEN CPNG
""".split()

In [6]:
# Entering documents into database based on ticker symbols above. More attributes will
# be added once the database is loaded with the initial data.
#
# insert_many only ever appends, so a ticker that already has documents in
# Finance_Data is skipped. That makes the cell safe to re-run (or to resume after
# a failure part-way through the list) without duplicating rows.
for ticker in tickers:
    # Only pause for the rate limit when a request was actually sent.
    download_attempted = False
    try:
        if db.Finance_Data.find_one({"Ticker": ticker}, {"_id": 1}) is not None:
            print(f"{ticker} already has documents in the database; skipping.")
            continue

        # Gather 10 year historical data
        download_attempted = True
        data = yf.download(ticker, period="10y", interval="1d", progress=False)

        if data.empty:
            # Make the gap visible instead of silently moving on.
            print(f"No data returned for {ticker}; nothing inserted.")
            continue

        # Move 'Date' from index to column
        data = data.reset_index()

        # Flatten MultiIndex columns if they exist. MongoDB field names must be
        # plain strings, so each column tuple is joined with "_" (empty levels
        # dropped); per the rename step later in the notebook this yields names
        # such as "Close_<ticker>".
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = ['_'.join(filter(None, col)).strip() for col in data.columns]

        data["Ticker"] = ticker

        # Convert NaNs to None (MongoDB does not accept NaN)
        data = data.replace({np.nan: None})

        attributes = data.to_dict("records")

        # Insert into MongoDB
        db.Finance_Data.insert_many(attributes)
        print(f"Inserted {len(attributes)} attributes for {ticker}.")
    except Exception as e:
        # Best effort: report the failure and carry on with the next ticker.
        print(f"{ticker} had an error being inserted: {e}")
    finally:
        # Timer to prevent rate limit. In `finally` so the pause also happens
        # when the download or insert raised -- the case where backing off
        # matters most.
        if download_attempted:
            time.sleep(3)


Inserted 2515 attributes for NVDA.
Inserted 1118 attributes for QBTS.
Inserted 2515 attributes for TSLA.
Inserted 1177 attributes for LCID.
Inserted 2515 attributes for F.
Inserted 1169 attributes for PLTR.
Inserted 772 attributes for SOUN.
Inserted 1029 attributes for RGTI.
Inserted 898 attributes for INFA.
Inserted 2515 attributes for AAL.
Inserted 2515 attributes for MARA.
Inserted 2515 attributes for BTG.
Inserted 2515 attributes for INTC.
Inserted 2515 attributes for CLF.
Inserted 1718 attributes for PDD.
Inserted 2511 attributes for JWN.
Inserted 917 attributes for DJT.
Inserted 2515 attributes for AAPL.
Inserted 976 attributes for OKLO.
Inserted 1104 attributes for SOFI.
Inserted 2515 attributes for X.
Inserted 1685 attributes for NIO.
Inserted 145 attributes for WRD.
Inserted 2515 attributes for ITUB.
Inserted 2515 attributes for SMCI.
Inserted 868 attributes for NU.
Inserted 2303 attributes for RIOT.
Inserted 888 attributes for RIVN.
Inserted 813 attributes for SMR.
Inserted 2

In [7]:
# Restructuring the document fields in the database. The documents loaded from the
# yfinance API carry ticker-suffixed field names (e.g. "Close_<ticker>"). They are
# renamed to the bare names below so that subsequent steps can address the same
# field across all tickers.

rename_fields = ["Open", "High", "Low", "Close", "Volume"]
# NOTE: this rebinds `tickers` to the distinct Ticker values stored in the
# database, replacing the training list defined earlier in the notebook.
tickers = db.Finance_Data.distinct("Ticker")

for ticker in tickers:
    # Map every suffixed field to its bare name, e.g. "Close_<ticker>" -> "Close".
    renames = {f"{field}_{ticker}": field for field in rename_fields}

    # One update per ticker instead of one per (ticker, field): $rename accepts
    # several fields at once and ignores any that are missing from a document.
    # The filter is limited to this ticker's documents that still carry at
    # least one suffixed field, so already-renamed documents are not touched.
    db.Finance_Data.update_many(
        {
            "Ticker": ticker,
            "$or": [{old_field: {"$exists": True}} for old_field in renames],
        },
        {"$rename": renames},
    )
    print(f"Renamed fields for {ticker}")


Renamed fields for AAL
Renamed fields for AAPL
Renamed fields for ABEV
Renamed fields for ACHR
Renamed fields for ADT
Renamed fields for AGNC
Renamed fields for AMCR
Renamed fields for AMD
Renamed fields for AMZN
Renamed fields for AUR
Renamed fields for BAC
Renamed fields for BB
Renamed fields for BBD
Renamed fields for BTG
Renamed fields for CCL
Renamed fields for CDE
Renamed fields for CLF
Renamed fields for CLSK
Renamed fields for CMCSA
Renamed fields for CNH
Renamed fields for CPNG
Renamed fields for CSX
Renamed fields for CX
Renamed fields for DJT
Renamed fields for ERIC
Renamed fields for F
Renamed fields for GAP
Renamed fields for GGB
Renamed fields for GME
Renamed fields for GOOG
Renamed fields for GOOGL
Renamed fields for GRAB
Renamed fields for HAL
Renamed fields for HBAN
Renamed fields for HIMS
Renamed fields for HL
Renamed fields for HLN
Renamed fields for HOLX
Renamed fields for HOOD
Renamed fields for HPE
Renamed fields for IAG
Renamed fields for INFA
Renamed fields for 