## **Imports**

In [25]:
import os
from urllib.parse import quote_plus

import kagglehub
import pandas as pd
from dotenv import load_dotenv
from kaggle.api.kaggle_api_extended import KaggleApi
from pymongo import MongoClient

In [17]:
# Load environment variables from a .env file so the os.getenv() calls below
# (MONGO_USER, MONGO_PASS, DATABASE_NAME) can find them.
load_dotenv()

True

## **MongoDB Setup**
- Target database: `sylvr_finance_db`

In [14]:
# Read the Atlas credentials from the environment instead of hard-coding them.
user = os.getenv("MONGO_USER")
password = os.getenv("MONGO_PASS")

# Fail early with a clear message; otherwise the URI would silently contain
# the literal text "None" and the failure would only surface on first use.
if not user or not password:
    raise RuntimeError("MONGO_USER and MONGO_PASS must be set (e.g. in the .env file)")

# Credentials must be percent-escaped: characters such as '@', ':', '/' or '%'
# in a password would otherwise be parsed as URI delimiters.
uri = (
    f"mongodb+srv://{quote_plus(user)}:{quote_plus(password)}"
    "@sylvr-financial-cluster.jz9cn66.mongodb.net/?retryWrites=true&w=majority"
)
client = MongoClient(uri)

In [29]:
# Sanity check: list the databases visible through this client connection.
print(client.list_database_names()) 

['sample_analytics', 'sylvr_finance_db', 'admin', 'local']


In [30]:
# Name of the working database, taken from the environment.
DB_NAME = os.getenv("DATABASE_NAME")
if not DB_NAME:
    # client[None] would fail with an opaque TypeError; report the real cause.
    raise RuntimeError("DATABASE_NAME must be set (e.g. in the .env file)")
db = client[DB_NAME]


def get_collection(collection_name):
    """Return the collection called ``collection_name`` from the module-level ``db``.

    Args:
        collection_name: Name of the collection to look up in ``db``.

    Returns:
        The object produced by ``db[collection_name]``.
    """
    return db[collection_name]

In [31]:
# Sanity check: list the collections currently present in the working database.
print(db.list_collection_names())

['stocks', 'EFTs']


In [32]:
# Move every collection from the sample database into the project database.
# WARNING: this cell is destructive - each source collection is dropped after
# its documents have been copied.
source_db = client["sample_analytics"]
target_db = client["sylvr_finance_db"]

collections = source_db.list_collection_names()

for col_name in collections:
    source_col = source_db[col_name]
    target_col = target_db[col_name]

    # Materialise the cursor: a Cursor object is always truthy, so testing it
    # directly cannot detect an empty collection, and insert_many() raises when
    # it is given nothing to insert.
    documents = list(source_col.find())
    if documents:
        result = target_col.insert_many(documents)
        # Never drop the source unless every document was written to the target.
        if len(result.inserted_ids) != len(documents):
            raise RuntimeError(
                f"Copied {len(result.inserted_ids)} of {len(documents)} documents "
                f"for '{col_name}'; source collection left intact."
            )
        print(f"Moved collection '{col_name}' with {len(documents)} documents.")

    source_col.drop()

Moved collection 'transactions' with 1746 documents.
Moved collection 'accounts' with 1746 documents.
Moved collection 'customers' with 500 documents.


## **Kaggle Dataset**

In [26]:
# Create the Kaggle API client and authenticate it.
# NOTE(review): presumably credentials come from ~/.kaggle/kaggle.json or the
# KAGGLE_USERNAME / KAGGLE_KEY environment variables - confirm for this setup.
api = KaggleApi()
api.authenticate()

In [27]:
# Local directory (relative to the working directory) that receives the dataset.
save_path = "Dataset"
# exist_ok=True makes this cell safe to re-run when the directory already exists.
os.makedirs(save_path, exist_ok=True)

In [28]:
# Download the Kaggle dataset into save_path; unzip=True asks the client to
# extract the downloaded archive there.
api.dataset_download_files("borismarjanovic/price-volume-data-for-all-us-stocks-etfs", path=save_path, unzip=True)

Dataset URL: https://www.kaggle.com/datasets/borismarjanovic/price-volume-data-for-all-us-stocks-etfs


## **Uploading to Database**

In [35]:
def insert_stock_file(file_path, collection):
    """Load one CSV-formatted price file and insert its rows into ``collection``.

    The symbol is the file's base name without its final extension
    (e.g. ``aaap.us.txt`` -> ``aaap.us``) and is added to every record under
    the ``symbol`` key. If a ``Date`` column exists it is parsed to datetimes;
    values that cannot be parsed are stored as ``None``.

    Errors are reported with ``print`` rather than raised, so that one bad file
    does not abort a bulk load.

    Args:
        file_path: Path of the file to read with ``pandas.read_csv``.
        collection: Object exposing ``insert_many(list_of_dicts)``.

    Returns:
        The number of records inserted, or 0 when the file was skipped or
        processing failed.
    """
    symbol = os.path.splitext(os.path.basename(file_path))[0]

    try:
        df = pd.read_csv(file_path)
        if df.empty:
            print(f"⚠️ Skipped empty file: {file_path}")
            return 0

        has_date = 'Date' in df.columns
        if has_date:
            df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

        records = df.to_dict(orient='records')
        for record in records:
            record['symbol'] = symbol
            # errors='coerce' turns unparseable dates into NaT, which the BSON
            # encoder cannot serialise; a single NaT would make the whole
            # insert_many() call fail. Store such dates as null instead.
            if has_date and pd.isna(record['Date']):
                record['Date'] = None

        collection.insert_many(records)
        print(f"✅ Inserted {len(records)} records for {symbol}")
        return len(records)

    except pd.errors.EmptyDataError:
        # Raised by read_csv for files with no parsable content at all.
        print(f"⚠️ EmptyDataError: Skipped file with no data: {file_path}")

    except Exception as e:
        # Deliberate best-effort: report and continue with the next file.
        print(f"❌ Failed to process {file_path}: {e}")

    return 0




def load_all_stock_data(folder_path, target_collection):
    """Insert every ``.txt`` file found directly in ``folder_path``.

    Each matching file is handed to ``insert_stock_file`` together with the
    collection ``db[target_collection]``.

    Args:
        folder_path: Directory whose entries are scanned (not recursive).
        target_collection: Name of the collection in the module-level ``db``.
    """
    destination = db[target_collection]
    txt_names = (name for name in os.listdir(folder_path) if name.endswith(".txt"))
    for name in txt_names:
        full_path = os.path.join(folder_path, name)
        insert_stock_file(full_path, destination)

In [36]:
if __name__ == "__main__":
    # Build the paths with os.path.join so the cell also works on Linux/macOS;
    # a literal backslash is only a path separator on Windows.
    STOCKS_DIR = os.path.join("Dataset", "Stocks")
    ETFS_DIR = os.path.join("Dataset", "ETFs")

    print("Loading Stocks...")
    load_all_stock_data(STOCKS_DIR, "stocks")

    print("Loading ETFs...")
    # NOTE: the collection name is spelled "EFTs" (not "ETFs") on purpose, to
    # match the collection that already exists in the database.
    load_all_stock_data(ETFS_DIR, "EFTs")

    print("All data uploaded!")

Loading Stocks...
✅ Inserted 505 records for aaap.us
✅ Inserted 785 records for aac.us
✅ Inserted 989 records for aal.us
✅ Inserted 1211 records for aamc.us
✅ Inserted 1041 records for aaoi.us
✅ Inserted 1717 records for aat.us
✅ Inserted 1827 records for abac.us
✅ Inserted 1224 records for abbv.us
✅ Inserted 1970 records for abcd.us
✅ Inserted 885 records for abdc.us
✅ Inserted 729 records for abeo.us
✅ Inserted 515 records for abeow.us
✅ Inserted 470 records for abil.us
✅ Inserted 13 records for ablx.us
✅ Inserted 875 records for abrn.us
✅ Inserted 1144 records for abr_a.us
✅ Inserted 1042 records for abr_b.us
✅ Inserted 832 records for abr_c.us
✅ Inserted 529 records for abtx.us
✅ Inserted 862 records for aby.us
✅ Inserted 491 records for ac.us
✅ Inserted 512 records for acbi.us
⚠️ EmptyDataError: Skipped file with no data: Dataset\Stocks\accp.us.txt
✅ Inserted 635 records for acerw.us
✅ Inserted 1552 records for acfc.us
✅ Inserted 59 records for acglo.us
✅ Inserted 280 records for 