In [1]:
import json
from pathlib import Path
from datetime import datetime, timedelta
import yfinance as yf
import pandas as pd

In [2]:
import sys
import os

# Make the parent of the current working directory importable so that the
# `src.*` imports in the following cells can be resolved.
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, ".."))

# Only add the entry once: re-running this cell must not keep growing sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [3]:
from src.utils.config import load_config
from src.data_extractor.tickers import get_most_recent_tickers_file

In [4]:
# Folder handed to get_most_recent_tickers_file below (relative to the working directory)
tickers_folder = "../metadata/tickers_list/"
# Configuration file handed to load_config below (relative to the working directory)
config_path = "../configs/main.yaml"

In [5]:
# Load the project configuration from config_path.
config = load_config(config_path)
# Bare last expression: displays the top-level keys of the loaded configuration.
config.keys()

dict_keys(['metadata', 'stock'])

In [6]:
# Ask the project helper for the tickers file to use, based on the folder above
# and the filename configured under metadata -> tickers_list_filename.
most_recent_tickers_file = get_most_recent_tickers_file(
    tickers_folder,
    config["metadata"]["tickers_list_filename"],
)

In [7]:
# Parse the selected tickers file as JSON.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's locale default (which is what open() uses when encoding is omitted).
with open(most_recent_tickers_file, "r", encoding="utf-8") as f:
    tickers_list = json.load(f)

In [8]:
from src.utils.path import verify_saving_path, verify_existing_path
from src.data_extractor.stocks import update_stock_data, download_stock_data
import shutil

In [17]:
from datetime import datetime, timedelta
import yfinance as yf
from pathlib import Path
import pandas as pd
import sys
import os
from src.utils.path import verify_saving_path, verify_existing_path
import shutil
import numpy as np



        


def _latest_ticker_file(ticker_folder):
    """Return the last ``<folder name>_*.csv`` file (sorted by name) in ``ticker_folder``, or None."""
    # `.name` (not `.stem`) so that folder names containing a dot are not truncated.
    ticker_files = sorted(ticker_folder.glob(f"{ticker_folder.name}_*.csv"))
    return ticker_files[-1] if ticker_files else None


def _rotate_archives(ticker_folder, today, keep_n_archived):
    """Create a new ``archive_*.csv`` copy for one ticker folder when due and prune the oldest ones.

    Parameters
    ----------
    ticker_folder : pathlib.Path
        Folder holding the ticker's ``<ticker>_<date>.csv`` and ``archive_*.csv`` files.
    today : datetime.date
        Reference date used to decide whether the newest archive is at least a week old.
    keep_n_archived : int
        Maximum number of archive files left in the folder after pruning.
    """
    most_recent_ticker_file = _latest_ticker_file(ticker_folder)

    # Nothing to archive (e.g. the ticker was dropped because of missing data
    # and the folder never received a file).
    if most_recent_ticker_file is None:
        return

    all_archive_csv_files = sorted(ticker_folder.glob("archive_*.csv"))

    # Archive name is the data file name prefixed with "archive_"
    archive_target = most_recent_ticker_file.with_name("archive_" + most_recent_ticker_file.name)

    # If no archived files, archive the current file and stop (a single archive needs no pruning)
    if not all_archive_csv_files:
        print("No archived files")
        copy_file(most_recent_ticker_file, archive_target)
        return

    # The date is the last "_"-separated token of the newest archive's file name
    most_recent_archived_file_date = all_archive_csv_files[-1].stem.split("_")[-1]
    most_recent_archived_date = datetime.strptime(most_recent_archived_file_date, "%Y-%m-%d").date()

    # Check if the most recent archived file date was a week ago, create a new archive file
    if today >= most_recent_archived_date + timedelta(weeks=1):
        print("There is an archived file and it's date is a week before. Time to create a new one")
        copy_file(most_recent_ticker_file, archive_target)

        # Re-read from disk instead of appending: the target may overwrite an
        # existing archive of the same name, and a duplicate list entry would
        # make the pruning below delete one file too many.
        all_archive_csv_files = sorted(ticker_folder.glob("archive_*.csv"))

    # Remove the oldest archives beyond the limit. The excess is computed once,
    # so a removal that silently fails cannot turn this into an endless loop.
    n_excess = max(len(all_archive_csv_files) - keep_n_archived, 0)
    for oldest_archive_file in all_archive_csv_files[:n_excess]:
        remove_file(oldest_archive_file)


def update_stock_data(data_folder, stock_data_start_date, keep_n_archived=3, column="Adj Close"):
    """Refresh the per-ticker CSV files under ``data_folder`` and rotate their archives.

    Every sub-folder of ``data_folder`` is treated as one ticker (the folder name
    is the ticker symbol). Tickers whose newest ``<ticker>_<YYYY-MM-DD>.csv`` is not
    dated today are downloaded again through ``download_stock_data``; tickers whose
    downloaded column contains missing values are dropped. For each remaining ticker
    a new ``<ticker>_<today>.csv`` is written and the previously newest file is removed.
    Afterwards every ticker folder gets its ``archive_*.csv`` files created/pruned.

    NOTE(review): ``remove_file`` and ``copy_file`` are neither defined nor imported
    in the visible part of this notebook - confirm they are in scope after a kernel
    restart (they presumably live next to ``verify_existing_path``; verify).

    Parameters
    ----------
    data_folder : str or path-like
        Folder containing one sub-folder per ticker. Checked with ``verify_existing_path``.
    stock_data_start_date : str
        Start date forwarded unchanged to ``download_stock_data``.
    keep_n_archived : int, default 3
        Maximum number of archive files to keep per ticker. Must not be negative.
    column : str, default "Adj Close"
        Column name forwarded unchanged to ``download_stock_data``.

    Returns
    -------
    None

    Raises
    ------
    TypeError
        If ``stock_data_start_date`` or ``column`` is not a string, or
        ``keep_n_archived`` is not an integer.
    ValueError
        If ``keep_n_archived`` is negative.
    """
    # Check the input
    verify_existing_path(data_folder)

    if not isinstance(stock_data_start_date, str):
        raise TypeError(f"Expected stock_data_start_date to be a string. Got {type(stock_data_start_date).__name__}.")

    if not isinstance(keep_n_archived, int):
        raise TypeError(f"Expected keep_n_archived to be an integer. Got {type(keep_n_archived).__name__}.")

    if not isinstance(column, str):
        raise TypeError(f"Expected column to be a string. Got {type(column).__name__}.")

    # Reject negative limits up front, before any file is touched
    if keep_n_archived < 0:
        raise ValueError(f"Expected keep_n_archived to be non-negative. Got {keep_n_archived}.")

    # Convert the data folder to Path object
    data_folder_path = Path(data_folder)

    # Take "today" once so the file names and the archive age check agree
    # even if the run crosses midnight.
    today = datetime.today()
    todays_date = today.strftime("%Y-%m-%d")
    print(f"Todays date: {todays_date}")

    # Only directories are ticker folders; stray files in data_folder are ignored.
    # Sorted so the processing order does not depend on the file system.
    ticker_folders = sorted(path for path in data_folder_path.glob("*") if path.is_dir())

    # --------------------- Collect the tickers whose newest file is not dated today -----------------------
    tickers_to_update = []
    for ticker_index, ticker_folder in enumerate(ticker_folders):

        # Get the ticker name from the folder
        ticker = ticker_folder.name
        print(f"Processing ticker {ticker_index}: {ticker}")

        most_recent_ticker_file = _latest_ticker_file(ticker_folder)

        # The date is the last "_"-separated token of the file name
        if most_recent_ticker_file is not None and most_recent_ticker_file.stem.split("_")[-1] == todays_date:
            continue

        tickers_to_update.append(ticker)

    # If no stock data found to be update, quit the function
    if not tickers_to_update:
        return

    # --------------------- Update the tickers in the list -----------------------
    updated_stock_data = download_stock_data(ticker=tickers_to_update,
                                             stock_data_start_date=stock_data_start_date,
                                             stock_data_end_date=todays_date,
                                             column=column)

    # Remove any stocks that have missing data
    updated_stock_data = updated_stock_data.loc[:, updated_stock_data.isna().sum() == 0]

    # Save each ticker data separately
    for ticker in updated_stock_data.columns:

        # Define the folder for that specific ticker
        ticker_data_save_path = data_folder_path / ticker

        # Files present before the new one is written
        previous_ticker_csv_files = sorted(ticker_data_save_path.glob(f"{ticker}_*.csv"))

        # Save the updated stock data
        new_ticker_file = ticker_data_save_path / f"{ticker}_{todays_date}.csv"
        updated_stock_data[[ticker]].to_csv(str(new_ticker_file))

        # Delete the old stock data, if there was any (a brand-new folder has none)
        if previous_ticker_csv_files and previous_ticker_csv_files[-1] != new_ticker_file:
            remove_file(previous_ticker_csv_files[-1])

    # --------------------- Create or update archives -----------------------
    for ticker_folder in ticker_folders:
        _rotate_archives(ticker_folder, today.date(), keep_n_archived)
        



In [18]:
# Refresh every ticker folder under ../data/, requesting history from 2010-01-01 onwards
update_stock_data(
    data_folder="../data/",
    stock_data_start_date="2010-01-01",
)

Todays date: 2025-07-24
Processing ticker 0: A
Processing ticker 1: AAPL
Processing ticker 2: ABT
Processing ticker 3: ACGL
Processing ticker 4: ACN
Processing ticker 5: ADBE
Processing ticker 6: ADI
Processing ticker 7: ADM
Processing ticker 8: ADP
Processing ticker 9: ADSK
Processing ticker 10: AEE
Processing ticker 11: AEP
Processing ticker 12: AES
Processing ticker 13: AFL
Processing ticker 14: AIG
Processing ticker 15: AIZ
Processing ticker 16: AJG
Processing ticker 17: AKAM
Processing ticker 18: ALB
Processing ticker 19: ALGN
Processing ticker 20: ALL
Processing ticker 21: AMAT
Processing ticker 22: AMD
Processing ticker 23: AME
Processing ticker 24: AMGN
Processing ticker 25: AMP
Processing ticker 26: AMT
Processing ticker 27: AMZN
Processing ticker 28: AON
Processing ticker 29: AOS
Processing ticker 30: APA
Processing ticker 31: APD
Processing ticker 32: APH
Processing ticker 33: ARE
Processing ticker 34: ATO
Processing ticker 35: AVB
Processing ticker 36: AVGO
Processing ticke