In [1]:
import yfinance as yf
import pandas as pd
from google.colab import drive
import os
from datetime import datetime
import pytz
# Mount Google Drive at /content/drive so the /content/drive/MyDrive/... paths
# used later in this notebook resolve. force_remount=True asks Colab to mount
# again even if a mount is already present (NOTE(review): confirm this is
# wanted on every run, since it may re-trigger the authorization prompt).
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [9]:
# Function to load existing tickers from log.csv
def load_existing_tickers(log_file):
    """Read the tracking log and return each ticker's last-updated date.

    Each data row has the form ``ticker,YYYY-MM-DD``. The first non-blank
    line is treated as a header (and skipped) only when it does not parse as
    such a row; this keeps the first record of a log that was created without
    a header instead of silently discarding it.

    Args:
        log_file: Path to the CSV log file.

    Returns:
        dict mapping ticker -> ``datetime`` of its last update. When a ticker
        appears on several rows, the row closest to the end of the file wins.
        An empty dict is returned when the file does not exist.

    Raises:
        ValueError: If any row after the first non-blank line is not exactly
            ``ticker,YYYY-MM-DD``.
    """
    existing_tickers = dict()
    if not os.path.exists(log_file):
        return existing_tickers

    with open(log_file, 'r') as file:
        is_first_record = True
        for line in file:
            record = line.strip()
            if not record:
                # Blank lines (e.g. a stray trailing newline) carry no data.
                continue
            try:
                ticker, last_updated = record.split(',')
                parsed_date = datetime.strptime(last_updated, '%Y-%m-%d')
            except ValueError:
                if is_first_record:
                    # An unparseable first line is the header row.
                    is_first_record = False
                    continue
                raise
            is_first_record = False
            existing_tickers[ticker] = parsed_date
    return existing_tickers

# Function to update log.csv with new tickers
def update_log(log_file, new_ticker, last_updated):
    """Append a ``ticker,date`` row to the tracking log.

    When the log does not exist yet (or is empty) the ``Ticker,LastUpdated``
    header is written first. The readers of this log skip the first line as a
    header, so creating the file without one would make them drop the first
    ticker ever logged.

    Args:
        log_file: Path to the CSV log file; created if missing.
        new_ticker: Ticker symbol to record.
        last_updated: Date string to record for the ticker (the callers in
            this notebook pass ``YYYY-MM-DD``).
    """
    needs_header = (not os.path.exists(log_file)) or os.path.getsize(log_file) == 0
    with open(log_file, 'a') as file:
        if needs_header:
            file.write("Ticker,LastUpdated\n")
        file.write(f"{new_ticker},{last_updated}\n")

def remove_duplicates_from_log(log_file):
    """Rewrite the tracking log so each ticker appears once with its latest date.

    The file is rewritten as a ``Ticker,LastUpdated`` header followed by one
    row per ticker, in order of each ticker's first appearance. Dates are
    compared as strings, which orders ``YYYY-MM-DD`` values chronologically.

    The first non-blank line is treated as a header only when it does not
    parse as ``ticker,YYYY-MM-DD``, so a log that was created without a
    header keeps its first record. Blank lines are ignored. Does nothing
    when the file does not exist.

    Args:
        log_file: Path to the CSV log file, rewritten in place.

    Raises:
        ValueError: If a row after the first non-blank line does not contain
            exactly two comma-separated fields.
    """
    if not os.path.exists(log_file):
        return

    latest_by_ticker = {}
    with open(log_file, 'r') as file:
        is_first_record = True
        for line in file:
            record = line.strip()
            if not record:
                continue
            try:
                ticker, last_updated = record.split(',')
                if is_first_record:
                    # Only the first record is date-validated: that is how a
                    # header row is told apart from real data.
                    datetime.strptime(last_updated, '%Y-%m-%d')
            except ValueError:
                if is_first_record:
                    is_first_record = False
                    continue
                raise
            is_first_record = False
            previous = latest_by_ticker.get(ticker)
            if previous is None or last_updated > previous:
                latest_by_ticker[ticker] = last_updated

    with open(log_file, 'w') as file:
        file.write("Ticker,LastUpdated\n")
        for ticker, latest_date in latest_by_ticker.items():
            file.write(f"{ticker},{latest_date}\n")

def _save_yearly_csvs(df, storage_folder, ticker):
    """Write one ``<year>.csv`` per calendar year of ``df`` for ``ticker``.

    Files go to ``<storage_folder>/<first character of ticker>/<ticker>/``;
    an existing file for the same year is overwritten.
    """
    ticker_directory = os.path.join(storage_folder, ticker[0], ticker)
    os.makedirs(ticker_directory, exist_ok=True)
    for year, year_data in df.groupby(df.index.year):
        year_data.to_csv(os.path.join(ticker_directory, f"{year}.csv"))


# Function to fetch and store stock price data
def fetch_stock_data(ticker_list, storage_folder, tracking_directory):
    """Download price history for each ticker and store it as per-year CSVs.

    Tickers that are not yet in ``<tracking_directory>/log.csv`` are
    downloaded with no start date (end = today's date). Tickers already in
    the log are re-downloaded from January 1st of the year of their last
    update, so the CSV for that year (and any later year) is rewritten.
    After a successful save, a ``ticker,today`` row is appended to the log.

    A ticker whose download comes back empty is skipped and NOT logged, so a
    failed download is not recorded as a successful update.

    Args:
        ticker_list: Iterable of ticker symbols (non-empty strings).
        storage_folder: Root folder for the per-ticker CSV directories.
        tracking_directory: Folder containing (or to contain) ``log.csv``.
    """
    log_file = os.path.join(tracking_directory, 'log.csv')
    existing_tickers = load_existing_tickers(log_file)

    for ticker in ticker_list:
        # Computed once per ticker so the download bound and the logged date agree.
        today = datetime.today().strftime('%Y-%m-%d')
        last_updated = existing_tickers.get(ticker)

        if last_updated is None:
            # Fetch data from Yahoo Finance
            df = yf.download(ticker, end=today)
        else:
            print(f"Data for {ticker} already exists. Updating...")
            # Restart from January 1st so the whole year file can be rewritten.
            start_date = f"{last_updated.year}-01-01"
            df = yf.download(ticker, start=start_date)

        if df is None or df.empty:
            # Nothing to save; leave the log untouched so the next run retries.
            print(f"No data returned for {ticker}; skipping.")
            continue

        _save_yearly_csvs(df, storage_folder, ticker)

        # Update the log file
        update_log(log_file, ticker, today)
        if last_updated is None:
            print(f"Data for {ticker} saved to findata folder")
        else:
            print(f"Data for {ticker} updated from {last_updated.strftime('%Y-%m-%d')} to today.")

if __name__ == "__main__":
    # Root folder for the per-ticker yearly CSVs (change to your own location)
    # and the folder that holds the tracking file, log.csv.
    storage_folder = "/content/drive/MyDrive/findata/eoddata"
    tracking_directory = "/content/drive/MyDrive/findata/log"
    for directory in (storage_folder, tracking_directory):
        os.makedirs(directory, exist_ok=True)

    # The ticker list is hard-coded for now. To read it from a CSV instead,
    # use: pd.read_csv(csv_file_path)['Ticker'].tolist()
    ticker_list = ["AAPL", "MSFT", "GOOGL"]
    fetch_stock_data(ticker_list, storage_folder, tracking_directory)

    # Each run appends one row per ticker, so collapse the log back to a
    # single (latest) row per ticker afterwards.
    log_file = os.path.join(tracking_directory, 'log.csv')
    remove_duplicates_from_log(log_file)


Data for AAPL already exists. Updating...
[*********************100%%**********************]  1 of 1 completed
Data for AAPL updated from 2023-09-28 to today.
Data for MSFT already exists. Updating...
[*********************100%%**********************]  1 of 1 completed
Data for MSFT updated from 2023-09-28 to today.
Data for GOOGL already exists. Updating...
[*********************100%%**********************]  1 of 1 completed
Data for GOOGL updated from 2023-09-28 to today.
