# Automatically fetching new data from the last saved point every 5 days

In [6]:
# !pip install schedule

In [None]:
import yfinance as yf
import pandas as pd
import datetime
import os
import schedule
import time

def fetch_intraday_data(ticker, start_date, end_date):
    """Download 1-minute bars for ``ticker`` and split the timestamp column.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        A DataFrame holding the columns ``history`` returned, minus
        ``Dividends`` and ``Stock Splits``, followed by ``Date`` and ``Time``
        columns derived from the ``Datetime`` index. When ``history`` yields
        no rows, an empty DataFrame with that same column layout is returned
        so callers can test ``.empty`` instead of handling a ``KeyError``.
    """
    ticker_obj = yf.Ticker(ticker)
    df = ticker_obj.history(interval='1m', start=start_date, end=end_date)

    # errors='ignore': the corporate-action columns are not guaranteed to be
    # present (for example on an empty result), and their absence is harmless.
    df = df.drop(columns=['Dividends', 'Stock Splits'], errors='ignore')

    if df.empty:
        # An empty result may not carry a 'Datetime' index to split, so build
        # the expected output layout directly.
        return pd.DataFrame(columns=[*df.columns, 'Date', 'Time'])

    # Move the Datetime index into a regular column, then split it.
    df = df.reset_index()
    df['Date'] = df['Datetime'].dt.date
    df['Time'] = df['Datetime'].dt.time

    return df.drop(columns='Datetime')
        
        
def update_intraday_data_for_ticker(ticker):
    """Create or extend the CSV of 1-minute data kept for ``ticker``.

    The file is named ``<ticker>_Data_From_20230812.csv``. If it is missing
    (or contains a header but no rows), the last 6 days are fetched and
    written as a new file. Otherwise data is fetched from the day after the
    newest stored timestamp up to today's date and appended without a header.

    Nothing is written when the fetch returns no rows, so a header-only file
    is never created; such a file would make the next run compute ``NaT`` as
    the latest timestamp.

    NOTE(review): catch-up requests are capped at the same 6-day window the
    initial download uses, on the understanding that Yahoo limits how much
    1-minute history one request may span -- confirm the exact limit against
    the yfinance documentation. Older gaps are reported and left unfilled.

    Args:
        ticker: Symbol to update; also used as the CSV file-name prefix.

    Returns:
        None. Progress is reported with ``print``.
    """
    csv_file_name = ticker + '_Data_From_20230812.csv'
    today = datetime.datetime.now().date()
    max_lookback = datetime.timedelta(days=6)

    # Find the newest stored timestamp, if there is any usable history.
    existing_columns = None
    latest_datetime = pd.NaT
    if os.path.exists(csv_file_name):
        existing_df = pd.read_csv(csv_file_name)
        existing_columns = existing_df.columns
        if not existing_df.empty:
            latest_datetime = pd.to_datetime(
                existing_df['Date'] + ' ' + existing_df['Time']
            ).max()

    if pd.isna(latest_datetime):
        # No file, or no parseable rows in it: start a fresh file.
        intraday_data = fetch_intraday_data(ticker, today - max_lookback, today)
        if intraday_data.empty:
            print(f"No existing data found for {ticker}.")
            return
        intraday_data.to_csv(csv_file_name, index=False)
        print(f"{len(intraday_data)} new records added to a new  {csv_file_name} file.")
        return

    # Resume on the day after the newest stored record.
    start_date = (latest_datetime + datetime.timedelta(days=1)).date()
    if start_date >= today:
        print(f"No new records to add for {ticker}.")
        return

    earliest_start = today - max_lookback
    if start_date < earliest_start:
        # A request spanning the whole gap could fail and leave the file
        # stuck forever; fetch the most recent window instead.
        print(
            f"Gap since {latest_datetime} exceeds {max_lookback.days} days "
            f"for {ticker}; fetching from {earliest_start} only."
        )
        start_date = earliest_start

    intraday_data = fetch_intraday_data(ticker, start_date, today)
    if intraday_data.empty:
        print(f"No new records to add for {ticker}.")
        return

    # Rows are appended without a header, so their column order must match
    # the header already in the file.
    intraday_data = intraday_data.reindex(columns=existing_columns)
    intraday_data.to_csv(csv_file_name, mode='a', index=False, header=False)
    print(f"{len(intraday_data)} new records added to the {csv_file_name} file.")
        

# Tickers to keep up to date; append further symbols to this list as needed.
symbols = [
    "AAPL",
    "ORCL",
    "MSFT",
]

def initial_data_update():
    """Run one update pass over every entry of ``symbols``, in list order."""
    for ticker in symbols:
        update_intraday_data_for_ticker(ticker)

# Register one recurring job per symbol: each job calls
# update_intraday_data_for_ticker(symbol) every 5 days.
for symbol in symbols:
    schedule.every(5).days.do(update_intraday_data_for_ticker, symbol)

# Run an update for every symbol right away, before entering the scheduler loop.
initial_data_update()

# Poll the scheduler once per second, forever; this loop never exits on its
# own, so the cell/script runs until it is interrupted.
while True:
    schedule.run_pending()
    time.sleep(1)