In [44]:
import pandas as pd
import sqlite3
import requests
import yfinance as yf

In [None]:
def fetch_intraday_data(ticker: str, interval: str = '1min', api_key: str = 'CL0DPF39WZVEOO0U') -> pd.DataFrame:
    url = f"https://www.alphavantage.co/query?function=TIME_SERIES_INTRADAY&symbol={ticker}&interval={interval}&apikey={api_key}&outputsize=full"
    response = requests.get(url)
    data = response.json()

    time_series = data.get(f"Time Series ({interval})", {})
    if not time_series:
        print(f"No data returned for {ticker}")
        return None
    
    df = pd.DataFrame(time_series).transpose()
    df.index.name = 'timestamp'
    df.columns = ["open", "high", "low", "close", "volume"]
    df = df.sort_index()
    df.reset_index(inplace=True)
    df['ticker'] = ticker

    return df

In [104]:
def fetch_intraday_data(ticker: str, interval: str = '1m') -> pd.DataFrame:
    tick = yf.Ticker(ticker)
    df = tick.history(interval=interval, period='1d')
    df.drop(columns=['Dividends', 'Stock Splits'], inplace=True)   
    df.columns = ["open", "high", "low", "close", "volume"]
    df.index.name = "timestamp"
    df.reset_index(inplace=True)
    df['ticker'] = ticker
    
    return df

In [105]:
conn = sqlite3.connect('intraday_stock_prices.db')
cursor = conn.cursor()

cursor.execute('''
    CREATE TABLE IF NOT EXISTS intraday_prices (
        ticker TEXT,
        timestamp TEXT,
        open REAL,
        high REAL,
        low REAL,
        close REAL,
        volume INTEGER
    )
''')

conn.commit()

In [106]:
# ensure no duplicates
def get_latest_timestamp(ticker: str):
    query = """
    SELECT MAX(timestamp) FROM intraday_prices WHERE ticker = ?
    """
    cursor.execute(query, (ticker,))
    result = cursor.fetchone()[0]
    return result

In [107]:
def store_data_to_db(data, latest_timestamp: str):
    if data is not None:
        if latest_timestamp:
            data = data[data["timestamp"] > latest_timestamp]
        if not data.empty:
            data.to_sql('intraday_prices', conn, if_exists='append', index=False)
        else:
            print("No new data to insert.")
    else:
        print("No data to store.")

In [112]:
def main():
    tickers = ['AAPL', 'MSFT']
    for ticker in tickers:
        latest_timestamp = get_latest_timestamp(ticker)
        data = fetch_intraday_data(ticker)
        store_data_to_db(data, latest_timestamp)

if __name__ == "__main__":
    main()
    

No new data to insert.
No new data to insert.


In [109]:
def query_data(ticker, start_time, end_time):
    query = f"""
    SELECT * FROM intraday_prices
    WHERE ticker = '{ticker}' AND timestamp BETWEEN '{start_time}' AND '{end_time}'
    """
    df = pd.read_sql_query(query, conn)
    return df

In [111]:
query_data('AAPL', '2025-11-17 09:34:00-05:00', '2025-11-17 09:30:00-05:00')

Unnamed: 0,ticker,timestamp,open,high,low,close,volume


In [113]:
tick = yf.Ticker("MSFT")
df = tick.history(interval="1m", period='1d')

In [114]:
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-11-17 09:30:00-05:00,508.445007,508.640015,506.179993,508.510101,849389,0.0,0.0
2025-11-17 09:31:00-05:00,508.549988,509.149994,507.549988,508.359985,80013,0.0,0.0
2025-11-17 09:32:00-05:00,508.480011,510.350006,508.230011,509.940002,120479,0.0,0.0
2025-11-17 09:33:00-05:00,509.980011,510.429993,509.670013,510.029999,60649,0.0,0.0
2025-11-17 09:34:00-05:00,510.049988,510.290009,509.720001,509.970001,30449,0.0,0.0
...,...,...,...,...,...,...,...
2025-11-17 15:55:00-05:00,506.339996,506.929993,506.304688,506.739990,55707,0.0,0.0
2025-11-17 15:56:00-05:00,506.750000,506.845001,506.459991,506.690002,51136,0.0,0.0
2025-11-17 15:57:00-05:00,506.679993,506.989990,506.239990,506.899994,73247,0.0,0.0
2025-11-17 15:58:00-05:00,506.920013,507.454987,506.829987,507.454987,102878,0.0,0.0


In [87]:
df.drop(columns=['Dividends', 'Stock Splits'], inplace=True)


In [99]:
df.columns = ["open", "high", "low", "close", "volume"]
df.index.name = "timestamp"

In [65]:
df.columns = ["open", "high", "low", "close", "volume"]
df.rename(columns={"index": "timestamp"}, inplace=True)

In [100]:
df.index.name

'timestamp'

In [102]:
df.reset_index(inplace=True)

In [103]:
df

Unnamed: 0,timestamp,open,high,low,close,volume
0,2025-11-17 09:30:00-05:00,508.445007,508.640015,506.179993,508.510101,849389
1,2025-11-17 09:31:00-05:00,508.549988,509.149994,507.549988,508.359985,80013
2,2025-11-17 09:32:00-05:00,508.480011,510.350006,508.230011,509.940002,120479
3,2025-11-17 09:33:00-05:00,509.980011,510.429993,509.670013,510.029999,60649
4,2025-11-17 09:34:00-05:00,510.049988,510.290009,509.720001,509.970001,30449
...,...,...,...,...,...,...
383,2025-11-17 15:55:00-05:00,506.339996,506.929993,506.304688,506.739990,55707
384,2025-11-17 15:56:00-05:00,506.750000,506.845001,506.459991,506.690002,51136
385,2025-11-17 15:57:00-05:00,506.679993,506.989990,506.239990,506.899994,73247
386,2025-11-17 15:58:00-05:00,506.920013,507.454987,506.829987,507.454987,102878
