In [1]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import datetime
import os
from dotenv import load_dotenv
import sys
load_dotenv()

ALPHA_VANTAGE_API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")

sys.path.append("../../")
from src.config.paths import *
from src.config.settings import *


In [11]:
import time
import logging
from datetime import datetime, timedelta
from src.utils.log_utils import setup_logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = setup_logging(__name__)

def fetch_market_intraday(ticker: str, interval: str = "1min", output_dir: Path = RAW_INTRADAY, api_key: str = ALPHA_VANTAGE_API_KEY, datatype: str = "csv", adjusted: bool = True, extended_hours: bool = True, outputsize: str = "full"):
    """
    Fetch intraday market data for a given ticker from Alpha Vantage.
    For 1min interval, fetches data month by month for the past year.
    For other intervals, fetches the most recent 30 days of data.

    Args:
        ticker (str): The ticker symbol of the stock to fetch data for.
        interval (str): Time interval between data points. One of: 1min, 5min, 15min, 30min, 60min
        output_dir (Path): The directory to save the fetched data.
        api_key (str): Alpha Vantage API key
        datatype (str): Return data format - 'json' or 'csv'
        adjusted (bool): Whether to adjust for splits/dividends. Default True
        extended_hours (bool): Whether to include extended trading hours. Default True
        outputsize (str): Amount of data to return - 'compact' or 'full'. Default 'full'

    Returns:
        Path: Path to the saved data file
    """
    logger.info(f"Starting data fetch for {ticker} with {interval} interval")
    data_file = output_dir / f"{ticker}_{interval}.{datatype}"
    
    # For 1min interval, fetch month by month for past year
    if interval == "1min":
        logger.info("Using 1min interval - fetching data month by month for past year")
        # Get list of months to fetch (past 12 months)
        months = []
        current_date = datetime.now()
        for i in range(12):
            first_of_month = current_date.replace(day=1)
            months.append(first_of_month.strftime("%Y-%m"))
            current_date = first_of_month - timedelta(days=1)
        
        logger.info(f"Will fetch data for months: {months}")
        
        # Fetch data for each month
        all_data = []
        header = None
        for month in months:
            logger.info(f"Fetching data for {ticker} - {month}")
            params = {
                'function': 'TIME_SERIES_INTRADAY',
                'symbol': ticker,
                'interval': interval,
                'apikey': api_key,
                'datatype': datatype,
                'adjusted': str(adjusted).lower(),
                'extended_hours': str(extended_hours).lower(),
                'outputsize': outputsize,
                'month': month
            }
            
            url = 'https://www.alphavantage.co/query'
            r = requests.get(url, params=params)
            
            # Parse CSV data
            lines = r.text.split('\n')
            if not header:
                header = lines[0]
                all_data.append(header)
            data_lines = lines[1:]
            all_data.extend(data_lines)
            
            logger.info(f"Retrieved {len(data_lines)} data points for {month}")
            
            # Alpha Vantage has rate limits, so wait between requests
            # logger.info("Waiting 12 seconds before next request due to rate limits...")
            # time.sleep(12)  # Wait 12 seconds between requests
            
    else:
        logger.info(f"Using {interval} interval - fetching most recent data")
        # For other intervals, just fetch most recent data
        params = {
            'function': 'TIME_SERIES_INTRADAY',
            'symbol': ticker,
            'interval': interval,
            'apikey': api_key,
            'datatype': datatype,
            'adjusted': str(adjusted).lower(),
            'extended_hours': str(extended_hours).lower(),
            'outputsize': outputsize
        }
        
        url = 'https://www.alphavantage.co/query'
        r = requests.get(url, params=params)
        all_data = r.text.split('\n')
        logger.info(f"Retrieved {len(all_data)-1} data points")

    # Write combined data to file
    logger.info(f"Writing data to {data_file}")
    with open(data_file, "w") as f:
        f.write('\n'.join(all_data))

    logger.info("Data fetch completed successfully")
    return data_file

# # Example usage
# for ticker in DJ_TITANS_50_TICKER:
fetch_market_intraday("VIXM", interval="1min", outputsize="full")
# fetch_market_intraday("AAPL", interval="1min", outputsize="full")


2025-02-17 16:45:28,428 - __main__ - INFO - Starting data fetch for VIXM with 1min interval


2025-02-17 16:45:28,428 - INFO - Starting data fetch for VIXM with 1min interval


2025-02-17 16:45:28,430 - __main__ - INFO - Using 1min interval - fetching data month by month for past year


2025-02-17 16:45:28,430 - INFO - Using 1min interval - fetching data month by month for past year


2025-02-17 16:45:28,430 - __main__ - INFO - Will fetch data for months: ['2025-02', '2025-01', '2024-12', '2024-11', '2024-10', '2024-09', '2024-08', '2024-07', '2024-06', '2024-05', '2024-04', '2024-03']


2025-02-17 16:45:28,430 - INFO - Will fetch data for months: ['2025-02', '2025-01', '2024-12', '2024-11', '2024-10', '2024-09', '2024-08', '2024-07', '2024-06', '2024-05', '2024-04', '2024-03']


2025-02-17 16:45:28,431 - __main__ - INFO - Fetching data for VIXM - 2025-02


2025-02-17 16:45:28,431 - INFO - Fetching data for VIXM - 2025-02


2025-02-17 16:45:29,384 - __main__ - INFO - Retrieved 1076 data points for 2025-02


2025-02-17 16:45:29,384 - INFO - Retrieved 1076 data points for 2025-02


2025-02-17 16:45:29,384 - __main__ - INFO - Fetching data for VIXM - 2025-01


2025-02-17 16:45:29,384 - INFO - Fetching data for VIXM - 2025-01


2025-02-17 16:45:30,271 - __main__ - INFO - Retrieved 2238 data points for 2025-01


2025-02-17 16:45:30,271 - INFO - Retrieved 2238 data points for 2025-01


2025-02-17 16:45:30,272 - __main__ - INFO - Fetching data for VIXM - 2024-12


2025-02-17 16:45:30,272 - INFO - Fetching data for VIXM - 2024-12


2025-02-17 16:45:31,227 - __main__ - INFO - Retrieved 2406 data points for 2024-12


2025-02-17 16:45:31,227 - INFO - Retrieved 2406 data points for 2024-12


2025-02-17 16:45:31,227 - __main__ - INFO - Fetching data for VIXM - 2024-11


2025-02-17 16:45:31,227 - INFO - Fetching data for VIXM - 2024-11


2025-02-17 16:45:32,349 - __main__ - INFO - Retrieved 2288 data points for 2024-11


2025-02-17 16:45:32,349 - INFO - Retrieved 2288 data points for 2024-11


2025-02-17 16:45:32,350 - __main__ - INFO - Fetching data for VIXM - 2024-10


2025-02-17 16:45:32,350 - INFO - Fetching data for VIXM - 2024-10


2025-02-17 16:45:33,277 - __main__ - INFO - Retrieved 2103 data points for 2024-10


2025-02-17 16:45:33,277 - INFO - Retrieved 2103 data points for 2024-10


2025-02-17 16:45:33,278 - __main__ - INFO - Fetching data for VIXM - 2024-09


2025-02-17 16:45:33,278 - INFO - Fetching data for VIXM - 2024-09


2025-02-17 16:45:34,202 - __main__ - INFO - Retrieved 2290 data points for 2024-09


2025-02-17 16:45:34,202 - INFO - Retrieved 2290 data points for 2024-09


2025-02-17 16:45:34,202 - __main__ - INFO - Fetching data for VIXM - 2024-08


2025-02-17 16:45:34,202 - INFO - Fetching data for VIXM - 2024-08


2025-02-17 16:45:35,205 - __main__ - INFO - Retrieved 2972 data points for 2024-08


2025-02-17 16:45:35,205 - INFO - Retrieved 2972 data points for 2024-08


2025-02-17 16:45:35,206 - __main__ - INFO - Fetching data for VIXM - 2024-07


2025-02-17 16:45:35,206 - INFO - Fetching data for VIXM - 2024-07


2025-02-17 16:45:36,107 - __main__ - INFO - Retrieved 1632 data points for 2024-07


2025-02-17 16:45:36,107 - INFO - Retrieved 1632 data points for 2024-07


2025-02-17 16:45:36,108 - __main__ - INFO - Fetching data for VIXM - 2024-06


2025-02-17 16:45:36,108 - INFO - Fetching data for VIXM - 2024-06


2025-02-17 16:45:36,881 - __main__ - INFO - Retrieved 1218 data points for 2024-06


2025-02-17 16:45:36,881 - INFO - Retrieved 1218 data points for 2024-06


2025-02-17 16:45:36,882 - __main__ - INFO - Fetching data for VIXM - 2024-05


2025-02-17 16:45:36,882 - INFO - Fetching data for VIXM - 2024-05


2025-02-17 16:45:37,380 - __main__ - INFO - Retrieved 1586 data points for 2024-05


2025-02-17 16:45:37,380 - INFO - Retrieved 1586 data points for 2024-05


2025-02-17 16:45:37,380 - __main__ - INFO - Fetching data for VIXM - 2024-04


2025-02-17 16:45:37,380 - INFO - Fetching data for VIXM - 2024-04


2025-02-17 16:45:38,345 - __main__ - INFO - Retrieved 1982 data points for 2024-04


2025-02-17 16:45:38,345 - INFO - Retrieved 1982 data points for 2024-04


2025-02-17 16:45:38,345 - __main__ - INFO - Fetching data for VIXM - 2024-03


2025-02-17 16:45:38,345 - INFO - Fetching data for VIXM - 2024-03


2025-02-17 16:45:39,222 - __main__ - INFO - Retrieved 1407 data points for 2024-03


2025-02-17 16:45:39,222 - INFO - Retrieved 1407 data points for 2024-03


2025-02-17 16:45:39,223 - __main__ - INFO - Writing data to /Users/benpfeffer/Desktop/CODE/multi-factor-volatility-driven-liquidity-arbitrage/db/raw/intraday/VIXM_1min.csv


2025-02-17 16:45:39,223 - INFO - Writing data to /Users/benpfeffer/Desktop/CODE/multi-factor-volatility-driven-liquidity-arbitrage/db/raw/intraday/VIXM_1min.csv


2025-02-17 16:45:39,224 - __main__ - INFO - Data fetch completed successfully


2025-02-17 16:45:39,224 - INFO - Data fetch completed successfully


PosixPath('/Users/benpfeffer/Desktop/CODE/multi-factor-volatility-driven-liquidity-arbitrage/db/raw/intraday/VIXM_1min.csv')

In [8]:
def search_symbol(keywords: str, api_key: str = ALPHA_VANTAGE_API_KEY, datatype: str = "csv") -> dict:
    """
    Search for stock symbols and company information using Alpha Vantage's search endpoint.
    
    Args:
        keywords (str): Search keywords/terms
        api_key (str): Alpha Vantage API key
        datatype (str): Return data format - 'json' or 'csv'
    
    Returns:
        dict: Search results containing matching symbols and company information
    """
    base_url = "https://www.alphavantage.co/query"
    
    params = {
        "function": "SYMBOL_SEARCH",
        "keywords": keywords,
        "apikey": api_key,
        "datatype": datatype
    }
    
    try:
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        
        if datatype == "csv":
            return response.text.split("\r\n")
        else:
            return response.text
            
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None

# Example usage
search_results = search_symbol("vix")
print(search_results)


['symbol,name,type,region,marketOpen,marketClose,timezone,currency,matchScore', 'VIXM,ProShares VIX Mid-Term Futures ETF,ETF,United States,09:30,16:00,UTC-04,USD,0.8571', 'VIXY,ProShares VIX Short-Term Futures ETF,ETF,United States,09:30,16:00,UTC-04,USD,0.8571', 'VIXL.LON,S&P 500 VIX Short-term Futures Index (0930-1600 EST),ETF,United Kingdom,08:00,16:30,UTC+01,USD,0.6667', 'VXSYF,ViXS Systems Inc.,Equity,United States,09:30,16:00,UTC-04,USD,0.5000', '']


In [4]:
from src.config.paths import *
import requests
import datetime
from datetime import datetime, timedelta
import time
import csv
import json

def fetch_market_sentiment(tickers: str = None, 
                         topics: str = None,
                         time_from: str = None,
                         time_to: str = None,
                         sort: str = "LATEST",
                         limit: int = 1000,
                         api_key: str = ALPHA_VANTAGE_API_KEY,
                         output_dir: Path = NEWS_CACHE) -> dict:
    """
    Fetch market news and sentiment data from Alpha Vantage for at least one year.
    Rate limited to 75 requests per minute.
    
    Args:
        tickers (str, optional): Comma-separated stock/crypto/forex symbols to filter for
        topics (str, optional): Comma-separated news topics to filter for
        time_from (str, optional): Start time in YYYYMMDDTHHMM format
        time_to (str, optional): End time in YYYYMMDDTHHMM format  
        sort (str, optional): Sort order - 'LATEST', 'EARLIEST' or 'RELEVANCE'
        limit (int, optional): Number of results to return (max 1000)
        api_key (str): Alpha Vantage API key
        output_dir (Path): Directory to save output files
        
    Returns:
        dict: Market news and sentiment data
    """
    base_url = "https://www.alphavantage.co/query"
    
    # Calculate date one year ago
    if not time_from:
        one_year_ago = datetime.now() - timedelta(days=365)
        time_from = one_year_ago.strftime("%Y%m%dT%H%M")
    
    params = {
        "function": "NEWS_SENTIMENT",
        "apikey": api_key,
        "sort": sort,
        "limit": limit,
        "time_from": time_from
    }
    
    if tickers:
        params["tickers"] = tickers
    if topics:
        params["topics"] = topics
    if time_to:
        params["time_to"] = time_to
        
    try:
        # Add rate limiting delay to stay under 75 requests/min
        time.sleep(0.8)  # 800ms delay between requests
        
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        data = response.json()
        
        # Save to JSON file
        if tickers:
            output_file = output_dir / f"{tickers}_sentiment.json"
            with open(output_file, 'w') as f:
                json.dump(data, f, indent=4)
                
        return data
            
    except requests.exceptions.RequestException as e:
        print(f"Error fetching sentiment data: {e}")
        return None

for ticker in DJ_TITANS_50_TICKER:
    # Example usage
    sentiment_results = fetch_market_sentiment(tickers=ticker, 
                                        topics="technology,earnings,financial_markets,economy_macro,finance,economy_fiscal,economy_monetary,ipo,retail_wholesale",
                                        limit=1000)
    print(sentiment_results)


{'items': '0', 'sentiment_score_definition': 'x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish', 'relevance_score_definition': '0 < x <= 1, with a higher score indicating higher relevance.', 'feed': []}
{'items': '0', 'sentiment_score_definition': 'x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish', 'relevance_score_definition': '0 < x <= 1, with a higher score indicating higher relevance.', 'feed': []}
{'items': '0', 'sentiment_score_definition': 'x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish', 'relevance_score_definition': '0 < x <= 1, with a higher score indicating higher relevance.', 'feed': []}
{'items': '0', 'sentiment_score_definition': 'x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x 

In [10]:
import requests
import datetime
from datetime import datetime, timedelta
import time
import csv
import json

def fetch_federal_funds_rate(api_key, interval='daily', years=5, output_dir=FEDERAL_FUNDS_CACHE):
    """Fetch Federal Funds Rate data from Alpha Vantage.
    
    Args:
        api_key (str): Alpha Vantage API key
        interval (str, optional): Data interval - 'daily', 'weekly' or 'monthly'. Defaults to 'monthly'.
        years (int, optional): Number of years of historical data to fetch. Defaults to 5.
        
    Returns:
        dict: Federal Funds Rate data
    """
    base_url = "https://www.alphavantage.co/query"
    
    # Calculate date N years ago
    n_years_ago = datetime.now() - timedelta(days=365*years)
    time_from = n_years_ago.strftime("%Y-%m-%d")
    
    params = {
        "function": "FEDERAL_FUNDS_RATE",
        "apikey": api_key,
        "interval": interval,
        "outputsize": "full"
    }
    
    try:
        # Add rate limiting delay to stay under 75 requests/min
        time.sleep(0.8)  # 800ms delay between requests
        
        response = requests.get(base_url, params=params)
        response.raise_for_status()
        data = response.json()
        
        # Filter data for last N years
        if data and 'data' in data:
            filtered_data = [d for d in data['data'] if d['date'] >= time_from]
            data['data'] = filtered_data
            
        # Save to CSV
        data_file = output_dir / f"federal_funds_{interval}.csv"
        with open(data_file, "w", newline='') as f:
            if data and 'data' in data:
                writer = csv.DictWriter(f, fieldnames=['date', 'value'])
                writer.writeheader()
                for row in data['data']:
                    writer.writerow(row)
            
        return data 
    except requests.exceptions.RequestException as e:
        print(f"Error fetching federal funds rate data: {e}")
        return None

# Example usage
fed_funds_rate = fetch_federal_funds_rate(ALPHA_VANTAGE_API_KEY, interval='daily', years=5)
if fed_funds_rate:
    print(json.dumps(fed_funds_rate, indent=2))


{
  "name": "Effective Federal Funds Rate",
  "interval": "daily",
  "unit": "percent",
  "data": [
    {
      "date": "2025-02-13",
      "value": "4.33"
    },
    {
      "date": "2025-02-12",
      "value": "4.33"
    },
    {
      "date": "2025-02-11",
      "value": "4.33"
    },
    {
      "date": "2025-02-10",
      "value": "4.33"
    },
    {
      "date": "2025-02-09",
      "value": "4.33"
    },
    {
      "date": "2025-02-08",
      "value": "4.33"
    },
    {
      "date": "2025-02-07",
      "value": "4.33"
    },
    {
      "date": "2025-02-06",
      "value": "4.33"
    },
    {
      "date": "2025-02-05",
      "value": "4.33"
    },
    {
      "date": "2025-02-04",
      "value": "4.33"
    },
    {
      "date": "2025-02-03",
      "value": "4.33"
    },
    {
      "date": "2025-02-02",
      "value": "4.33"
    },
    {
      "date": "2025-02-01",
      "value": "4.33"
    },
    {
      "date": "2025-01-31",
      "value": "4.33"
    },
    {
      "date"