In [None]:
import os
import requests
import pandas as pd
from dotenv import load_dotenv

# Load the Alpha Vantage API key from a .env file into the environment.
# API_KEY is None when ALPHA_VANTAGE_API_KEY is not set.
load_dotenv()
API_KEY = os.getenv("ALPHA_VANTAGE_API_KEY")

# Input CSV; it must contain a "Ticker" column.
TICKERS_FILE = r"C:\Users\Jrans\Desktop\Model Comparison Project\Data\unique_tickers.csv"

# Output path for the combined CSV of all tickers.
OUTPUT_FILE = r"C:\Users\Jrans\Desktop\Model Comparison Project\Data\all_tickers_monthly_data.csv"

# Read the unique, non-null tickers as strings.
# The column is selected by name instead of via DataFrame.squeeze():
# squeeze() collapses a one-row frame to a bare scalar, which has no
# .unique() and would crash when the file lists a single ticker.
tickers = (
    pd.read_csv(TICKERS_FILE, usecols=["Ticker"], dtype=str)["Ticker"]
    .dropna()
    .unique()
)

# Alpha Vantage query endpoint.
API_URL = "https://www.alphavantage.co/query"

# Inclusive date range kept from each ticker's history.
START_DATE = "1995-01-01"
END_DATE = "2025-01-01"

def fetch_monthly_data(ticker):
    """Fetch monthly adjusted price data from Alpha Vantage API.

    Args:
        ticker: Symbol to request.

    Returns:
        A DataFrame with the columns ``date``, ``open``, ``high``, ``low``,
        ``close``, ``adjusted_close``, ``volume``, ``dividend`` and
        ``ticker``, restricted to rows dated between START_DATE and END_DATE
        (inclusive). Returns None when the request fails, the response is not
        valid JSON, or it carries no usable monthly series.
    """
    params = {
        "function": "TIME_SERIES_MONTHLY_ADJUSTED",
        "symbol": ticker,
        "apikey": API_KEY,
        "datatype": "json"
    }

    # A timeout keeps one stalled connection from hanging the whole batch,
    # and network errors are reported instead of aborting the caller's loop.
    try:
        response = requests.get(API_URL, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch data for {ticker}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data for {ticker}")
        return None

    try:
        data = response.json()
    except ValueError:
        # The body was not JSON (e.g. an HTML error page).
        print(f"Failed to fetch data for {ticker}")
        return None

    series = None
    if isinstance(data, dict):
        series = data.get("Monthly Adjusted Time Series")

    if not series:
        # NOTE(review): Alpha Vantage appears to report rate limits and bad
        # symbols in-band under one of these keys with HTTP 200 - confirm
        # against the API docs. Surfacing the text distinguishes the cases.
        reason = None
        if isinstance(data, dict):
            for key in ("Error Message", "Note", "Information"):
                if key in data:
                    reason = data[key]
                    break
        if reason:
            print(f"No data found for {ticker}: {reason}")
        else:
            print(f"No data found for {ticker}")
        return None

    # Convert JSON to DataFrame (one row per date string).
    df = pd.DataFrame.from_dict(series, orient="index")

    # Rename columns for clarity. Prefer matching on the API's field names so
    # a reordered or extended payload cannot silently mislabel columns.
    expected = [
        "open", "high", "low", "close", "adjusted_close", "volume", "dividend"
    ]
    field_names = {
        "1. open": "open",
        "2. high": "high",
        "3. low": "low",
        "4. close": "close",
        "5. adjusted close": "adjusted_close",
        "6. volume": "volume",
        "7. dividend amount": "dividend",
    }
    renamed = df.rename(columns=field_names)
    if set(expected) <= set(renamed.columns):
        df = renamed[expected]
    elif df.shape[1] == len(expected):
        # Unknown field names but the expected count: fall back to the
        # original positional naming.
        df.columns = expected
    else:
        print(f"No data found for {ticker}: unexpected columns {list(df.columns)}")
        return None

    # Convert index to datetime
    df.index = pd.to_datetime(df.index)

    # Convert columns to numeric; unparseable values become NaN.
    df = df.apply(pd.to_numeric, errors="coerce")

    # Filter data within date range (both bounds inclusive).
    df = df[(df.index >= START_DATE) & (df.index <= END_DATE)]

    # Move the date index into a regular "date" column.
    df = df.rename_axis("date").reset_index()

    # Add ticker column
    df["ticker"] = ticker

    return df

# Per-ticker DataFrames, accumulated in request order.
all_data = []

# Query each ticker in turn; tickers that yield nothing are skipped.
for ticker in tickers:
    print(f"Fetching data for {ticker}...")
    df = fetch_monthly_data(ticker)

    if df is None:
        continue

    all_data.append(df)

# Write a single combined CSV, unless no ticker produced any data.
if not all_data:
    print("No data was retrieved.")
else:
    combined_df = pd.concat(all_data, ignore_index=True)
    combined_df.to_csv(OUTPUT_FILE, index=False)
    print(f"All tickers' data saved to {OUTPUT_FILE}")

print("Data collection complete.")


Fetching data for A...
Fetching data for AA...
Fetching data for AAL...
Fetching data for AAMI...
Fetching data for AAOI...
Fetching data for AAON...
Fetching data for AAP...
Fetching data for AAPL...
Fetching data for AAT...
Fetching data for AAWW...
Fetching data for ABAX...
Fetching data for ABBV...
Fetching data for ABCB...
Fetching data for ABG...
Fetching data for ABM...
Fetching data for ABMD...
Fetching data for ABNB...
Fetching data for ABR...
Fetching data for ABT...
Fetching data for ABX...
No data found for ABX
Fetching data for ACA...
Fetching data for ACAT...
Fetching data for ACGL...
Fetching data for ACHC...
Fetching data for ACIC...
Fetching data for ACIW...
Fetching data for ACLS...
Fetching data for ACM...
Fetching data for ACN...
Fetching data for ADBE...
Fetching data for ADC...
Fetching data for ADEA...
Fetching data for ADI...
Fetching data for ADM...
Fetching data for ADMA...
Fetching data for ADNT...
Fetching data for ADP...
Fetching data for ADSK...
Fetching d