<a href="https://colab.research.google.com/github/HAP2Y/Astro-Finance/blob/main/AstroFinanceProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install all required libraries
!pip install yfinance tabulate

# Step 2: Import all necessary libraries
import yfinance as yf
import pandas as pd
from datetime import datetime
from tabulate import tabulate
import time # Library for adding the "polite wait"
import os

# --- Configuration ---
# Instrument universe (Indian and US markets plus a few global benchmarks).
# Each row reads: (Yahoo Finance symbol, quote currency, volume unit).
_TICKER_ROWS = (
    # Indian markets - indices
    ('^NSEI', 'INR', 'shares'),                 # NIFTY 50
    ('^NSEBANK', 'INR', 'shares'),              # NIFTY BANK
    ('NIFTY_FIN_SERVICE.NS', 'INR', 'shares'),  # NIFTY FINANCIAL SERVICES
    ('^CNXIT', 'INR', 'shares'),                # NIFTY IT
    ('^CNXPHARMA', 'INR', 'shares'),            # NIFTY PHARMA
    ('^CNXAUTO', 'INR', 'shares'),              # NIFTY AUTO
    ('^CNXMETAL', 'INR', 'shares'),             # NIFTY METAL
    ('^CNXFMCG', 'INR', 'shares'),              # NIFTY FMCG
    ('^INDIAVIX', 'INR', 'points'),             # India VIX

    # Indian markets - key stocks
    ('RELIANCE.NS', 'INR', 'shares'),           # Reliance Industries
    ('TCS.NS', 'INR', 'shares'),                # Tata Consultancy Services
    ('HDFCBANK.NS', 'INR', 'shares'),           # HDFC Bank

    # US markets - indices
    ('^GSPC', 'USD', 'points'),                 # S&P 500
    ('^DJI', 'USD', 'points'),                  # Dow Jones Industrial Average
    ('^NDX', 'USD', 'points'),                  # NASDAQ 100
    ('^RUT', 'USD', 'points'),                  # Russell 2000 (Small-Cap)
    ('^VIX', 'USD', 'points'),                  # CBOE Volatility Index
    ('^TNX', 'USD', 'points'),                  # US 10-Yr Treasury Yield

    # US markets - key stocks
    ('AAPL', 'USD', 'shares'),                  # Apple Inc.
    ('MSFT', 'USD', 'shares'),                  # Microsoft Corp.
    ('NVDA', 'USD', 'shares'),                  # NVIDIA Corp.

    # Global markets - indices
    ('^N225', 'JPY', 'points'),                 # Nikkei 225 (Japan)
    ('^FTSE', 'GBP', 'points'),                 # FTSE 100 (UK)
    ('^GDAXI', 'EUR', 'points'),                # DAX (Germany)
    ('000001.SS', 'CNY', 'shares'),             # SSE Composite (Shanghai)
    ('^HSI', 'HKD', 'points'),                  # Hang Seng (Hong Kong)

    # Commodities
    ('GC=F', 'USD', 'contracts'),               # Gold
    ('CL=F', 'USD', 'contracts'),               # Crude Oil
    ('SI=F', 'USD', 'contracts'),               # Silver

    # Currencies & DXY
    ('DX-Y.NYB', 'USD', 'points'),              # US Dollar Index
    ('USDINR=X', 'INR', 'rate'),
    ('EURUSD=X', 'USD', 'rate'),
)

# Per-symbol metadata lookup; insertion order follows _TICKER_ROWS.
TICKER_INFO = {
    symbol: {'currency': currency, 'volume_unit': unit}
    for symbol, currency, unit in _TICKER_ROWS
}

# Symbols in processing order.
TICKERS = [*TICKER_INFO]

# Download window: fixed start, end resolved to today's date when the cell runs.
START_DATE = '2000-01-01'
END_DATE = datetime.now().strftime('%Y-%m-%d')

# Prefix of every Parquet file written by the download loop.
OUTPUT_FILENAME_PREFIX = 'financial_data'

# Pause (seconds) inserted between consecutive download requests.
WAIT_TIME_SECONDS = 2

# One {'Ticker', 'Status', 'Details'} dict per processed ticker.
results_summary = []

# --- Main Script: Acquire, Save, and Verify ---
# For each configured ticker: download its daily history, tag it with the
# ticker's currency / volume unit, normalise the column names, write one
# Parquet file into the working directory and record the outcome so that a
# summary table can be printed once every ticker has been processed.

print(f"🚀 Starting long-term data acquisition from {START_DATE} to {END_DATE}...")
print(f"Total tickers to process: {len(TICKERS)}")

# yfinance column label -> column name used in the saved files.
COLUMN_RENAMES = {
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Adj Close': 'adj_close',
    'Volume': 'volume',
}

# Preferred output column order; columns missing from a download are skipped.
COLUMN_ORDER = ['date', 'open', 'high', 'low', 'close', 'volume', 'currency', 'volume_unit', 'adj_close']

for i, ticker in enumerate(TICKERS):
    print("\n" + "="*60)
    print(f"Processing Ticker {i+1}/{len(TICKERS)}: {ticker}")
    print("="*60)

    # Pessimistic default: only flipped to "Success" after the file is written
    # and verified, so the except-branch below reports "Failed".
    status = "Failed"
    message = ""

    try:
        # Strip / replace characters that are awkward in file names.
        safe_ticker_name = ticker.replace('^', '').replace('=X', '').replace('=F', '').replace('-','_')
        filename = f"{OUTPUT_FILENAME_PREFIX}_{safe_ticker_name}.parquet"

        # --- Part 1: Data Acquisition ---
        print("Fetching data...")
        # NOTE(review): recent yfinance releases default to auto_adjust=True, in
        # which case no 'Adj Close' column is returned - confirm which is wanted.
        data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)

        if data.empty:
            message = "No data returned for this ticker in the specified date range."
            print(f"⚠️ {message} Skipping.")
            results_summary.append({'Ticker': ticker, 'Status': 'Skipped', 'Details': message})
            continue  # the finally-clause still performs the polite wait

        # --- Part 2: Processing and Saving ---
        # Newer yfinance versions return two-level (field, ticker) column labels
        # even for a single symbol. Keep only the field level so the rename and
        # column selection below work on plain names and the Parquet file gets
        # ordinary string column names instead of stringified tuples.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        data['currency'] = TICKER_INFO[ticker]['currency']
        data['volume_unit'] = TICKER_INFO[ticker]['volume_unit']

        # Turn the date index into a regular column and apply the lower-case names.
        data = data.reset_index().rename(columns=COLUMN_RENAMES)
        data['date'] = pd.to_datetime(data['date']).dt.date

        data = data[[col for col in COLUMN_ORDER if col in data.columns]]

        data.to_parquet(filename, index=False)
        print(f"✅ Successfully saved data to '{filename}'")

        # --- Part 3: Verification & Reporting ---
        first_date = data['date'].min()
        last_date = data['date'].max()
        row_count = len(data)

        print(f"📈 Data available from {first_date} to {last_date} ({row_count} rows).")

        # Display a sample of the data (first 5 rows)
        print("\n--- Data Sample ---")
        print(tabulate(data.head(), headers='keys', tablefmt='psql', showindex=False))

        status = "Success"
        message = f"{row_count} rows | {first_date} to {last_date}"
        results_summary.append({'Ticker': ticker, 'Status': status, 'Details': message})

    except Exception as e:
        # Per-ticker boundary: record the failure and carry on with the next ticker.
        message = f"An error occurred: {e}"
        print(f"❌ {message}")
        results_summary.append({'Ticker': ticker, 'Status': status, 'Details': message})

    finally:
        # --- Part 4: The "Polite Wait" ---
        print(f"\nWaiting for {WAIT_TIME_SECONDS} seconds...")
        time.sleep(WAIT_TIME_SECONDS)

# --- Part 5: Final Summary ---
print("\n" + "="*60)
print("📊 Final Verification Summary")
print("="*60)

summary_df = pd.DataFrame(results_summary)
print(tabulate(summary_df, headers='keys', tablefmt='psql', showindex=False))

print("\n🎉 All tickers processed. Long-term data acquisition complete.")

In [None]:
# Step 1: Install all required libraries
!pip install yfinance tabulate pyswisseph

# Step 2: Import all necessary libraries
import yfinance as yf
import pandas as pd
from datetime import datetime
from tabulate import tabulate
import time
import os
import swisseph as swe
from google.colab import drive

# --- Part 1: Setup Google Drive and Folder Structure ---
# Mount Google Drive and make sure the project's output folders exist.
# Everything later in the notebook writes below BASE_PATH, so a failure here
# stops the cell.

print("📂 Setting up Google Drive and project folders...")
try:
    drive.mount('/content/drive')
except Exception as e:
    print(f"❌ Could not mount Google Drive. Error: {e}")
    # Re-raise instead of calling exit(): execution still stops here because
    # nothing can be saved without the drive, but the kernel stays alive and
    # the original traceback is shown.
    raise

# Define the base path for your project in Google Drive
BASE_PATH = '/content/drive/MyDrive/AstroFinanceProject'
FINANCIAL_DATA_PATH = os.path.join(BASE_PATH, 'financial_data')
ASTRO_DATA_PATH = os.path.join(BASE_PATH, 'astro_data')

# Create the directories if they don't exist. Kept outside the try-block above
# so a folder-creation error is not misreported as a mount failure.
os.makedirs(FINANCIAL_DATA_PATH, exist_ok=True)
os.makedirs(ASTRO_DATA_PATH, exist_ok=True)

print(f"✅ Project folders are ready in: {BASE_PATH}")

# --- Part 2: Fetch and Save ALL Financial Data ---

# Instrument universe for the download loop.
# Each row reads: (Yahoo Finance symbol, quote currency, volume unit).
_TICKER_ROWS = (
    # Indian markets - indices
    ('^NSEI', 'INR', 'shares'),                 # NIFTY 50
    ('^NSEBANK', 'INR', 'shares'),              # NIFTY BANK
    ('NIFTY_FIN_SERVICE.NS', 'INR', 'shares'),  # NIFTY FINANCIAL SERVICES
    ('^CNXIT', 'INR', 'shares'),                # NIFTY IT
    ('^CNXPHARMA', 'INR', 'shares'),            # NIFTY PHARMA
    ('^CNXAUTO', 'INR', 'shares'),              # NIFTY AUTO
    ('^CNXMETAL', 'INR', 'shares'),             # NIFTY METAL
    ('^CNXFMCG', 'INR', 'shares'),              # NIFTY FMCG
    ('^INDIAVIX', 'INR', 'points'),             # India VIX

    # Indian markets - key stocks
    ('RELIANCE.NS', 'INR', 'shares'),           # Reliance Industries
    ('TCS.NS', 'INR', 'shares'),                # Tata Consultancy Services
    ('HDFCBANK.NS', 'INR', 'shares'),           # HDFC Bank

    # US markets - indices
    ('^GSPC', 'USD', 'points'),                 # S&P 500
    ('^DJI', 'USD', 'points'),                  # Dow Jones Industrial Average
    ('^NDX', 'USD', 'points'),                  # NASDAQ 100
    ('^RUT', 'USD', 'points'),                  # Russell 2000 (Small-Cap)
    ('^VIX', 'USD', 'points'),                  # CBOE Volatility Index
    ('^TNX', 'USD', 'points'),                  # US 10-Yr Treasury Yield

    # US markets - key stocks
    ('AAPL', 'USD', 'shares'),                  # Apple Inc.
    ('MSFT', 'USD', 'shares'),                  # Microsoft Corp.
    ('NVDA', 'USD', 'shares'),                  # NVIDIA Corp.

    # Global markets - indices
    ('^N225', 'JPY', 'points'),                 # Nikkei 225 (Japan)
    ('^FTSE', 'GBP', 'points'),                 # FTSE 100 (UK)
    ('^GDAXI', 'EUR', 'points'),                # DAX (Germany)
    ('000001.SS', 'CNY', 'shares'),             # SSE Composite (Shanghai)
    ('^HSI', 'HKD', 'points'),                  # Hang Seng (Hong Kong)

    # Commodities
    ('GC=F', 'USD', 'contracts'),               # Gold
    ('CL=F', 'USD', 'contracts'),               # Crude Oil
    ('SI=F', 'USD', 'contracts'),               # Silver

    # Currencies & DXY
    ('DX-Y.NYB', 'USD', 'points'),              # US Dollar Index
    ('USDINR=X', 'INR', 'rate'),
    ('EURUSD=X', 'USD', 'rate'),
)

# Per-symbol metadata lookup; insertion order follows _TICKER_ROWS.
TICKER_INFO = {
    symbol: {'currency': currency, 'volume_unit': unit}
    for symbol, currency, unit in _TICKER_ROWS
}

# Symbols in processing order.
TICKERS = [*TICKER_INFO]

# Download window: fixed start, end resolved to today's date when the cell runs.
START_DATE = '2000-01-01'
END_DATE = datetime.now().strftime('%Y-%m-%d')

# Pause (seconds) inserted after each ticker is processed.
WAIT_TIME_SECONDS = 1

# One {'Ticker', 'Status', 'Details'} dict per processed ticker.
financial_results_summary = []

print("\n" + "="*60)
print(f"📈 Starting financial data acquisition for {len(TICKERS)} tickers...")
print("="*60)

# For each configured ticker: download its daily history, tag it with the
# ticker's currency / volume unit, normalise the column names, write one
# Parquet file below FINANCIAL_DATA_PATH and record the outcome for the
# summary table printed after the loop.

# yfinance column label -> column name used in the saved files.
COLUMN_RENAMES = {
    'Date': 'date',
    'Open': 'open',
    'High': 'high',
    'Low': 'low',
    'Close': 'close',
    'Adj Close': 'adj_close',
    'Volume': 'volume',
}

# Preferred output column order; columns missing from a download are skipped.
COLUMN_ORDER = ['date', 'open', 'high', 'low', 'close', 'volume', 'currency', 'volume_unit', 'adj_close']

for i, ticker in enumerate(TICKERS):
    print(f"\n--- Processing Ticker {i+1}/{len(TICKERS)}: {ticker} ---")
    try:
        # Strip / replace characters that are awkward in file names.
        safe_ticker_name = ticker.replace('^', '').replace('=X', '').replace('=F', '').replace('-','_').replace('.','_')
        filename = os.path.join(FINANCIAL_DATA_PATH, f"financial_data_{safe_ticker_name}.parquet")

        # NOTE(review): recent yfinance releases default to auto_adjust=True, in
        # which case no 'Adj Close' column is returned - confirm which is wanted.
        data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)

        if data.empty:
            message = "No data returned in date range."
            print(f"⚠️ {message} Skipping.")
            financial_results_summary.append({'Ticker': ticker, 'Status': 'Skipped', 'Details': message})
            continue  # the finally-clause still performs the wait

        # Newer yfinance versions return two-level (field, ticker) column labels
        # even for a single symbol. Keep only the field level so the rename and
        # column selection below work on plain names and the Parquet file gets
        # ordinary string column names instead of stringified tuples.
        if isinstance(data.columns, pd.MultiIndex):
            data.columns = data.columns.get_level_values(0)

        data['currency'] = TICKER_INFO[ticker]['currency']
        data['volume_unit'] = TICKER_INFO[ticker]['volume_unit']

        # Turn the date index into a regular column and apply the lower-case names.
        data = data.reset_index().rename(columns=COLUMN_RENAMES)
        data['date'] = pd.to_datetime(data['date']).dt.date

        data = data[[col for col in COLUMN_ORDER if col in data.columns]]
        data.to_parquet(filename, index=False)

        first_date, last_date, row_count = data['date'].min(), data['date'].max(), len(data)
        message = f"{row_count} rows | {first_date} to {last_date}"
        print(f"✅ Saved to Drive. {message}")
        financial_results_summary.append({'Ticker': ticker, 'Status': 'Success', 'Details': message})

    except Exception as e:
        # Per-ticker boundary: record the failure and carry on with the next ticker.
        message = f"Error: {e}"
        print(f"❌ {message}")
        financial_results_summary.append({'Ticker': ticker, 'Status': 'Failed', 'Details': message})

    finally:
        # Pause between requests regardless of the outcome.
        time.sleep(WAIT_TIME_SECONDS)

print("\n" + "="*60)
print("📊 Financial Data Verification Summary")
print("="*60)
summary_df = pd.DataFrame(financial_results_summary)
print(tabulate(summary_df, headers='keys', tablefmt='psql', showindex=False))

print("\n🎉🎉🎉 Phase 1 - Financial Data Generation Complete! 🎉🎉🎉")

📂 Setting up Google Drive and project folders...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Project folders are ready in: /content/drive/MyDrive/AstroFinanceProject

📈 Starting financial data acquisition for 32 tickers...

--- Processing Ticker 1/32: ^NSEI ---
✅ Saved to Drive. 4442 rows | 2007-09-17 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 2/32: ^NSEBANK ---
✅ Saved to Drive. 4167 rows | 2007-09-17 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 3/32: NIFTY_FIN_SERVICE.NS ---
✅ Saved to Drive. 3465 rows | 2011-09-07 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 4/32: ^CNXIT ---
✅ Saved to Drive. 4152 rows | 2007-09-17 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 5/32: ^CNXPHARMA ---
✅ Saved to Drive. 3630 rows | 2011-01-31 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 6/32: ^CNXAUTO ---
✅ Saved to Drive. 3504 rows | 2011-07-12 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 7/32: ^CNXMETAL ---
✅ Saved to Drive. 3504 rows | 2011-07-12 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 8/32: ^CNXFMCG ---
✅ Saved to Drive. 3615 rows | 2011-01-31 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 9/32: ^INDIAVIX ---
✅ Saved to Drive. 4324 rows | 2008-03-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 10/32: RELIANCE.NS ---
✅ Saved to Drive. 6440 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 11/32: TCS.NS ---
✅ Saved to Drive. 5761 rows | 2002-08-12 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 12/32: HDFCBANK.NS ---
✅ Saved to Drive. 6443 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 13/32: ^GSPC ---
✅ Saved to Drive. 6494 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 14/32: ^DJI ---
✅ Saved to Drive. 6494 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 15/32: ^NDX ---


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)


✅ Saved to Drive. 6494 rows | 2000-01-03 to 2025-10-27

--- Processing Ticker 16/32: ^RUT ---
✅ Saved to Drive. 6494 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 17/32: ^VIX ---
✅ Saved to Drive. 6494 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 18/32: ^TNX ---
✅ Saved to Drive. 6488 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 19/32: AAPL ---
✅ Saved to Drive. 6494 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 20/32: MSFT ---
✅ Saved to Drive. 6494 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 21/32: NVDA ---
✅ Saved to Drive. 6494 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 22/32: ^N225 ---
✅ Saved to Drive. 6325 rows | 2000-01-04 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 23/32: ^FTSE ---
✅ Saved to Drive. 6522 rows | 2000-01-04 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 24/32: ^GDAXI ---
✅ Saved to Drive. 6558 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 25/32: 000001.SS ---
✅ Saved to Drive. 6248 rows | 2000-01-04 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 26/32: ^HSI ---
✅ Saved to Drive. 6362 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 27/32: GC=F ---
✅ Saved to Drive. 6313 rows | 2000-08-30 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 28/32: CL=F ---
✅ Saved to Drive. 6322 rows | 2000-08-23 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 29/32: SI=F ---
✅ Saved to Drive. 6315 rows | 2000-08-30 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 30/32: DX-Y.NYB ---
✅ Saved to Drive. 6523 rows | 2000-01-03 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 31/32: USDINR=X ---
✅ Saved to Drive. 5681 rows | 2003-12-01 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



--- Processing Ticker 32/32: EURUSD=X ---
✅ Saved to Drive. 5684 rows | 2003-12-01 to 2025-10-27


  data = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False)



📊 Financial Data Verification Summary
+----------------------+----------+--------------------------------------+
| Ticker               | Status   | Details                              |
|----------------------+----------+--------------------------------------|
| ^NSEI                | Success  | 4442 rows | 2007-09-17 to 2025-10-27 |
| ^NSEBANK             | Success  | 4167 rows | 2007-09-17 to 2025-10-27 |
| NIFTY_FIN_SERVICE.NS | Success  | 3465 rows | 2011-09-07 to 2025-10-27 |
| ^CNXIT               | Success  | 4152 rows | 2007-09-17 to 2025-10-27 |
| ^CNXPHARMA           | Success  | 3630 rows | 2011-01-31 to 2025-10-27 |
| ^CNXAUTO             | Success  | 3504 rows | 2011-07-12 to 2025-10-27 |
| ^CNXMETAL            | Success  | 3504 rows | 2011-07-12 to 2025-10-27 |
| ^CNXFMCG             | Success  | 3615 rows | 2011-01-31 to 2025-10-27 |
| ^INDIAVIX            | Success  | 4324 rows | 2008-03-03 to 2025-10-27 |
| RELIANCE.NS          | Success  | 6440 rows | 2000-01-03 to

In [None]:
# ---
# This script is for Cell 2.
# It assumes Cell 1 has been run and Google Drive is mounted.
# It installs its own dependencies and generates the astrological data.
# ---

# Step 1: Install required libraries
# We do NOT need swisseph-data, as we will download the files manually
!pip install pandas tabulate pyswisseph

# Step 2: Import libraries
import pandas as pd
from datetime import datetime
from tabulate import tabulate
import os
import swisseph as swe
# We no longer import swisseph_data
from google.colab import drive

# Step 3: Define constants (paths mirror the ones created by the Drive setup cell)

# Date window for the ephemeris table; the end is today's date formatted as
# zero-padded YYYY-MM-DD.
START_DATE = '2000-01-01'
END_DATE = datetime.now().strftime('%Y-%m-%d')

# Project root on Google Drive and the folder that receives astrological output.
BASE_PATH = '/content/drive/MyDrive/AstroFinanceProject'
ASTRO_DATA_PATH = os.path.join(BASE_PATH, 'astro_data')

# Sub-folder holding the manually downloaded Swiss Ephemeris data files.
EPHE_DOWNLOAD_PATH = os.path.join(BASE_PATH, 'astro_data', 'ephe')

# --- Part 3: Generate Vedic Astrological Ephemeris Data ---

print("\n" + "="*60)
print("🔭 Generating Vedic astrological ephemeris data (2000-Today)...")
print("="*60)

# --- NEW ROBUST APPROACH: Download and Unzip Master File ---

# Create the target directory if it doesn't exist
os.makedirs(EPHE_DOWNLOAD_PATH, exist_ok=True)

# Check for a key planetary file. If it's missing, download and unzip the master file.
check_file = os.path.join(EPHE_DOWNLOAD_PATH, 'seplm00.se1')
if not os.path.exists(check_file):
    print(f"Essential ephemeris file ({os.path.basename(check_file)}) not found.")

    # Define the ZIP file URL and local save path
    EPHE_ZIP_URL = 'https://www.astro.ch/ftp/swisseph/ephe.zip'
    EPHE_ZIP_PATH = os.path.join(ASTRO_DATA_PATH, 'ephe.zip') # Save zip one level up

    print(f"Downloading 'ephe.zip' (~111 MB) from {EPHE_ZIP_URL}...")
    print("This may take a minute, but only needs to run once.")

    # Download the single zip file
    !wget -q -O {EPHE_ZIP_PATH} {EPHE_ZIP_URL}

    print("Download complete. Unzipping...")

    # Unzip the file into the target 'ephe' directory
    # -o: overwrite existing files without prompting
    # -d: destination directory
    # -qq: quiet mode (less verbose)
    !unzip -o -qq {EPHE_ZIP_PATH} -d {EPHE_DOWNLOAD_PATH}

    print("Unzip complete. Deleting zip file to save space...")
    !rm {EPHE_ZIP_PATH}

    print("✅ Ephemeris files are ready.")
else:
    print(f"Essential ephemeris files already exist in {EPHE_DOWNLOAD_PATH}.")


# Point Swiss Ephemeris at the downloaded data folder.
try:
    swe.set_ephe_path(EPHE_DOWNLOAD_PATH)
except Exception as e:
    print(f"❌ CRITICAL ERROR: Could not set ephemeris data path. {e}")
    # Re-raise instead of calling exit(): execution still stops here, but the
    # kernel stays alive and the original traceback is shown.
    raise
print(f"Ephemeris data path set to: {EPHE_DOWNLOAD_PATH}")

# Select the Lahiri ayanamsha for Vedic astrology.
# NOTE(review): per the Swiss Ephemeris documentation this setting only affects
# calculations that request sidereal positions via the sidereal flag - confirm
# the calculation calls pass it.
swe.set_sid_mode(swe.SIDM_LAHIRI)

# Bodies to tabulate: output column prefix -> Swiss Ephemeris body id.
PLANETS = {
    'SUN': swe.SUN,
    'MOON': swe.MOON,
    'MERCURY': swe.MERCURY,
    'VENUS': swe.VENUS,
    'MARS': swe.MARS,
    'JUPITER': swe.JUPITER,
    'SATURN': swe.SATURN,
    'RAHU': swe.MEAN_NODE # Rahu (Mean North Node)
}

# Calculation flags:
#   FLG_SWIEPH   - use the Swiss Ephemeris data files
#   FLG_SPEED    - also compute daily speeds
#   FLG_SIDEREAL - apply the ayanamsha selected with set_sid_mode(); without
#                  this flag tropical longitudes are returned.
CALC_FLAGS = swe.FLG_SWIEPH | swe.FLG_SPEED | swe.FLG_SIDEREAL

# Build one row per calendar day at 00:00 UTC holding, for every body, its
# sidereal longitude (<NAME>_lon) and its speed in longitude (<NAME>_speed).
date_range = pd.date_range(start=START_DATE, end=END_DATE, freq='D')
astro_data = []
error_printed = False

for date in date_range:
    # utc_to_jd returns (jd_et, jd_ut); calc_ut expects the UT value, i.e. index 1.
    julian_day = swe.utc_to_jd(date.year, date.month, date.day, 0, 0, 0, swe.GREG_CAL)[1]
    daily_positions = {'date': date.date()}

    for name, planet_id in PLANETS.items():
        try:
            result = swe.calc_ut(julian_day, planet_id, CALC_FLAGS)
            # Newer pyswisseph releases return (coords, retflags); older ones
            # return the coordinates as one flat tuple. Accept both shapes.
            coords = result[0] if isinstance(result[0], (tuple, list)) else result
            # coords: (lon, lat, dist, lon_speed, lat_speed, dist_speed)
            longitude, speed = coords[0], coords[3]
        except (swe.Error, IndexError, TypeError) as e:
            # Store nulls on failure and keep going; report only the first failure.
            longitude = speed = None
            if not error_printed:
                print(f"⚠️ Warning: Could not calculate a position on {date.date()}. Storing null. This message will not repeat.")
                print(f"   Details ({name}): {e}")
                error_printed = True

        daily_positions[f'{name}_lon'] = longitude
        daily_positions[f'{name}_speed'] = speed

    astro_data.append(daily_positions)

astro_df = pd.DataFrame(astro_data)

# Verification check: save the ephemeris table and print a sample, warning
# when the calculated values look broken.
if astro_df.empty:
    print("\n" + "!"*60)
    print("❌ ERROR: The astrological DataFrame is empty.")
    print("Please check your START_DATE and END_DATE variables.")
    print("!"*60)
else:
    # Ensure the save directory exists (it should from above, but good to check)
    os.makedirs(ASTRO_DATA_PATH, exist_ok=True)

    astro_filename = os.path.join(ASTRO_DATA_PATH, 'vedic_ephemeris_2000_today.parquet')
    astro_df.to_parquet(astro_filename, index=False)

    print(f"✅ Successfully saved astrological data to '{astro_filename}'")
    print(f"📈 Total days processed: {len(astro_df)}")

    print("\n--- Astrological Data Sample ---")
    # The 'date' column is always populated, so it must be left out of the
    # null check; otherwise the warning below could never trigger.
    position_columns = [col for col in astro_df.columns if col != 'date']
    if astro_df[position_columns].iloc[0].isnull().all():
        print("WARNING: DataFrame was saved, but the first row is all null values. Calculations are likely failing.")
    print(tabulate(astro_df.head(), headers='keys', tablefmt='psql', showindex=False))

print("\n🎉🎉🎉 Phase 1 - Astrological Data Generation Complete! 🎉🎉🎉")


🔭 Generating Vedic astrological ephemeris data (2000-Today)...
Essential ephemeris file (seplm00.se1) not found.
Downloading 'ephe.zip' (~111 MB) from https://www.astro.ch/ftp/swisseph/ephe.zip...
This may take a minute, but only needs to run once.
Download complete. Unzipping...
[/content/drive/MyDrive/AstroFinanceProject/astro_data/ephe.zip]
  End-of-central-directory signature not found.  Either this file is not
  a zipfile, or it constitutes one disk of a multi-part archive.  In the
  latter case the central directory and zipfile comment will be found on
  the last disk(s) of this archive.
unzip:  cannot find zipfile directory in one of /content/drive/MyDrive/AstroFinanceProject/astro_data/ephe.zip or
        /content/drive/MyDrive/AstroFinanceProject/astro_data/ephe.zip.zip, and cannot find /content/drive/MyDrive/AstroFinanceProject/astro_data/ephe.zip.ZIP, period.
Unzip complete. Deleting zip file to save space...
✅ Ephemeris files are ready.
Ephemeris data path set to: /content