<a href="https://colab.research.google.com/github/HAP2Y/Astro-Finance/blob/main/Astro_Finance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Colab cell 1: Install required packages
!pip install --upgrade pandas numpy pytz pyarrow yfinance pyswisseph nsepy scikit-learn xgboost lightgbm shap vectorbt --quiet

# If using Google Drive to persist data
from google.colab import drive
drive.mount('/content/drive')  # follow the interactive prompt

# Make project folders
import os
PROJECT_ROOT = "/content/drive/MyDrive/astro_finance"  # change if you want another path
os.makedirs(PROJECT_ROOT, exist_ok=True)
os.makedirs(os.path.join(PROJECT_ROOT, "raw"), exist_ok=True)
os.makedirs(os.path.join(PROJECT_ROOT, "processed"), exist_ok=True)
print("Project root:", PROJECT_ROOT)



Mounted at /content/drive
Project root: /content/drive/MyDrive/astro_finance


In [3]:
# Colab cell 2A: Fetch using yfinance (example: NIFTY 50 via ^NSEI or a sample ticker)
import yfinance as yf
import pandas as pd

# Example tickers you can change: '^NSEI' (Nifty 50), '^NSEBANK' (Banking), use specific stock tickers for sectors
tickers = {
    "NIFTY50": "^NSEI",
    "RELIANCE": "RELIANCE.NS",
    "TCS": "TCS.NS",
    # or use individual stocks: "RELIANCE.NS", "TCS.NS"
}

start="2000-01-01"
end="2025-10-26"  # keep end inclusive; change as needed

dfs = {}
for name, tk in tickers.items():
    print("Downloading", tk)
    df = yf.download(tk, start=start, end=end, progress=False)
    if df.empty:
        print(f"Warning: {tk} returned empty — you may need to pick a different symbol or data source.")
    else:
        df.index = pd.to_datetime(df.index).tz_localize(None)  # remove tz for alignment
        dfs[name] = df

# Example: save a combined file
for name, df in dfs.items():
    path = f"{PROJECT_ROOT}/raw/{name}.parquet"
    df.to_parquet(path)
    print("Saved", path)


Downloading ^NSEI


  df = yf.download(tk, start=start, end=end, progress=False)


Downloading RELIANCE.NS


  df = yf.download(tk, start=start, end=end, progress=False)


Downloading TCS.NS


  df = yf.download(tk, start=start, end=end, progress=False)


Saved /content/drive/MyDrive/astro_finance/raw/NIFTY50.parquet
Saved /content/drive/MyDrive/astro_finance/raw/RELIANCE.parquet
Saved /content/drive/MyDrive/astro_finance/raw/TCS.parquet


In [4]:
# Colab cell 3: planetary ephemeris generation (daily) using pyswisseph
import swisseph as swe
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

# configure sidereal mode to Lahiri
swe.set_sid_mode(swe.SIDM_LAHIRI)

PLANETS = {
    'Sun': swe.SUN,
    'Moon': swe.MOON,
    'Mercury': swe.MERCURY,
    'Venus': swe.VENUS,
    'Mars': swe.MARS,
    'Jupiter': swe.JUPITER,
    'Saturn': swe.SATURN,
    'Rahu': swe.MEAN_NODE  # mean node (Rahu); Ketu = node + 180
}

def generate_ephemeris(start_date, end_date, timezone='UTC', step='1D'):
    dates = pd.date_range(start=start_date, end=end_date, freq=step)
    rows = []
    for dt in dates:
        # convert to Julian Day (UT)
        # swe.julday expects year, month, day, hour (UT decimal)
        jday = swe.julday(dt.year, dt.month, dt.day, dt.hour + dt.minute/60.0 + dt.second/3600.0)
        row = {'date': dt.date()}
        for name, pid in PLANETS.items():
            ecl = swe.calc_ut(jday, pid)[0]  # returns array: [longitude, latitude, distance, ...]
            lon = float(ecl[0]) % 360.0
            row[f"{name}_lon"] = lon
            # compute speed (approx using 1-day difference) for retrograde flag
        rows.append(row)
    df = pd.DataFrame(rows).set_index('date')

    # compute speeds with small forward diff to get retrograde (vectorized)
    # recompute with half-day offset for better speed estimate
    def lon_for_date(dt, pid):
        jday = swe.julday(dt.year, dt.month, dt.day, dt.hour + dt.minute/60.0)
        return float(swe.calc_ut(jday, pid)[0]) % 360.0

    # Add rashi, nakshatra, retrograde approx by day-to-day diff
    for name, pid in PLANETS.items():
        lon_col = f"{name}_lon"
        # rashi 1-12 (1 = Aries). rashi_index 0..11
        df[f"{name}_rashi"] = (df[lon_col] // 30).astype(int) + 1
        # nakshatra 1-27 (each 13°20' = 13.3333333 deg)
        df[f"{name}_nakshatra"] = (df[lon_col] // (360.0/27.0)).astype(int) + 1

    # speed approximation: difference between consecutive days (handle circular wrap)
    for name in PLANETS.keys():
        lon = df[f"{name}_lon"].values
        # minimal circular difference
        diff = np.angle(np.deg2rad(lon[1:]) - np.deg2rad(lon[:-1]))  # returns radians in -pi..pi; careful
        # Simpler robust circular diff (deg)
        degdiff = ( (lon[1:] - lon[:-1] + 180) % 360 ) - 180
        degdiff = np.insert(degdiff, 0, np.nan)
        df[f"{name}_speed_deg_per_day"] = degdiff
        df[f"{name}_is_retrograde"] = df[f"{name}_speed_deg_per_day"] < 0

    # example: compute conjunctions within x degrees for each day (pairwise)
    # compute absolute angle difference (0..180)
    planet_names = list(PLANETS.keys())
    for i in range(len(planet_names)):
        for j in range(i+1, len(planet_names)):
            a = planet_names[i]; b = planet_names[j]
            diff = np.abs(((df[f"{a}_lon"] - df[f"{b}_lon"] + 180) % 360) - 180)  # 0..180
            df[f"{a}_{b}_sep_deg"] = diff
            # boolean near conjunction if separation < 5 degrees (tunable)
            df[f"{a}_{b}_conj_5deg"] = diff <= 5.0

    return df

# Run for your analysis window
ephem = generate_ephemeris("2000-01-01", "2025-10-26")
ephem.to_parquet(f"{PROJECT_ROOT}/processed/ephemeris_daily_lahiri.parquet")
print("Saved ephemeris to processed/ephemeris_daily_lahiri.parquet")



Saved ephemeris to processed/ephemeris_daily_lahiri.parquet


In [6]:
# Colab cell 4: align market daily OHLC with ephemeris daily
import pandas as pd

# load market data saved earlier (example uses TCS/NIFTY files)
market_files = [f for f in os.listdir(os.path.join(PROJECT_ROOT, "raw")) if f.endswith(".parquet")]
aligned = {}
for mf in market_files:
    mdf = pd.read_parquet(os.path.join(PROJECT_ROOT, "raw", mf))
    # Drop the 'Ticker' level from the columns of mdf
    if isinstance(mdf.columns, pd.MultiIndex) and 'Ticker' in mdf.columns.names:
        mdf.columns = mdf.columns.droplevel('Ticker')
    # ensure date index and daily frequency (close-to-close)
    mdf.index = pd.to_datetime(mdf.index).date
    # align to ephemeris index (dates)
    ep = pd.read_parquet(f"{PROJECT_ROOT}/processed/ephemeris_daily_lahiri.parquet")
    ep.index = pd.to_datetime(ep.index).date
    combined = ep.join(mdf, how='left')  # left join ephemeris; you can change to inner
    # compute daily returns (close-to-close)
    if 'Close' in combined.columns or 'close' in combined.columns:
        close_col = 'Close' if 'Close' in combined.columns else 'close'
        combined['return_1d'] = combined[close_col].pct_change()
    # save per-market aligned dataset
    outname = mf.replace('.parquet', '_aligned.parquet')
    combined.to_parquet(os.path.join(PROJECT_ROOT, "processed", outname))
    print("Saved aligned:", outname)

  combined['return_1d'] = combined[close_col].pct_change()


Saved aligned: NIFTY50_aligned.parquet


  combined['return_1d'] = combined[close_col].pct_change()


Saved aligned: RELIANCE_aligned.parquet


  combined['return_1d'] = combined[close_col].pct_change()


Saved aligned: TCS_aligned.parquet


In [7]:
# Colab cell 5: quick checks
import pandas as pd
sample = pd.read_parquet(f"{PROJECT_ROOT}/processed/ephemeris_daily_lahiri.parquet").head(5)
print("Ephemeris sample:\n", sample[[c for c in sample.columns if '_lon' in c]].head())

aligned_files = [f for f in os.listdir(os.path.join(PROJECT_ROOT, "processed")) if "_aligned" in f]
for af in aligned_files:
    df = pd.read_parquet(os.path.join(PROJECT_ROOT, "processed", af))
    print(af, "rows:", len(df), "columns:", len(df.columns))
    print(df[['return_1d']].dropna().describe().to_string())
    break


Ephemeris sample:
                Sun_lon    Moon_lon  Mercury_lon   Venus_lon    Mars_lon  \
date                                                                      
2000-01-01  279.859216  217.293367   271.111805  240.961421  327.575480   
2000-01-02  280.878648  229.317135   272.668066  242.170461  328.351154   
2000-01-03  281.898169  241.221768   274.229691  243.380617  329.126848   
2000-01-04  282.917757  253.060244   275.796853  244.591842  329.902547   
2000-01-05  283.937386  264.877534   277.369741  245.804088  330.678230   

            Jupiter_lon  Saturn_lon    Rahu_lon  
date                                             
2000-01-01    25.233072   40.405840  125.067123  
2000-01-02    25.273833   40.385896  125.014171  
2000-01-03    25.317966   40.367786  124.961229  
2000-01-04    25.365454   40.351522  124.908294  
2000-01-05    25.416276   40.337111  124.855365  
NIFTY50_aligned.parquet rows: 9431 columns: 102
         return_1d
count  6614.000000
mean      0.000322
