In [14]:
import yfinance as yf
import pandas as pd
import numpy as np
import random
from dateutil.relativedelta import relativedelta
from concurrent.futures import ProcessPoolExecutor, as_completed
import multiprocessing
import os


In [16]:
import yfinance as yf
import pandas as pd
import numpy as np
import random
from dateutil.relativedelta import relativedelta
import multiprocessing
import os

# --- 1) Configuration and Data Preloading ---
PAIRS = [
    ("SPY", "SPXL"),
    ("SOXX", "SOXL"),
    ("IWM", "TNA"),
    ("QQQ", "TQQQ"),
    ("DIA", "UDOW"),
    ("XBI", "LABU"),
    ("XLF", "FAS")
]
N_SIMULATIONS = 35

# Global cache for Close prices per pair
FULL_CLOSES = {}

def preload_data():
    for under, trip in PAIRS:
        data = yf.download([under, trip], period="max", auto_adjust=False)
        closes = data.xs("Close", axis=1, level=1).dropna(how="all")
        FULL_CLOSES[(under, trip)] = closes


# --- 2) Summary class, unchanged logic but uses preloaded data ---
class Summary:
    def __init__(self, under, trip, closes, start, end):
        self.UNDERLYING = under
        self.TRIPLE = trip
        self.STARTING_VALUE_UNDER = 3000.0
        self.STARTING_VALUE_TRIP = -1000.0
        self.cash = 0.0

        # slice by date window
        df_window = closes.loc[start:end]
        self.dat_under = df_window[under]
        self.dat_trip = df_window[trip]
        dates = df_window.index.strftime('%Y-%m-%d')

        # prepare DataFrame
        self.dfr = pd.DataFrame(index=dates)
        self.dfr[f"{under}_Close"] = self.dat_under.values
        self.dfr[f"{trip}_Close"]  = self.dat_trip.values

        # initial shares
        self.shares_under = self.STARTING_VALUE_UNDER / self.dat_under.iloc[0]
        self.shares_trip  = self.STARTING_VALUE_TRIP / self.dat_trip.iloc[0]

        # init result columns
        cols = [
            f"{under}_per_change", f"{trip}_per_change", "ideal_per_change",
            f"{under}_cumulative", f"{trip}_cumulative", "ideal_cumulative",
            f"portfolio_{under}_long", f"portfolio_{trip}_short", "portfolio_total",
            "cash_used", "beta_exposure", "beta_exposure_per", "cumulative_P/L", "P/L",
            f"shares_{under}", f"shares_{trip}"
        ]
        for c in cols:
            self.dfr[c] = 0.0

    def summary_stats(self):
        u = self.dat_under.pct_change().fillna(0) * 100
        t = self.dat_trip.pct_change().fillna(0) * 100
        i = u * 3

        self.dfr[f"{self.UNDERLYING}_per_change"] = u.values
        self.dfr[f"{self.TRIPLE}_per_change"]   = t.values
        self.dfr["ideal_per_change"]           = i.values

        self.dfr[f"{self.UNDERLYING}_cumulative"] = ((1+u/100).cumprod()*100).values
        self.dfr[f"{self.TRIPLE}_cumulative"]   = ((1+t/100).cumprod()*100).values
        self.dfr["ideal_cumulative"]           = ((1+i/100).cumprod()*100).values

    def init_portfolio(self):
        pu0 = self.dfr[f"{self.UNDERLYING}_Close"].iloc[0]
        pt0 = self.dfr[f"{self.TRIPLE}_Close"].iloc[0]
        self.dfr[f"portfolio_{self.UNDERLYING}_long"].iloc[0] = self.STARTING_VALUE_UNDER
        self.dfr[f"portfolio_{self.TRIPLE}_short"].iloc[0]  = self.STARTING_VALUE_TRIP
        self.dfr["portfolio_total"].iloc[0] = self.STARTING_VALUE_UNDER + self.STARTING_VALUE_TRIP
        beta0 = self.shares_under*pu0 + self.shares_trip*pt0*3
        self.dfr["beta_exposure"].iloc[0]     = beta0
        self.dfr["beta_exposure_per"].iloc[0] = 0.0
        self.dfr["cash_used"].iloc[0]         = 0.0
        self.dfr["cumulative_P/L"].iloc[0]   = 0.0
        self.dfr["P/L"].iloc[0]              = 0.0
        self.dfr[f"shares_{self.UNDERLYING}"].iloc[0] = self.shares_under
        self.dfr[f"shares_{self.TRIPLE}"].iloc[0]      = self.shares_trip

    def update_portfolio(self, i):
        pu = self.dfr[f"{self.UNDERLYING}_Close"].iloc[i]
        pt = self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
        long_val  = self.shares_under * pu
        short_val = self.shares_trip  * pt
        total     = long_val + short_val

        self.dfr[f"portfolio_{self.UNDERLYING}_long"].iloc[i] = long_val
        self.dfr[f"portfolio_{self.TRIPLE}_short"].iloc[i]  = short_val
        self.dfr["portfolio_total"].iloc[i]                = total

        beta = self.shares_under*pu + self.shares_trip*pt*3
        self.dfr["beta_exposure"].iloc[i]     = beta
        self.dfr["beta_exposure_per"].iloc[i] = beta/total if total else 0

    def beta_norm_strategy(self, exposure_indicator):
        self.init_portfolio()
        for i in range(1, len(self.dfr)):
            self.update_portfolio(i)
            beta     = self.dfr["beta_exposure"].iloc[i]
            long_val = self.dfr[f"portfolio_{self.UNDERLYING}_long"].iloc[i]

            if beta > exposure_indicator*long_val:
                amt = beta/3 / self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
                self.shares_trip -= amt
                self.cash      -= amt * self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
                self.update_portfolio(i)

            elif beta < -exposure_indicator*long_val:
                amt = -beta/3 / self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
                self.shares_trip += amt
                self.cash       += amt * self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
                self.update_portfolio(i)

            self.dfr["cash_used"].iloc[i] = self.cash
            delta_cash = self.cash - self.dfr["cash_used"].iloc[i-1]
            pl = (self.dfr["portfolio_total"].iloc[i] -
                  self.dfr["portfolio_total"].iloc[i-1] - delta_cash)
            self.dfr["P/L"].iloc[i]            = pl
            self.dfr["cumulative_P/L"].iloc[i] = self.dfr["cumulative_P/L"].iloc[i-1] + pl

        return self.dfr["P/L"].sum()


# --- 3) Simulation worker, using preloaded data ---
def simulate_one(args):
    under, trip = args
    closes = FULL_CLOSES[(under, trip)]
    first_valid = {col: closes[col].first_valid_index() for col in closes.columns}
    fd = max(first_valid.values())
    ld = closes.index.max()

    total_days = (ld - fd).days
    max_years  = total_days // 365
    if max_years < 6:
        return None

    years = random.randint(6, max_years)
    end1  = fd + relativedelta(years=years)
    offset_max = total_days - (end1 - fd).days
    offset     = random.randint(0, offset_max)
    sd = fd + pd.Timedelta(days=offset)
    ed = sd + relativedelta(years=years)

    summ = Summary(under, trip, closes, sd.strftime('%Y-%m-%d'), ed.strftime('%Y-%m-%d'))
    summ.summary_stats()
    pl = summ.beta_norm_strategy(0.0001)

    ideal    = summ.dfr['ideal_cumulative'].iloc[-1]
    trip_cum = summ.dfr[f"{trip}_cumulative"].iloc[-1]
    return {'underlying': under, 'triple': trip, 'years': years, 'gap': ideal - trip_cum, 'P/L': pl}


# --- 4) Main: preload data once, then parallel map ---
def main():
    multiprocessing.freeze_support()
    preload_data()

    tasks = [(u, t) for u, t in PAIRS for _ in range(N_SIMULATIONS)]
    with multiprocessing.Pool(processes=os.cpu_count() or 4) as pool:
        results = pool.map(simulate_one, tasks)

    filtered = [r for r in results if r]
    df = pd.DataFrame(filtered)
    print(f"Correlation gap vs P/L: {df['gap'].corr(df['P/L']):.3f}")
    print(f"Correlation years vs gap: {df['years'].corr(df['gap']):.3f}")


if __name__ == '__main__':
    main()

[*********************100%***********************]  2 of 2 completed


KeyError: 'Close'