In [5]:
import yfinance as yf
import pandas as pd
import numpy as np
import random
from dateutil.relativedelta import relativedelta
from concurrent.futures import ProcessPoolExecutor, as_completed
import os


In [11]:
class Summary:
    def __init__(self, underlying, triple, start, end):
        self.UNDERLYING = underlying
        self.TRIPLE = triple
        self.STARTING_VALUE_UNDER = 3000.0
        self.STARTING_VALUE_TRIP = -1000.0
        self.TICKERS = [underlying, triple]
        self.cash = 0.0

        # Download data once
        data = yf.download(self.TICKERS, start=start, end=end, auto_adjust=False)
        self.dat_under = data[underlying]['Close']
        self.dat_trip = data[triple]['Close']

        dates = [d.strftime('%Y-%m-%d') for d in self.dat_under.index]
        self.dfr = pd.DataFrame(index=dates)
        self.dfr[f"{underlying}_Close"] = self.dat_under.values
        self.dfr[f"{triple}_Close"] = self.dat_trip.values

        # Initial shares
        self.shares_under = self.STARTING_VALUE_UNDER / self.dat_under.iloc[0]
        self.shares_trip = self.STARTING_VALUE_TRIP / self.dat_trip.iloc[0]

        # Prepare columns
        for col in [f"{underlying}_per_change", f"{triple}_per_change", "ideal_per_change"]:
            self.dfr[col] = 0.0
        for col in [f"{underlying}_cumulative", f"{triple}_cumulative", "ideal_cumulative"]:
            self.dfr[col] = 0.0
        for col in [f"portfolio_{underlying}_long", f"portfolio_{triple}_short", "portfolio_total",
                    "cash_used", "beta_exposure", "beta_exposure_per", "cumulative_P/L", "P/L"]:
            self.dfr[col] = 0.0
        for col in [f"shares_{underlying}", f"shares_{triple}"]:
            self.dfr[col] = 0.0

    def summary_stats(self):
        under_pct = self.dat_under.pct_change().fillna(0) * 100
        trip_pct = self.dat_trip.pct_change().fillna(0) * 100
        ideal_pct = under_pct * 3

        self.dfr[f"{self.UNDERLYING}_per_change"] = under_pct.values
        self.dfr[f"{self.TRIPLE}_per_change"] = trip_pct.values
        self.dfr["ideal_per_change"] = ideal_pct.values

        self.dfr[f"{self.UNDERLYING}_cumulative"] = ((1 + under_pct / 100).cumprod() * 100).values
        self.dfr[f"{self.TRIPLE}_cumulative"] = ((1 + trip_pct / 100).cumprod() * 100).values
        self.dfr["ideal_cumulative"] = ((1 + ideal_pct / 100).cumprod() * 100).values

    def init_portfolio(self):
        first_under_price = self.dfr[f"{self.UNDERLYING}_Close"].iloc[0]
        first_trip_price = self.dfr[f"{self.TRIPLE}_Close"].iloc[0]
        self.dfr[f"portfolio_{self.UNDERLYING}_long"].iloc[0] = self.STARTING_VALUE_UNDER
        self.dfr[f"portfolio_{self.TRIPLE}_short"].iloc[0] = self.STARTING_VALUE_TRIP
        self.dfr["portfolio_total"].iloc[0] = self.STARTING_VALUE_UNDER + self.STARTING_VALUE_TRIP
        self.dfr["beta_exposure"].iloc[0] = self.shares_under * first_under_price + self.shares_trip * first_trip_price * 3
        self.dfr["beta_exposure_per"].iloc[0] = 0.0
        self.dfr["cash_used"].iloc[0] = 0.0
        self.dfr["cumulative_P/L"].iloc[0] = 0.0
        self.dfr["P/L"].iloc[0] = 0.0
        self.dfr[f"shares_{self.UNDERLYING}"].iloc[0] = self.shares_under
        self.dfr[f"shares_{self.TRIPLE}"].iloc[0] = self.shares_trip

    def update_portfolio(self, i):
        price_u = self.dfr[f"{self.UNDERLYING}_Close"].iloc[i]
        price_t = self.dfr[f"{self.TRIPLE}_Close"].iloc[i]

        self.dfr[f"portfolio_{self.UNDERLYING}_long"].iloc[i] = self.shares_under * price_u
        self.dfr[f"portfolio_{self.TRIPLE}_short"].iloc[i] = self.shares_trip * price_t
        self.dfr["portfolio_total"].iloc[i] = self.dfr[f"portfolio_{self.UNDERLYING}_long"].iloc[i] + self.dfr[f"portfolio_{self.TRIPLE}_short"].iloc[i]
        beta = self.shares_under * price_u + self.shares_trip * price_t * 3
        self.dfr["beta_exposure"].iloc[i] = beta
        self.dfr["beta_exposure_per"].iloc[i] = beta / self.dfr[f"portfolio_{self.UNDERLYING}_long"].iloc[i]

    def beta_norm_strategy(self, exposure_indicator):
        self.init_portfolio()
        for i in range(1, len(self.dfr)):
            self.update_portfolio(i)
            beta_val = self.dfr["beta_exposure"].iloc[i]
            long_val = self.dfr[f"portfolio_{self.UNDERLYING}_long"].iloc[i]

            if beta_val > exposure_indicator * long_val:
                amt = beta_val/3 / self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
                self.shares_trip -= amt
                self.cash -= amt * self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
                self.update_portfolio(i)

            elif beta_val < -exposure_indicator * long_val:
                amt = -beta_val/3 / self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
                self.shares_trip += amt
                self.cash += amt * self.dfr[f"{self.TRIPLE}_Close"].iloc[i]
                self.update_portfolio(i)

            self.dfr["cash_used"].iloc[i] = self.cash
            cash_change = self.dfr["cash_used"].iloc[i] - self.dfr["cash_used"].iloc[i-1]
            pl = self.dfr["portfolio_total"].iloc[i] - self.dfr["portfolio_total"].iloc[i-1] - cash_change
            self.dfr["P/L"].iloc[i] = pl
            self.dfr["cumulative_P/L"].iloc[i] = self.dfr["cumulative_P/L"].iloc[i-1] + pl

        return self.dfr["P/L"].sum()


def simulate_one(under, trip):
    try:
        full = yf.download([under, trip], period="max", auto_adjust=False)
        closes = full.xs("Close", axis=1, level=1)
        first_valid = pd.Series({col: closes[col].first_valid_index() for col in closes.columns})
        first_date = first_valid.max()
        last_date = full.index.max()

        diff_days = (last_date - first_date).days
        max_years = diff_days // 365
        if max_years < 6:
            return None

        years = random.randint(6, max_years)
        candidate_end = first_date + relativedelta(years=years)
        delta_days = (candidate_end - first_date).days
        max_offset = diff_days - delta_days
        offset = random.randint(0, max_offset)

        start_date = first_date + pd.Timedelta(days=offset)
        end_date = start_date + relativedelta(years=years)

        summ = Summary(under, trip, start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))
        summ.summary_stats()
        pl = summ.beta_norm_strategy(exposure_indicator=0.0001)

        final_ideal = summ.dfr["ideal_cumulative"].iloc[-1]
        final_trip = summ.dfr[f"{trip}_cumulative"].iloc[-1]
        gap = final_ideal - final_trip

        return {"underlying": under, "triple": trip, "years": years,
                "start": start_date, "gap": gap, "P/L": pl}
    except Exception:
        return None


def main():
    pairs = [("SPY","SPXL"),("SOXX","SOXL"),("IWM","TNA"),
             ("QQQ","TQQQ"),("DIA","UDOW"),("XBI","LABU"),("XLF","FAS")]
    n_simulations = 35
    tasks = [(u, t) for u, t in pairs for _ in range(n_simulations)]
    results = []

    max_workers = os.cpu_count() or 4
    ctx = multiprocessing.get_context('fork')
    with ProcessPoolExecutor(max_workers=max_workers, mp_context=ctx) as executor:
        futures = [executor.submit(simulate_one, u, t) for u, t in tasks]
        for f in as_completed(futures):
            res = f.result()
            if res:
                results.append(res)

    df_res = pd.DataFrame(results)
    corr_gap_pl = df_res["gap"].corr(df_res["P/L"])
    corr_years_gap = df_res["years"].corr(df_res["gap"])
    print(f"Correlation gap vs P/L: {corr_gap_pl:.3f}")
    print(f"Correlation years vs gap: {corr_years_gap:.3f}")

    df_res.plot.scatter(x="gap", y="P/L")
    df_res.plot.scatter(x="years", y="gap")

if __name__ == "__main__":
    main()


NameError: name 'multiprocessing' is not defined