In [14]:
import yfinance as yf
import pandas as pd
import numpy as np
import random
from dateutil.relativedelta import relativedelta
from concurrent.futures import ProcessPoolExecutor, as_completed
import multiprocessing
import os


In [None]:
class Summary:
    """Back-test of a long `underlying` position against a short `triple` position.

    The instance holds the two close-price series and a result table
    (`self.dfr`, indexed by ``YYYY-MM-DD`` strings) that the methods fill in:
    daily/cumulative percentage changes, position values, the exposure figure
    ``long_value + 3 * short_value``, cash movements and P/L.

    Attributes set at construction:
        UNDERLYING, TRIPLE: the two ticker symbols.
        STARTING_VALUE_UNDER / STARTING_VALUE_TRIP: initial position values
            (3000.0 long, -1000.0 short).
        TICKERS: ``[underlying, triple]``.
        cash: running cash balance changed by rebalancing trades.
        dat_under, dat_trip: close-price series of the two tickers.
        shares_under, shares_trip: current share counts (short is negative).
        dfr: the result DataFrame.
    """

    def __init__(self, underlying, triple, start, end):
        """Download closes for both tickers between `start` and `end` and set up state.

        Args:
            underlying: ticker symbol of the long leg.
            triple: ticker symbol of the short leg.
            start, end: date bounds passed straight to ``yf.download``.
        """
        # group_by="ticker" puts the ticker on the outer column level, which is
        # the layout the `data[ticker]['Close']` lookups below depend on (the
        # library default groups by price field instead).
        data = yf.download(
            [underlying, triple],
            start=start,
            end=end,
            auto_adjust=False,
            group_by="ticker",
        )
        self._setup(underlying, triple, data[underlying]['Close'], data[triple]['Close'])

    @classmethod
    def from_closes(cls, underlying, triple, closes_under, closes_trip):
        """Build an instance from already-loaded close series (no download).

        Both series are expected to be equally long and `closes_under` must
        have an index whose elements support ``strftime`` (e.g. a
        DatetimeIndex), because the index is used to label the result table.
        """
        inst = cls.__new__(cls)
        inst._setup(underlying, triple, closes_under, closes_trip)
        return inst

    def _setup(self, underlying, triple, closes_under, closes_trip):
        """Initialise constants, positions and the zero-filled result table."""
        self.UNDERLYING = underlying
        self.TRIPLE = triple
        self.STARTING_VALUE_UNDER = 3000.0
        self.STARTING_VALUE_TRIP = -1000.0
        self.TICKERS = [underlying, triple]
        self.cash = 0.0

        self.dat_under = closes_under
        self.dat_trip = closes_trip

        dates = [d.strftime('%Y-%m-%d') for d in self.dat_under.index]
        self.dfr = pd.DataFrame(index=dates)
        self.dfr[f"{underlying}_Close"] = self.dat_under.values
        self.dfr[f"{triple}_Close"] = self.dat_trip.values

        # Initial positions: share counts implied by the starting values at
        # the first close (the short leg is a negative share count).
        self.shares_under = self.STARTING_VALUE_UNDER / self.dat_under.iloc[0]
        self.shares_trip = self.STARTING_VALUE_TRIP / self.dat_trip.iloc[0]

        # Columns initialization
        cols = [f"{underlying}_per_change", f"{triple}_per_change", "ideal_per_change",
                f"{underlying}_cumulative", f"{triple}_cumulative", "ideal_cumulative",
                f"portfolio_{underlying}_long", f"portfolio_{triple}_short", "portfolio_total",
                "cash_used", "beta_exposure", "beta_exposure_per", "cumulative_P/L", "P/L",
                f"shares_{underlying}", f"shares_{triple}"]
        for col in cols:
            self.dfr[col] = 0.0

    def _get(self, i, col):
        """Return the value stored at positional row `i` of column `col`."""
        return self.dfr[col].iat[i]

    def _set(self, i, col, value):
        """Write `value` at positional row `i` of column `col`.

        A single-step positional write is used instead of
        ``self.dfr[col].iloc[i] = value``: that chained form assigns into a
        temporary Series and does not reach `self.dfr` when pandas
        Copy-on-Write is active.
        """
        self.dfr.iat[i, self.dfr.columns.get_loc(col)] = value

    def summary_stats(self):
        """Fill the percentage-change and cumulative columns of `self.dfr`.

        The "ideal" series is three times the underlying's daily percentage
        change; cumulative columns start from a base of 100.
        """
        u = self.dat_under.pct_change().fillna(0) * 100
        t = self.dat_trip.pct_change().fillna(0) * 100
        i = u * 3
        self.dfr[f"{self.UNDERLYING}_per_change"] = u.values
        self.dfr[f"{self.TRIPLE}_per_change"] = t.values
        self.dfr["ideal_per_change"] = i.values
        self.dfr[f"{self.UNDERLYING}_cumulative"] = ((1 + u / 100).cumprod() * 100).values
        self.dfr[f"{self.TRIPLE}_cumulative"] = ((1 + t / 100).cumprod() * 100).values
        self.dfr["ideal_cumulative"] = ((1 + i / 100).cumprod() * 100).values

    def init_portfolio(self):
        """Write the starting portfolio state into row 0 of `self.dfr`."""
        prices_u = self._get(0, f"{self.UNDERLYING}_Close")
        prices_t = self._get(0, f"{self.TRIPLE}_Close")
        self._set(0, f"portfolio_{self.UNDERLYING}_long", self.STARTING_VALUE_UNDER)
        self._set(0, f"portfolio_{self.TRIPLE}_short", self.STARTING_VALUE_TRIP)
        self._set(0, "portfolio_total", self.STARTING_VALUE_UNDER + self.STARTING_VALUE_TRIP)
        beta0 = self.shares_under * prices_u + self.shares_trip * prices_t * 3
        self._set(0, "beta_exposure", beta0)
        self._set(0, "beta_exposure_per", 0.0)
        self._set(0, "cash_used", 0.0)
        self._set(0, "cumulative_P/L", 0.0)
        self._set(0, "P/L", 0.0)
        self._set(0, f"shares_{self.UNDERLYING}", self.shares_under)
        self._set(0, f"shares_{self.TRIPLE}", self.shares_trip)

    def update_portfolio(self, i):
        """Recompute position values and exposure for row `i` from current share counts.

        Args:
            i: positional row index into `self.dfr`.
        """
        pu = self._get(i, f"{self.UNDERLYING}_Close")
        pt = self._get(i, f"{self.TRIPLE}_Close")
        long_val = self.shares_under * pu
        short_val = self.shares_trip * pt
        total = long_val + short_val
        self._set(i, f"portfolio_{self.UNDERLYING}_long", long_val)
        self._set(i, f"portfolio_{self.TRIPLE}_short", short_val)
        self._set(i, "portfolio_total", total)
        # Exposure weights the short leg by 3.
        beta = long_val + short_val * 3
        self._set(i, "beta_exposure", beta)
        # Guard against a zero total to avoid a division by zero.
        self._set(i, "beta_exposure_per", beta / total if total != 0 else 0)

    def beta_norm_strategy(self, exposure_indicator):
        """Run the rebalancing simulation over every row and return total P/L.

        For each row after the first, positions are revalued; when the
        exposure exceeds ``exposure_indicator * long_value`` in either
        direction, the short leg is resized by ``|exposure| / 3`` worth of
        shares so that the exposure returns to zero, and the trade's value is
        booked in `self.cash`.

        Args:
            exposure_indicator: tolerated exposure as a fraction of the long
                position's value.

        Returns:
            The sum of the ``"P/L"`` column.
        """
        self.init_portfolio()
        trip_close = f"{self.TRIPLE}_Close"
        long_col = f"portfolio_{self.UNDERLYING}_long"
        for i in range(1, len(self.dfr)):
            self.update_portfolio(i)
            beta = self._get(i, "beta_exposure")
            long_val = self._get(i, long_col)
            price_t = self._get(i, trip_close)
            if beta > exposure_indicator * long_val:
                # Exposure too positive: enlarge the short leg.
                amt = beta / 3 / price_t
                self.shares_trip -= amt
                self.cash -= amt * price_t
                self.update_portfolio(i)
            elif beta < -exposure_indicator * long_val:
                # Exposure too negative: reduce the short leg.
                amt = -beta / 3 / price_t
                self.shares_trip += amt
                self.cash += amt * price_t
                self.update_portfolio(i)
            self._set(i, "cash_used", self.cash)
            delta_cash = self.cash - self._get(i - 1, "cash_used")
            # P/L excludes the value change caused purely by the trade itself.
            pl = self._get(i, "portfolio_total") - self._get(i - 1, "portfolio_total") - delta_cash
            self._set(i, "P/L", pl)
            self._set(i, "cumulative_P/L", self._get(i - 1, "cumulative_P/L") + pl)
        return self.dfr["P/L"].sum()


def simulate_one(under, trip):
    """Run one back-test of the pair over a randomly chosen window.

    The window is between 6 and the full number of whole years for which both
    tickers have close prices, starting at a random day offset that keeps the
    window inside the available history.

    Args:
        under: ticker symbol of the underlying.
        trip: ticker symbol of the leveraged counterpart.

    Returns:
        A dict with keys ``"underlying"``, ``"triple"``, ``"years"``,
        ``"gap"`` (final ideal cumulative minus final `trip` cumulative) and
        ``"P/L"``; or ``None`` when fewer than 6 years of common history exist
        or the simulation fails.
    """
    import logging  # local import: keeps this function self-contained

    try:
        # group_by="ticker" yields (ticker, field) columns, so the price field
        # sits on level 1 as the `xs` call below requires.
        full = yf.download([under, trip], period="max", auto_adjust=False,
                           group_by="ticker")
        closes = full.xs("Close", axis=1, level=1)
        first_valid = closes.apply(lambda col: col.first_valid_index())
        fd = max(first_valid)  # first date on which both tickers have a close
        ld = full.index.max()
        # Count whole calendar years; an integer division of days by 365
        # ignores leap days and can overstate what fits between fd and ld.
        years = relativedelta(ld, fd).years
        if years < 6:
            return None
        y = random.randint(6, years)
        end1 = fd + relativedelta(years=y)
        # Days the window can slide forward while still ending by `ld`.
        slack = max(0, (ld - end1).days)
        offs = random.randint(0, slack)
        sd = fd + pd.Timedelta(days=offs)
        ed = sd + relativedelta(years=y)
        summ = Summary(under, trip, sd.strftime("%Y-%m-%d"), ed.strftime("%Y-%m-%d"))
        summ.summary_stats()
        pl = summ.beta_norm_strategy(0.0001)
        ideal = summ.dfr["ideal_cumulative"].iloc[-1]
        trip_cum = summ.dfr[f"{trip}_cumulative"].iloc[-1]
        return {"underlying": under, "triple": trip, "years": y, "gap": ideal - trip_cum, "P/L": pl}
    except Exception as exc:
        # Best-effort: a failed run is skipped, but the reason is reported
        # instead of being silently discarded. KeyboardInterrupt/SystemExit
        # are no longer swallowed.
        logging.getLogger(__name__).warning(
            "simulate_one(%s, %s) failed: %r", under, trip, exc
        )
        return None


def main():
    """Run 35 random-window simulations per ticker pair in parallel and print correlations.

    Prints the correlation between ``gap`` and ``P/L`` followed by the
    correlation between ``years`` and ``gap`` across all successful runs.
    If no run succeeds, prints a notice instead.
    """
    pairs = [("SPY", "SPXL"), ("SOXX", "SOXL"), ("IWM", "TNA"), ("QQQ", "TQQQ"),
             ("DIA", "UDOW"), ("XBI", "LABU"), ("XLF", "FAS")]
    n = 35  # simulations per pair
    tasks = [(u, t) for u, t in pairs for _ in range(n)]
    results = []
    workers = os.cpu_count() or 4
    ctx = multiprocessing.get_context('fork')
    # `executor` rather than `exec`, which would shadow the builtin.
    with ProcessPoolExecutor(max_workers=workers, mp_context=ctx) as executor:
        futures = [executor.submit(simulate_one, u, t) for u, t in tasks]
        for f in as_completed(futures):
            r = f.result()
            if r:
                results.append(r)
    if not results:
        # An empty DataFrame has no 'gap'/'P/L' columns; avoid a KeyError.
        print("No simulation produced a result; nothing to correlate.")
        return
    df = pd.DataFrame(results)
    print(df['gap'].corr(df['P/L']), df['years'].corr(df['gap']))

# Run only when executed directly (script or notebook cell), not on import.
if __name__ == "__main__":
    main()

[                       0%                       ]  2 of 2 completed

[*********************100%***********************]  2 of 2 completed

[*********************100%***********************]  2 of 2 completed

[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed

[*********************100%***********************]  2 of 2 completed
[*********************100%***********************]  2 of 2 completed

[*********************100%***