In [None]:
import yfinance as yf
import pandas as pd

tickers = {
    "US_Stocks": "^GSPC",
    "Global_Stocks": "URTH",
    "Emerging_Markets": "EEM",
    "US_Bonds": "AGG",
    "Intl_Bonds": "BNDX", 
    "Broad_Commodities": "GSG",
    "Gold": "GLD",
    "US_RealEstate": "VNQ",
    "Global_RealEstate": "VNQI",
    "Cash": "^IRX",
    "Bitcoin": "BTC-USD",
    "Volatility_Index": "^VIX",
    "Etherium": "ETH-USD",
    "Inflation_Protected": "TIP",
    "Private_Equity": "PSP"
}

start_date = "1970-01-01"  # Try a more recent start date if older data isn't available
end_date = "2024-12-31"

data = {}
for name, ticker in tickers.items():
    print(f"Downloading data for {name} ({ticker})...")
    df = yf.download(ticker, start=start_date, end=end_date, interval="1d")
    print(df.head())

    if df.empty:
        print(f"No data for {name}. Skipping.")
        continue

    # If multi-indexed columns, flatten them
    if isinstance(df.columns, pd.MultiIndex):
        # Attempt to drop levels if they are consistent. For example, 
        # if you see columns like (Price, ^GSPC, Adj Close), dropping two levels:
        df.columns = df.columns.droplevel([1])

    print(f"Columns after flattening for {name}:", df.columns)

    if 'Adj Close' in df.columns:
        data[name] = df['Adj Close']
    else:
        print(f"'Adj Close' not found in {name}. Available columns: {df.columns}")

if data:
    combined_data = pd.DataFrame(data)
    combined_data.index.name = "Date"
    combined_data = combined_data.dropna()
    combined_data.to_csv("Diversified_Portfolio_Data_Complete.csv")
    print("Data saved to 'Diversified_Portfolio_Data_Complete.csv'")
    print(combined_data.head())
else:
    print("No valid data collected.")


In [None]:
import yfinance as yf
import pandas as pd

tickers = {
    # Equities
    "US_Stocks": "^GSPC",     # S&P 500
    # "Dow_Jones": "^DJI",      # Dow Jones Industrial Average, data back to 1920s
    "Nasdaq_Composite": "^IXIC",

    # "Volatility_Index": "^VIX",
    
    # Rates/Yields
    "3M_TBill": "^IRX",       # 13-week T-bill
    "10Y_Treasury_Yield": "^TNX",   # 10-year treasury yield (often starts ~1962)
    "30Y_Treasury_Yield": "^TYX",   # 30-year treasury yield (often starts ~1977)
    
    # Commodities (continuous futures)
    "Gold": "^XAU",   # COMEX Gold continuous contract, data sometimes back to 1970s
    # "Silver": "^DJCISI",
    "Crude_Oil": "^XOI", # NYMEX WTI Crude continuous contract, data often from early 1980s
    
    # Currency / Dollar Index
    "US_Dollar_Index": "^NYICDX", # ICE U.S. Dollar Index futures, often from mid-1980s
    "Cash": "^IRX",
}

start_date = "1970-01-01"  # Try a more recent start date if older data isn't available
end_date = "2024-12-31"

data = {}
for name, ticker in tickers.items():
    print(f"Downloading data for {name} ({ticker})...")
    df = yf.download(ticker, period="max", interval="1d")
    print(df.head())

    if df.empty:
        print(f"No data for {name}. Skipping.")
        continue

    # If multi-indexed columns, flatten them
    if isinstance(df.columns, pd.MultiIndex):
        # Attempt to drop levels if they are consistent. For example, 
        # if you see columns like (Price, ^GSPC, Adj Close), dropping two levels:
        df.columns = df.columns.droplevel([1])

    print(f"Columns after flattening for {name}:", df.columns)

    if 'Adj Close' in df.columns:
        data[name] = df['Adj Close']
    else:
        print(f"'Adj Close' not found in {name}. Available columns: {df.columns}")

if data:
    combined_data = pd.DataFrame(data)
    combined_data.index.name = "Date"
    combined_data = combined_data.dropna()
    combined_data.to_csv("Diversified_Portfolio_Data_Complete.csv")
    print("Data saved to 'Diversified_Portfolio_Data_Complete.csv'")
    print(combined_data.head())
else:
    print("No valid data collected.")


In [None]:
import yfinance as yf
import pandas as pd

tickers = {
    "Tron": "TRX-USD",
    "Solana": "SOL-USD",
    "Avalanche": "AVAX-USD",
    "Xrp": "XRP-USD",
    "Bnb": "BNB-USD",
    "Bitcoin": "BTC-USD",
    "Doge": "DOGE-USD",
    "Etherium": "ETH-USD",
    "PolkaDot": "DOT-USD", 
    # "Stellar": "XLM-USD",
    # "Monero": "XMR-USD", 
    # "Neo": "NEO-USD",
    # "Litecoin": "LTC-USD",
    # "Cardano": "ADA-USD",
    # "Eos": "EOS-USD",

    "US_Stocks": "^GSPC",
    "Global_Stocks": "URTH",
    "Emerging_Markets": "EEM",
    "US_Bonds": "AGG",
    "Intl_Bonds": "BNDX",
    "Broad_Commodities": "GSG",
    "Gold": "GLD",
    "US_RealEstate": "VNQ",
    "Global_RealEstate": "VNQI",
    "Cash": "^IRX",
    "Volatility_Index": "^VIX",
    # "CVIX": "CVOL-USD",
    "Inflation_Protected": "TIP",
    "Private_Equity": "PSP",
    "DollarIndex": "^DXY",   # US Dollar Index

    # "Apple": "AAPL",
    # "Microsoft": "MSFT",
    # "Amazon": "AMZN",
    # "Tesla": "TSLA",
    # "Google": "GOOGL",
    # "Meta": "META",
    # "NVIDIA": "NVDA",
    # "ExxonMobil": "XOM",
    # "JPMorgan": "JPM",
    # "Johnson_Johnson": "JNJ",
    # "Procter_Gamble": "PG",
    # "Pfizer": "PFE",
    # "CocaCola": "KO",
    # "PepsiCo": "PEP",
    # "Intel": "INTC",
    # "Cisco": "CSCO",
    # "Chevron": "CVX",
    # "Goldman_Sachs": "GS",
    # "Boeing": "BA",
    # "Disney": "DIS",
    # "US_Stocks": "^GSPC",
    # "Global_Stocks": "URTH",
    # "Emerging_Markets": "EEM",
    # # "US_Bonds": "AGG",
    # # "Intl_Bonds": "BNDX",
    # "Broad_Commodities": "GSG",
    # "Gold": "GLD",
    # "US_RealEstate": "VNQ",
    # "Global_RealEstate": "VNQI",
    # # "Cash": "^IRX",
    # "Volatility_Index": "^VIX",
    # "Inflation_Protected": "TIP",
    # "Private_Equity": "PSP"
}

start_date = "2000-01-01"  # Try a more recent start date if older data isn't available
end_date = "2025-01-27"

data = {}
for name, ticker in tickers.items():
    print(f"Downloading data for {name} ({ticker})...")
    df = yf.download(ticker, start=start_date, end=end_date, interval="1d")
    print(df.head())

    if df.empty:
        print(f"No data for {name}. Skipping.")
        continue

    # If multi-indexed columns, flatten them
    if isinstance(df.columns, pd.MultiIndex):
        # Attempt to drop levels if they are consistent. For example, 
        # if you see columns like (Price, ^GSPC, Adj Close), dropping two levels:
        df.columns = df.columns.droplevel([1])

    print(f"Columns after flattening for {name}:", df.columns)

    if 'Adj Close' in df.columns:
        data[name] = df['Adj Close']
    else:
        print(f"'Adj Close' not found in {name}. Available columns: {df.columns}")

if data:
    combined_data = pd.DataFrame(data)
    combined_data.index.name = "Date"
    combined_data = combined_data.dropna()
    combined_data.to_csv("Diversified_Portfolio_Data_Complete.csv")
    print("Data saved to 'Diversified_Portfolio_Data_Complete.csv'")
    print(combined_data.head())
else:
    print("No valid data collected.")


In [8]:
import yfinance as yf
import pandas as pd

tickers = {
    # "Tron": "TRX-USD",
    # "Solana": "SOL-USD",
    # "Avalanche": "AVAX-USD",
    # "Xrp": "XRP-USD",
    # "Bnb": "BNB-USD",
    # "Bitcoin": "BTC-USD",
    # "Doge": "DOGE-USD",
    # "Etherium": "ETH-USD",
    # "PolkaDot": "DOT-USD", 
    # "Stellar": "XLM-USD",
    # "Monero": "XMR-USD", 
    # "Neo": "NEO-USD",
    # "Litecoin": "LTC-USD",
    # "Cardano": "ADA-USD",
    # "Eos": "EOS-USD",

    # "US_Stocks": "^GSPC",
    "Global_Stocks": "URTH",
    "Emerging_Markets": "EEM",
    # "US_Bonds": "AGG",
    "Intl_Bonds": "BNDX",
    "Broad_Commodities": "GSG",
    "Gold": "GLD",
    # "US_RealEstate": "VNQ",
    "Global_RealEstate": "VNQI",
    "Cash": "^IRX",
    "Volatility_Index": "^VIX",
    # "CVIX": "CVOL-USD",
    # "Inflation_Protected": "TIP",
    # "Private_Equity": "PSP",
    # "DollarIndex": "^DXY",   # US Dollar Index

    # "Apple": "AAPL",
    # "Microsoft": "MSFT",
    # "Amazon": "AMZN",
    # "Tesla": "TSLA",
    # "Google": "GOOGL",
    # "Meta": "META",
    # "NVIDIA": "NVDA",
    # "ExxonMobil": "XOM",
    # "JPMorgan": "JPM",
    # "Johnson_Johnson": "JNJ",
    # "Procter_Gamble": "PG",
    # "Pfizer": "PFE",
    # "CocaCola": "KO",
    # "PepsiCo": "PEP",
    # "Intel": "INTC",
    # "Cisco": "CSCO",
    # "Chevron": "CVX",
    # "Goldman_Sachs": "GS",
    # "Boeing": "BA",
    # "Disney": "DIS",
    # "US_Stocks": "^GSPC",
    # "Global_Stocks": "URTH",
    # "Emerging_Markets": "EEM",
    # # "US_Bonds": "AGG",
    # # "Intl_Bonds": "BNDX",
    # "Broad_Commodities": "GSG",
    # "Gold": "GLD",
    # "US_RealEstate": "VNQ",
    # "Global_RealEstate": "VNQI",
    # # "Cash": "^IRX",
    # "Volatility_Index": "^VIX",
    # "Inflation_Protected": "TIP",
    # "Private_Equity": "PSP"
}

start_date = "2000-01-01"  # Try a more recent start date if older data isn't available
end_date = "2025-02-08"

data = {}
for name, ticker in tickers.items():
    print(f"Downloading data for {name} ({ticker})...")
    df = yf.download(ticker, start=start_date, end=end_date, interval="1d")
    print(df.head())

    if df.empty:
        print(f"No data for {name}. Skipping.")
        continue

    # If multi-indexed columns, flatten them
    if isinstance(df.columns, pd.MultiIndex):
        # Attempt to drop levels if they are consistent. For example, 
        # if you see columns like (Price, ^GSPC, Adj Close), dropping two levels:
        df.columns = df.columns.droplevel([1])

    print(f"Columns after flattening for {name}:", df.columns)

    if 'Adj Close' in df.columns:
        data[name] = df['Adj Close']
    else:
        print(f"'Adj Close' not found in {name}. Available columns: {df.columns}")

if data:
    combined_data = pd.DataFrame(data)
    combined_data.index.name = "Date"
    combined_data = combined_data.dropna()
    combined_data.to_csv("Diversified_Portfolio_Data_Complete_Macro.csv")
    print("Data saved to 'Diversified_Portfolio_Data_Complete.csv'")
    print(combined_data)
else:
    print("No valid data collected.")


[*********************100%***********************]  1 of 1 completed

1 Failed download:
['URTH']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['EEM']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['BNDX']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['GSG']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['GLD']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
[*********************100%***********************]  1 of 1 completed

Downloading data for Global_Stocks (URTH)...
Empty DataFrame
Columns: [(Adj Close, URTH), (Close, URTH), (High, URTH), (Low, URTH), (Open, URTH), (Volume, URTH)]
Index: []
No data for Global_Stocks. Skipping.
Downloading data for Emerging_Markets (EEM)...
Empty DataFrame
Columns: [(Adj Close, EEM), (Close, EEM), (High, EEM), (Low, EEM), (Open, EEM), (Volume, EEM)]
Index: []
No data for Emerging_Markets. Skipping.
Downloading data for Intl_Bonds (BNDX)...
Empty DataFrame
Columns: [(Adj Close, BNDX), (Close, BNDX), (High, BNDX), (Low, BNDX), (Open, BNDX), (Volume, BNDX)]
Index: []
No data for Intl_Bonds. Skipping.
Downloading data for Broad_Commodities (GSG)...
Empty DataFrame
Columns: [(Adj Close, GSG), (Close, GSG), (High, GSG), (Low, GSG), (Open, GSG), (Volume, GSG)]
Index: []
No data for Broad_Commodities. Skipping.
Downloading data for Gold (GLD)...
Empty DataFrame
Columns: [(Adj Close, GLD), (Close, GLD), (High, GLD), (Low, GLD), (Open, GLD), (Volume, GLD)]
Index: []
No data for Go



1 Failed download:
['VNQI']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['^IRX']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['^VIX']: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')


Empty DataFrame
Columns: [(Adj Close, VNQI), (Close, VNQI), (High, VNQI), (Low, VNQI), (Open, VNQI), (Volume, VNQI)]
Index: []
No data for Global_RealEstate. Skipping.
Downloading data for Cash (^IRX)...
Empty DataFrame
Columns: [(Adj Close, ^IRX), (Close, ^IRX), (High, ^IRX), (Low, ^IRX), (Open, ^IRX), (Volume, ^IRX)]
Index: []
No data for Cash. Skipping.
Downloading data for Volatility_Index (^VIX)...
Empty DataFrame
Columns: [(Adj Close, ^VIX), (Close, ^VIX), (High, ^VIX), (Low, ^VIX), (Open, ^VIX), (Volume, ^VIX)]
Index: []
No data for Volatility_Index. Skipping.
No valid data collected.


In [2]:
import datetime
from ctypes import *

import pandas as pd

TF_EQUIV = {
    "1m": "1Min",
    "5m": "5Min",
    "15m": "15Min",
    "30m": "30Min",
    "1h": "1H",
    "4h": "4H",
    "12h": "12H",
    "1d": "D",
}

STRAT_PARAMS = {
    "obv": {
        "ma_period": {"name": "MA Period", "type": int, "min": 2, "max": 200},
    },
    "ichimoku": {
        "kijun": {"name": "Kijun Period", "type": int, "min": 25, "max": 90},
        "tenkan": {"name": "Tenkan Period", "type": int, "min": 10, "max": 50},
    },
    "sup_res": {
        "min_points": {"name": "Min. Points", "type": int, "min": 2, "max": 20},
        "min_diff_points": {
            "name": "Min. Difference between Points",
            "type": int,
            "min": 1,
            "max": 100,
        },
        "rounding_nb": {
            "name": "Rounding Number",
            "type": float,
            "min": 0.001,
            "max": 0.05,
            "decimals": 3,
        },
        "ratio": {
            "name": "WIN/LOSS Ratio",
            "type": float,
            "min": 0.5,
            "max": 10,
            "decimals": 2,
        },
        "risk": {
            "name": "Risk per trade",
            "type": float,
            "min": 0.005,
            "max": 0.02,
            "decimals": 2,
        },
        "leverage": {"name": "Leverage", "type": int, "min": 1, "max": 50},
        "cash": {
            "name": "Initial balance (USD)",
            "type": int,
            "min": 1000,
            "max": 1000,
        },
        "stop_short": {
            "name": "Stop short safety",
            "type": float,
            "min": 1.001,
            "max": 1.01,
            "decimals": 4,
        },
        "stop_long": {
            "name": "Stop long safety",
            "type": float,
            "min": 0.99,
            "max": 0.999,
            "decimals": 4,
        },
    },
    "fractals": {
        "ema_fast": {"name": "EMA Fast", "type": int, "min": 6, "max": 50},
        "ema_middle": {"name": "EMA Middle", "type": int, "min": 40, "max": 100},
        "ema_slow": {"name": "EMA Slow", "type": int, "min": 100, "max": 400},
        "rsi_length": {"name": "RSI", "type": int, "min": 14, "max": 14},
        "stop_ratio": {
            "name": "WIN/LOSS Ratio",
            "type": float,
            "min": 1,
            "max": 3,
            "decimals": 2,
        },
        "risk": {
            "name": "Risk per trade",
            "type": float,
            "min": 0.005,
            "max": 0.02,
            "decimals": 3,
        },
        "cash": {
            "name": "Initial balance (USD)",
            "type": int,
            "min": 1000,
            "max": 1000,
        },
        "leverage": {"name": "Leverage", "type": int, "min": 1, "max": 50},
    },
    "fractals2": {
        "ema_fast": {"name": "EMA Fast", "type": int, "min": 1, "max": 50},
        "ema_middle": {"name": "EMA Middle", "type": int, "min": 40, "max": 100},
        "ema_slow": {"name": "EMA Slow", "type": int, "min": 100, "max": 400},
        "rsi_length": {"name": "RSI", "type": int, "min": 1, "max": 100},
        "stop_ratio": {
            "name": "WIN/LOSS Ratio",
            "type": float,
            "min": 0.5,
            "max": 10,
            "decimals": 2,
        },
        "risk": {
            "name": "Risk per trade",
            "type": float,
            "min": 0.005,
            "max": 0.02,
            "decimals": 3,
        },
        "cash": {
            "name": "Initial balance (USD)",
            "type": int,
            "min": 1000,
            "max": 1000,
        },
        "leverage": {"name": "Leverage", "type": int, "min": 1, "max": 125},
        "breakeven": {
            "name": "Break Even",
            "type": float,
            "min": 0.1,
            "max": 1,
            "decimals": 2,
        },
    },
    "fractal_simple": {
        "ema_fast": {"name": "EMA Fast", "type": int, "min": 1, "max": 50},
        "ema_middle": {"name": "EMA Middle", "type": int, "min": 40, "max": 100},
        "ema_slow": {"name": "EMA Slow", "type": int, "min": 100, "max": 400},
        "rsi_period": {"name": "RSI", "type": int, "min": 14, "max": 14},
        "bk_ratio": {
            "name": "Break Even",
            "type": float,
            "min": 0.1,
            "max": 0.9,
            "decimals": 2,
        },
        "ratio": {
            "name": "WIN/LOSS Ratio",
            "type": float,
            "min": 0.5,
            "max": 3,
            "decimals": 2,
        },
        "risk": {
            "name": "Risk per trade",
            "type": float,
            "min": 0.005,
            "max": 0.02,
            "decimals": 3,
        },
        "cash": {
            "name": "Initial balance (USD)",
            "type": int,
            "min": 1000,
            "max": 1000,
        },
        "leverage": {"name": "Leverage", "type": int, "min": 1, "max": 125},
    },
    "sma": {
        "slow_ma": {"name": "Slow MA Period", "type": int, "min": 2, "max": 200},
        "fast_ma": {"name": "Fast MA Period", "type": int, "min": 2, "max": 200},
    },
    "psar": {
        "initial_acc": {
            "name": "Initial Acceleration",
            "type": float,
            "min": 0.005,
            "max": 0.2,
            "decimals": 2,
        },
        "acc_increment": {
            "name": "Acceleration Increment",
            "type": float,
            "min": 0.005,
            "max": 0.2,
            "decimals": 2,
        },
        "max_acc": {
            "name": "Max. Acceleration",
            "type": float,
            "min": 0.05,
            "max": 2,
            "decimals": 2,
        },
    },
    "fractal_test": {
        "ema_fast": {"name": "EMA Fast", "type": int, "min": 2, "max": 8},
        "ema_middle": {"name": "EMA Middle", "type": int, "min": 90, "max": 110},
        "ema_slow": {"name": "EMA Slow", "type": int, "min": 100, "max": 200},
        "rsi_period": {"name": "RSI Period", "type": int, "min": 50, "max": 70},
        "rsi_long_value": {
            "name": "RSI Long Value Condition",
            "type": int,
            "min": 70,
            "max": 90,
        },
        "rsi_short_value": {
            "name": "RSI Short Value Condition",
            "type": int,
            "min": 1,
            "max": 10,
        },
        "ratio": {
            "name": "WIN/LOSS Ratio",
            "type": float,
            "min": 7,
            "max": 10,
            "decimals": 2,
        },
        "risk": {
            "name": "Risk per trade",
            "type": float,
            "min": 0.02,
            "max": 0.02,
            "decimals": 3,
        },
        "cash": {
            "name": "Initial balance (USD)",
            "type": int,
            "min": 1000,
            "max": 1000,
        },
        "leverage": {"name": "Leverage", "type": int, "min": 1, "max": 15},
        "bk_ratio": {
            "name": "Breakeven ratio",
            "type": float,
            "min": 0.9,
            "max": 1,
            "decimals": 2,
        },
        "dist_long": {
            "name": "Stop loss long distance",
            "type": float,
            "min": 0.999,
            "max": 1,
            "decimals": 4,
        },
        "dist_short": {
            "name": "Stop loss short distance",
            "type": float,
            "min": 1,
            "max": 1.001,
            "decimals": 4,
        },
        "nb_lows": {"name": "Down Trends precisions", "type": int, "min": 1, "max": 5},
        "nb_highs": {"name": "Up Trends precisions", "type": int, "min": 1, "max": 5},
        "macd_line_fast": {"name": "MACD Fast line", "type": int, "min": 12, "max": 12},
        "macd_line_slow": {"name": "MACD Slow line", "type": int, "min": 26, "max": 26},
        "macd_signal": {"name": "MACD Signal", "type": int, "min": 9, "max": 9},
        "macd_long_ratio": {
            "name": "MACD Long ratio",
            "type": float,
            "min": 1,
            "max": 3,
            "decimals": 2,
        },
        "macd_short_ratio": {
            "name": "MACD Short ratio",
            "type": float,
            "min": 0.01,
            "max": 0.1,
            "decimals": 2,
        },
    },
    "sup_res_cpp": {
        "min_points": {"name": "Min. Points", "type": int, "min": 2, "max": 20},
        "min_diff_points": {
            "name": "Min. Difference between Points",
            "type": int,
            "min": 1,
            "max": 100,
        },
        "rounding_nb": {
            "name": "Rounding Number",
            "type": float,
            "min": 10,
            "max": 500,
            "decimals": 1,
        },
        "ratio": {
            "name": "WIN/LOSS Ratio",
            "type": float,
            "min": 0.5,
            "max": 5,
            "decimals": 2,
        },
        "risk": {
            "name": "Risk per trade",
            "type": float,
            "min": 0.005,
            "max": 0.02,
            "decimals": 2,
        },
        "leverage": {"name": "Leverage", "type": int, "min": 1, "max": 50},
        "cash": {
            "name": "Initial balance (USD)",
            "type": int,
            "min": 1000,
            "max": 1000,
        },
        "stop_short": {
            "name": "Stop short safety",
            "type": float,
            "min": 1.0001,
            "max": 1.01,
            "decimals": 4,
        },
        "stop_long": {
            "name": "Stop long safety",
            "type": float,
            "min": 0.99,
            "max": 0.9999,
            "decimals": 4,
        },
    },
    "bollinger": {
        "window_size": {
            "name": "window size",
            "type": int,
            "min": 0.0001,
            "max": 1000000,
        },
        "num_std": {
            "name": "deviation number",
            "type": float,
            "min": 0.0001,
            "max": 100000,
        },
    },
    "single_index": {"test": {"name": "test", "type": int}},
    "arima": {"start_date": {"name": "start_date", "type": str}},
    # "drl": {
    #     "batch_size": {"name": "batch_size", "type": int}
    # }
}


def ms_to_dt(ms: int) -> datetime.datetime:
    return datetime.datetime.utcfromtimestamp(ms / 1000)


# def resample_timeframe(data: pd.DataFrame, tf: str) -> pd.DataFrame:
#     return data.resample(TF_EQUIV[tf]).agg(
#         {"open": "first", "high": "max", "low": "min", "close": "last", "volume": "sum"}
#     )


def resample_timeframe(data: pd.DataFrame, tf: str) -> pd.DataFrame:
    # Identify all columns that represent an instrument.
    # For each instrument, we expect columns like {prefix}_open, {prefix}_high, etc.
    # The prefix might be empty for the main dataset columns.

    # Possible base OHLCV columns
    base_cols = {"open", "high", "low", "close", "volume"}

    # Find all instruments by extracting the prefix from columns that end with '_open'
    # or if the base columns exist without any prefix.
    instruments = set()
    for col in data.columns:
        if col.endswith("_open"):
            prefix = col[:-5]  # remove '_open'
            instruments.add(prefix)

    # Check if we have a base instrument (no prefix)
    if base_cols.issubset(data.columns):
        instruments.add("")  # represents the main (no-prefix) instrument

    # Build a dynamic aggregation dictionary
    agg_dict = {}
    for inst in instruments:
        # If inst is empty, columns are just 'open', 'high', 'low', 'close', 'volume'
        # Otherwise they are 'inst_open', 'inst_high', etc.
        prefix = f"{inst}_" if inst else ""
        agg_dict[f"{prefix}open"] = "first"
        agg_dict[f"{prefix}high"] = "max"
        agg_dict[f"{prefix}low"] = "min"
        agg_dict[f"{prefix}close"] = "last"
        agg_dict[f"{prefix}volume"] = "sum"

    return data.resample(TF_EQUIV[tf]).agg(agg_dict)


def get_library():
    lib = CDLL("backtestingCpp/build/libbacktestingCpp.dll", winmode=0)

    # SMA
    lib.Sma_new.restype = c_void_p
    lib.Sma_new.argtypes = [c_char_p, c_char_p, c_char_p, c_longlong, c_longlong]
    lib.Sma_execute_backtest.restype = c_void_p
    lib.Sma_execute_backtest.argtypes = [c_void_p, c_int, c_int]

    lib.Sma_get_pnl.restype = c_double
    lib.Sma_get_pnl.argtypes = [c_void_p]
    lib.Sma_get_max_dd.restype = c_double
    lib.Sma_get_max_dd.argtypes = [c_void_p]

    # PSAR
    lib.Psar_new.restype = c_void_p
    lib.Psar_new.argtypes = [c_char_p, c_char_p, c_char_p, c_longlong, c_longlong]
    lib.Psar_execute_backtest.restype = c_void_p
    lib.Psar_execute_backtest.argtypes = [c_void_p, c_double, c_double, c_double]

    lib.Psar_get_pnl.restype = c_double
    lib.Psar_get_pnl.argtypes = [c_void_p]
    lib.Psar_get_max_dd.restype = c_double
    lib.Psar_get_max_dd.argtypes = [c_void_p]

    # Fractal_test
    lib.fractal_test_new.restype = c_void_p
    lib.fractal_test_new.argtypes = [
        c_char_p,
        c_char_p,
        c_char_p,
        c_longlong,
        c_longlong,
    ]
    lib.fractal_test_execute_backtest.restype = c_void_p
    lib.fractal_test_execute_backtest.argtypes = [
        c_void_p,
        c_int,
        c_int,
        c_int,
        c_int,
        c_int,
        c_int,
        c_double,
        c_double,
        c_double,
        c_int,
        c_double,
        c_double,
        c_double,
        c_int,
        c_int,
        c_int,
        c_int,
        c_int,
        c_double,
        c_double,
    ]

    lib.fractal_test_get_pnl.restype = c_double
    lib.fractal_test_get_pnl.argtypes = [c_void_p]
    lib.fractal_test_get_max_dd.restype = c_double
    lib.fractal_test_get_max_dd.argtypes = [c_void_p]

    # Support Resistance CPP
    lib.sup_res_cpp_new.restype = c_void_p
    lib.sup_res_cpp_new.argtypes = [
        c_char_p,
        c_char_p,
        c_char_p,
        c_longlong,
        c_longlong,
    ]
    lib.sup_res_cpp_execute_backtest.restype = c_void_p
    lib.sup_res_cpp_execute_backtest.argtypes = [
        c_void_p,
        c_int,
        c_int,
        c_double,
        c_double,
        c_double,
        c_int,
        c_int,
        c_double,
        c_double,
    ]

    lib.sup_res_cpp_get_pnl.restype = c_double
    lib.sup_res_cpp_get_pnl.argtypes = [c_void_p]
    lib.sup_res_cpp_get_daily_pnl.restype = c_double
    lib.sup_res_cpp_get_daily_pnl.argtypes = [c_void_p]
    lib.sup_res_cpp_get_daily_trades.restype = c_int
    lib.sup_res_cpp_get_daily_trades.argtypes = [c_void_p]
    lib.sup_res_cpp_get_zero_per_day.restype = c_int
    lib.sup_res_cpp_get_zero_per_day.argtypes = [c_void_p]

    # Fractal_simple
    lib.fractal_simple_new.restype = c_void_p
    lib.fractal_simple_new.argtypes = [
        c_char_p,
        c_char_p,
        c_char_p,
        c_longlong,
        c_longlong,
    ]
    lib.fractal_simple_execute_backtest.restype = c_void_p
    lib.fractal_simple_execute_backtest.argtypes = [
        c_void_p,
        c_int,
        c_int,
        c_int,
        c_int,
        c_double,
        c_double,
        c_double,
        c_int,
        c_int,
    ]

    lib.fractal_simple_get_pnl.restype = c_double
    lib.fractal_simple_get_pnl.argtypes = [c_void_p]
    lib.fractal_simple_get_max_dd.restype = c_double
    lib.fractal_simple_get_max_dd.argtypes = [c_void_p]

    return lib


In [3]:
from typing import *
import logging
import time

import h5py
import numpy as np
import pandas as pd


logger = logging.getLogger()

logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
log.info("%s logger started.", __name__)

TF_EQUIV = {
    "1m": "1Min",
    "5m": "5Min",
    "15m": "15Min",
    "30m": "30Min",
    "1h": "1H",
    "4h": "4H",
    "12h": "12H",
    "1d": "D",
}



class Hdf5client:
    def __init__(self, exchange: str):
        self.hf = h5py.File(f"data/{exchange}.h5", "a")
        self.hf.flush()

    def create_dataset(self, symbol: str):
        if symbol not in self.hf.keys():
            self.hf.create_dataset(symbol, (0, 6), maxshape=(None, 6), dtype="float64")
            self.hf.flush()

    def write_data(self, symbol: str, data: List[Tuple]):

        min_ts, max_ts = self.get_first_last_timestamp(symbol)

        if min_ts is None:
            min_ts = float("inf")
            max_ts = 0

        filtered_data = []

        for d in data:
            if d[0] < min_ts:
                filtered_data.append(d)
            elif d[0] > max_ts:
                filtered_data.append(d)

        if len(filtered_data) == 0:
            logger.warning("%s: No data to insert", symbol)

        data_array = np.array(data)

        self.hf[symbol].resize(self.hf[symbol].shape[0] + data_array.shape[0], axis=0)
        self.hf[symbol][-data_array.shape[0] :] = data_array

        self.hf.flush()

    def get_data(
        self, symbol: str, from_time: int, to_time: int
    ) -> Union[None, pd.DataFrame]:

        start_query = time.time()

        existing_data = self.hf[symbol][:]

        if len(existing_data) == 0:
            return None

        data = sorted(existing_data, key=lambda x: x[0])
        data = np.array(data)

        df = pd.DataFrame(
            data, columns=["timestamp", "open", "high", "low", "close", "volume"]
        )
        df = df[(df["timestamp"] >= from_time) & (df["timestamp"] <= to_time)]

        df["timestamp"] = pd.to_datetime(
            df["timestamp"].values.astype(np.int64), unit="ms"
        )
        df.set_index("timestamp", drop=True, inplace=True)

        query_time = round((time.time() - start_query), 2)

        logger.info(
            "Retrieved %s %s data from database in %s seconds",
            len(df.index),
            symbol,
            query_time,
        )

        return df

    def get_data2(
        self, symbol: str, from_time: int, to_time: int, tf: str
    ) -> Union[None, pd.DataFrame]:

        start_query = time.time()

        if tf == "1m":
            params = 60
        elif tf == "5m":
            params = 5 * 60
        elif tf == "15m":
            params = 15 * 60
        elif tf == "30m":
            params = 30 * 60
        elif tf == "1h":
            params = 60 * 60
        elif tf == "4h":
            params = 4 * 60 * 60
        elif tf == "12h":
            params = 12 * 60 * 60
        elif tf == "1d":
            params = 24 * 60 * 60

        existing_data = self.hf[symbol][:]

        if len(existing_data) == 0:
            return None

        data = sorted(existing_data, key=lambda x: x[0])
        data = np.array(data)

        df = pd.DataFrame(
            data, columns=["timestamp", "open", "high", "low", "close", "volume"]
        )
        df = df[
            (df["timestamp"] >= (from_time - params * 1000))
            & (df["timestamp"] <= (to_time - params * 1000))
        ]

        df["timestamp"] = pd.to_datetime(
            df["timestamp"].values.astype(np.int64), unit="ms"
        )
        df.set_index("timestamp", drop=True, inplace=True)

        df = resample_timeframe(df, tf)

        query_time = round((time.time() - start_query), 2)

        logger.info(
            "Retrieved %s %s data from database in %s seconds",
            len(df.index),
            symbol,
            query_time,
        )

        return df

    def get_first_last_timestamp(
        self, symbol: str
    ) -> Union[Tuple[None, None], Tuple[float, float]]:

        existing_data = self.hf[symbol][:]

        if len(existing_data) == 0:
            return None, None

        first_ts = min(existing_data, key=lambda x: x[0])[0]
        last_ts = max(existing_data, key=lambda x: x[0])[0]

        return first_ts, last_ts


INFO:__main__:__main__ logger started.


In [6]:
import typing
import logging

import time

logger = logging.getLogger()


def collect_all(client: typing.Union[BinanceClient], exchange: str, symbol: str):

    h5_db = Hdf5client(exchange)
    h5_db.create_dataset(symbol)

    oldest_ts, most_recent_ts = h5_db.get_first_last_timestamp(symbol)

    # Initial Request

    if oldest_ts is None:
        data = client.get_historical_data(
            symbol, end_time=int(time.time() * 1000) - 60000
        )

        if len(data) == 0:
            logger.warning("%s %s: no initial data found", exchange, symbol)
            return
        else:
            logger.info(
                "%s %s: Collected %s initial data from %s to %s",
                exchange,
                symbol,
                len(data),
                ms_to_dt(data[0][0]),
                ms_to_dt(data[-1][0]),
            )

        oldest_ts = data[0][0]
        most_recent_ts = data[-1][0]

        h5_db.write_data(symbol, data)

    data_to_insert = []

    # Most recent data

    while True:

        data = client.get_historical_data(
            symbol, start_time=int(most_recent_ts + 60000)
        )

        if data is None:
            time.sleep(4)  # Pause in case an error occurs during the request
            continue

        if len(data) < 2:
            break

        data = data[:-1]

        data_to_insert = data_to_insert + data

        if len(data_to_insert) > 10000:
            h5_db.write_data(symbol, data_to_insert)
            data_to_insert.clear()

        if data[-1][0] > most_recent_ts:
            most_recent_ts = data[-1][0]

        logger.info(
            "%s %s: Collected %s recent data from %s to %s",
            exchange,
            symbol,
            len(data),
            ms_to_dt(data[0][0]),
            ms_to_dt(data[-1][0]),
        )

        time.sleep(0.15)

    h5_db.write_data(symbol, data_to_insert)
    data_to_insert.clear()

    # Older data

    while True:

        data = client.get_historical_data(symbol, end_time=int(oldest_ts - 60000))

        if data is None:
            time.sleep(4)  # Pause in case an error occurs during the request
            continue

        if len(data) == 0:
            logger.info(
                "%s %s: Stopped older data collection because no data was found before %s",
                exchange,
                symbol,
                ms_to_dt(oldest_ts),
            )
            break

        data_to_insert = data_to_insert + data

        if len(data_to_insert) > 10000:
            h5_db.write_data(symbol, data_to_insert)
            data_to_insert.clear()

        if data[0][0] < oldest_ts:
            oldest_ts = data[0][0]

        logger.info(
            "%s %s: Collected %s older data from %s to %s",
            exchange,
            symbol,
            len(data),
            ms_to_dt(data[0][0]),
            ms_to_dt(data[-1][0]),
        )

        time.sleep(0.15)

    h5_db.write_data(symbol, data_to_insert)


In [5]:
from typing import *
import logging

import requests

logger = logging.getLogger()


class BinanceClient:
    def __init__(self, futures=False):

        self.futures = futures

        if self.futures:
            self._base_url = "https://fapi.binance.com"
        else:
            self._base_url = "https://api.binance.com"

        self.symbols = self._get_symbols()

    def _make_request(self, endpoint: str, query_parameters: Dict):

        try:
            response = requests.get(self._base_url + endpoint, params=query_parameters)
        except Exception as e:
            logger.error("Connection error while making request to %s: %s", endpoint, e)
            return None

        if response.status_code == 200:
            return response.json()
        else:
            logger.error(
                "Error while making request to %s: %s (status code == %s)",
                endpoint,
                response.json(),
                response.status_code,
            )
            return None

    def _get_symbols(self) -> List[str]:

        params = dict()

        endpoint = "/fapi/v1/exchangeInfo" if self.futures else "/api/v3/exchangeInfo"
        data = self._make_request(endpoint, params)

        symbols = [x["symbol"] for x in data["symbols"]]

        print(symbols)

        return symbols

    def get_historical_data(
        self,
        symbol: str,
        start_time: Optional[int] = None,
        end_time: Optional[int] = None,
    ):

        params = dict()

        params["symbol"] = symbol
        params["interval"] = "1m"
        params["limit"] = 1500

        if start_time is not None:
            params["startTime"] = start_time
        if end_time is not None:
            params["endTime"] = end_time

        endpoint = "/fapi/v1/klines" if self.futures else "/api/v3/exchangeInfo"
        raw_candles = self._make_request(endpoint, params)

        candles = []

        if raw_candles is not None:
            for c in raw_candles:
                candles.append(
                    (
                        float(c[0]),
                        float(c[1]),
                        float(c[2]),
                        float(c[3]),
                        float(c[4]),
                        float(c[5]),
                    )
                )
            return candles
        else:
            return None


In [9]:
# Data retrieving
def get_timeframe_data(symbol, from_time, to_time, timeframe):
    h5_db = Hdf5client("binance")
    data = h5_db.get_data(symbol, from_time, to_time)
    if timeframe != "1m":
        data = resample_timeframe(data, timeframe)
    return data


def prepare_additional_data(file_path, asset_prefix, timeframe):
    """
    Prepares additional data in the same format as the EURUSD example and resamples it
    to match the provided timeframe.

    Parameters
    ----------
    file_path : str
        The path to the CSV file.
    asset_prefix : str
        The prefix to prepend to column names, e.g. 'eurusd' or 'ustbond'.
    timeframe : str
        The target timeframe to which the data should be resampled (e.g., '4h', '1h', etc.).

    Returns
    -------
    pd.DataFrame
        A DataFrame indexed by timestamp at the specified timeframe and columns renamed
        with the asset_prefix.
    """
    # Read the CSV
    df = pd.read_csv(file_path)

    # Convert the timestamp from milliseconds to datetime
    df["timestamp"] = pd.to_datetime(df["Local time"], unit="ms")
    df.set_index("timestamp", inplace=True)

    # Keep only the required columns
    df = df[["Open", "High", "Low", "Close", "Volume"]]

    # Rename columns to include the asset prefix
    df.columns = [f"{asset_prefix}_{col.lower()}" for col in df.columns]

    # The original data is in 1m timeframe by default, so resample if needed
    if timeframe != "1m":
        df = resample_timeframe(df, timeframe)

    return df    


# Get the total system memory
# total_memory = psutil.virtual_memory().total

# Calculate 50% of total system memory
# memory_to_allocate = total_memory * 0.5

from_time = "2015-01-01"
to_time = "2025-02-08"
symbol = "BTCUSDT"

# Define timeframes
timeframes = ["1m"]
tf = timeframes[0]

# Convert times
from_time = int(
    datetime.datetime.strptime(from_time, "%Y-%m-%d").timestamp() * 1000
)
to_time = int(datetime.datetime.strptime(to_time, "%Y-%m-%d").timestamp() * 1000)


data = get_timeframe_data(symbol, from_time, to_time, tf)
ethusdt_df = get_timeframe_data("ETHUSDT", from_time, to_time, tf)
bnbusdt_df = get_timeframe_data("BNBUSDT", from_time, to_time, tf)
xrpusdt_df = get_timeframe_data("XRPUSDT", from_time, to_time, tf)
solusdt_df = get_timeframe_data("SOLUSDT", from_time, to_time, tf)
adausdt_df = get_timeframe_data("ADAUSDT", from_time, to_time, tf)
dogeusdt_df = get_timeframe_data("DOGEUSDT", from_time, to_time, tf)
trxusdt_df = get_timeframe_data("TRXUSDT", from_time, to_time, tf)
avaxusdt_df = get_timeframe_data("AVAXUSDT", from_time, to_time, tf)
# shibusdt_df = get_timeframe_data("1000SHIBUSDT", from_time, to_time, tf)
dotusdt_df = get_timeframe_data("DOTUSDT", from_time, to_time, tf)
xlmusdt_df = get_timeframe_data("XLMUSDT", from_time, to_time, tf)
xmrusdt_df = get_timeframe_data("XMRUSDT", from_time, to_time, tf)
neousdt_df = get_timeframe_data("NEOUSDT", from_time, to_time, tf)
ltcusdt_df = get_timeframe_data("LTCUSDT", from_time, to_time, tf)
# adausdt_df = get_timeframe_data("ADAUSDT", from_time, to_time, tf)
eosusdt_df = get_timeframe_data("EOSUSDT", from_time, to_time, tf)

# Rename columns to include the asset prefix
data.columns = [f"btcusdt_{col.lower()}" for col in data.columns]
ethusdt_df.columns = [f"ethusdt_{col.lower()}" for col in ethusdt_df.columns]
bnbusdt_df.columns = [f"bnbusdt_{col.lower()}" for col in bnbusdt_df.columns]
xrpusdt_df.columns = [f"xrpusdt_{col.lower()}" for col in xrpusdt_df.columns]
solusdt_df.columns = [f"solusdt_{col.lower()}" for col in solusdt_df.columns]
adausdt_df.columns = [f"adausdt_{col.lower()}" for col in adausdt_df.columns]
dogeusdt_df.columns = [f"dogeusdt_{col.lower()}" for col in dogeusdt_df.columns]
trxusdt_df.columns = [f"trxusdt_{col.lower()}" for col in trxusdt_df.columns]
avaxusdt_df.columns = [f"avaxusdt_{col.lower()}" for col in avaxusdt_df.columns]
# shibusdt_df.columns = [f"shibusdt_{col.lower()}" for col in shibusdt_df.columns]
dotusdt_df.columns = [f"dotusdt_{col.lower()}" for col in dotusdt_df.columns]
xlmusdt_df.columns = [f"xlmusdt_{col.lower()}" for col in xlmusdt_df.columns]
xmrusdt_df.columns = [f"xmrusdt_{col.lower()}" for col in xmrusdt_df.columns]
neousdt_df.columns = [f"neousdt_{col.lower()}" for col in neousdt_df.columns]
ltcusdt_df.columns = [f"ltcusdt_{col.lower()}" for col in ltcusdt_df.columns]
# adausdt_df.columns = [f"adausdt_{col.lower()}" for col in adausdt_df.columns]
eosusdt_df.columns = [f"eosusdt_{col.lower()}" for col in eosusdt_df.columns]

# print(data)
# print(ethusdt_df)
# print(bnbusdt_df)
# print(xrpusdt_df)
# print(solusdt_df)
# print(adausdt_df)
# print(dogeusdt_df)
# print(trxusdt_df)
# print(avaxusdt_df)
# # print(shibusdt_df)
# print(dotusdt_df)
# print(xlmusdt_df)
# print(xmrusdt_df)
# print(neousdt_df)
# print(ltcusdt_df)
# # print(adausdt_df)
# print(eosusdt_df)


data_close_df = data[["btcusdt_close"]]
ethusdt_close_df = ethusdt_df[["ethusdt_close"]]
bnbusdt_close_df = bnbusdt_df[["bnbusdt_close"]]
xrpusdt_close_df = xrpusdt_df[["xrpusdt_close"]]
solusdt_close_df = solusdt_df[["solusdt_close"]]
adausdt_close_df = adausdt_df[["adausdt_close"]]
dogeusdt_close_df = dogeusdt_df[["dogeusdt_close"]]
trxusdt_close_df = trxusdt_df[["trxusdt_close"]]
avaxusdt_close_df = avaxusdt_df[["avaxusdt_close"]]
# shibusdt_close_df = shibusdt_df[["shibusdt_close"]]
dotusdt_close_df = dotusdt_df[["dotusdt_close"]]
xlmusdt_close_df = xlmusdt_df[["xlmusdt_close"]]
xmrusdt_close_df = xmrusdt_df[["xmrusdt_close"]]
neousdt_close_df = neousdt_df[["neousdt_close"]]
ltcusdt_close_df = ltcusdt_df[["ltcusdt_close"]]
# adausdt_close_df = adausdt_df[["adausdt_close"]]
eosusdt_close_df = eosusdt_df[["eosusdt_close"]]


# Additional data preparation and resampling to match main_data timeframe
eurusd_df = prepare_additional_data(
    "data/EURUSD/eurusd_cleaned.csv", "eurusd", timeframe=tf
)
eurusd_close_df = eurusd_df[["eurusd_close"]]
gbpusd_df = prepare_additional_data(
    "data/GBPUSD/gbpusd_cleaned.csv", "gbpusd", timeframe=tf
)
gbpusd_close_df = gbpusd_df[["gbpusd_close"]]
xauusd_df = prepare_additional_data(
    "data/Gold/xauusd_cleaned.csv", "xauusd", timeframe=tf
)
xauusd_close_df = xauusd_df[["xauusd_close"]]
xleusd_df = prepare_additional_data(
    "data/XLE_US_USD/xleusd_cleaned.csv", "xleusd", timeframe=tf
)
xleusd_close_df = xleusd_df[["xleusd_close"]]
xlpusd_df = prepare_additional_data(
    "data/XLP_US_USD/xlpusd_cleaned.csv", "xlpusd", timeframe=tf
)
xlpusd_close_df = xlpusd_df[["xlpusd_close"]]
ustbond_df = prepare_additional_data(
    "data/US_T-Bonds/ustbond_cleaned.csv", "ustbond", timeframe=tf
)
ustbond_close_df = ustbond_df[["ustbond_close"]]
sp500_df = prepare_additional_data(
    "data/SP500/sp500_cleaned.csv", "sp500", timeframe=tf
)
sp500_close_df = sp500_df[["sp500_close"]]
uk100_df = prepare_additional_data(
    "data/UK100/uk100_cleaned.csv", "uk100", timeframe=tf
)
uk100_close_df = uk100_df[["uk100_close"]]
aus200_df = prepare_additional_data(
    "data/AUS200/aus200_cleaned.csv", "aus200", timeframe=tf
)
aus200_close_df = aus200_df[["aus200_close"]]
chi50_df = prepare_additional_data(
    "data/CHI50/chi50_cleaned.csv", "chi50", timeframe=tf
)
chi50_close_df = chi50_df[["chi50_close"]]
dollar_idx_df = prepare_additional_data(
    "data/DOLLAR_IDX/dollar_idx_cleaned.csv", "dollar_idx", timeframe=tf
)
dollar_idx_close_df = dollar_idx_df[["dollar_idx_close"]]
eurbond_df = prepare_additional_data(
    "data/EUR_Bonds/eurbond_cleaned.csv", "eurbond", timeframe=tf
)
eurbond_close_df = eurbond_df[["eurbond_close"]]
jpn225_df = prepare_additional_data(
    "data/JPN225/jpn225_cleaned.csv", "jpn225", timeframe=tf
)
jpn225_close_df = jpn225_df[["jpn225_close"]]
ukbonds_df = prepare_additional_data(
    "data/UK_Bonds/ukbonds_cleaned.csv", "ukbonds", timeframe=tf
)
ukbonds_close_df = ukbonds_df[["ukbonds_close"]]
ussc2000_df = prepare_additional_data(
    "data/USSC2000/ussc2000_cleaned.csv", "ussc2000", timeframe=tf
)
ussc2000_close_df = ussc2000_df[["ussc2000_close"]]

# print(eurusd_df)
# print(gbpusd_df)
# print(xauusd_df)
# print(xleusd_df)
# print(xlpusd_df)
# print(ustbond_df)
# print(sp500_df)
# print(uk100_df)
# print(aus200_df)
# print(chi50_df)
# print(dollar_idx_df)
# print(eurbond_df)
# print(jpn225_df)
# print(ukbonds_df)
# print(ussc2000_df)

# # Merge all into a single DataFrame
# final_data = (
#     data_close_df.join(ethusdt_close_df, how="left")
#     .join(bnbusdt_close_df, how="left")
#     .join(xrpusdt_close_df, how="left")
#     # .join(solusdt_close_df, how="left")
#     .join(adausdt_close_df, how="left")
#     # .join(dogeusdt_close_df, how="left")
#     .join(trxusdt_close_df, how="left")
#     # .join(avaxusdt_close_df, how="left")
#     # .join(shibusdt_close_df, how="left")
#     # .join(dotusdt_close_df, how="left")
#     .join(xlmusdt_close_df, how="left")
#     # .join(xmrusdt_close_df, how="left")
#     .join(neousdt_close_df, how="left")
#     .join(ltcusdt_close_df, how="left")
#     # .join(adausdt_close_df, how="left")
#     .join(eosusdt_close_df, how="left")
#     # .join(eurusd_close_df, how="left")
#     # .join(ustbond_close_df, how="left")
#     # .join(xauusd_close_df, how="left")
#     # .join(xleusd_close_df, how="left")
#     # .join(xlpusd_close_df, how="left")
#     # .join(sp500_close_df, how="left")
#     # .join(gbpusd_close_df, how="left")
#     # .join(uk100_close_df, how="left")
#     # .join(aus200_close_df, how="left")
#     # .join(chi50_close_df, how="left")
#     # .join(dollar_idx_close_df, how="left")
#     # .join(eurbond_close_df, how="left")
#     # .join(jpn225_close_df, how="left")
#     # .join(ukbonds_close_df, how="left")
#     # .join(ussc2000_close_df, how="left")
# )

# Merge all into a single DataFrame
final_data = (
    data.join(ethusdt_df, how="left")
    .join(bnbusdt_df, how="left")
    .join(xrpusdt_df, how="left")
    # .join(solusdt_close_df, how="left")
    .join(adausdt_df, how="left")
    # .join(dogeusdt_close_df, how="left")
    .join(trxusdt_df, how="left")
    # .join(avaxusdt_close_df, how="left")
    # .join(shibusdt_close_df, how="left")
    # .join(dotusdt_close_df, how="left")
    .join(xlmusdt_df, how="left")
    # .join(xmrusdt_close_df, how="left")
    .join(neousdt_df, how="left")
    .join(ltcusdt_df, how="left")
    # .join(adausdt_close_df, how="left")
    .join(eosusdt_df, how="left")
    # .join(eurusd_close_df, how="left")
    # .join(ustbond_close_df, how="left")
    # .join(xauusd_close_df, how="left")
    # .join(xleusd_close_df, how="left")
    # .join(xlpusd_close_df, how="left")
    # .join(sp500_close_df, how="left")
    # .join(gbpusd_close_df, how="left")
    # .join(uk100_close_df, how="left")
    # .join(aus200_close_df, how="left")
    # .join(chi50_close_df, how="left")
    # .join(dollar_idx_close_df, how="left")
    # .join(eurbond_close_df, how="left")
    # .join(jpn225_close_df, how="left")
    # .join(ukbonds_close_df, how="left")
    # .join(ussc2000_close_df, how="left")
)

final_data = final_data.dropna()

dataframes = {}

# for tf in timeframes:
#     dataframes[tf] = get_timeframe_data(symbol, from_time, to_time, tf)

dataframes[tf] = final_data

# # Syncronize the timeframes after computing the features
# for tf in timeframes:
#     dataframes[tf] = calculate_indicators(dataframes[tf]).dropna()

latest_start_date = None
earliest_end_date = None

for df in dataframes.values():
    start_date = df.index.min()
    end_date = df.index.max()
    if latest_start_date is None or start_date > latest_start_date:
        latest_start_date = start_date
    if earliest_end_date is None or end_date < earliest_end_date:
        earliest_end_date = end_date

# Ensure all DataFrames start and end on these dates
for tf in dataframes:
    dataframes[tf] = dataframes[tf][
        (dataframes[tf].index >= latest_start_date)
        & (dataframes[tf].index <= earliest_end_date)
    ]

pd.reset_option("display.max_rows")
print(dataframes)

{'1m':                      btcusdt_open  btcusdt_high  btcusdt_low  btcusdt_close  \
timestamp                                                                     
2018-06-11 11:30:00       6761.28       6772.99      6761.28        6765.00   
2018-06-11 11:31:00       6765.00       6765.85      6758.57        6760.00   
2018-06-11 11:32:00       6760.00       6768.27      6760.00        6765.77   
2018-06-11 11:33:00       6765.77       6765.77      6762.06        6765.75   
2018-06-11 11:34:00       6765.76       6765.77      6764.69        6765.76   
...                           ...           ...          ...            ...   
2025-02-07 22:56:00      95821.50      95826.00     95821.40       95825.30   
2025-02-07 22:57:00      95825.30      95891.20     95825.20       95891.20   
2025-02-07 22:58:00      95892.00      96058.00     95892.00       96008.60   
2025-02-07 22:59:00      96008.10      96024.00     95975.20       96014.30   
2025-02-07 23:00:00      96014.20      96014.

In [10]:
# Align lengths by truncating to the shortest length after resampling
min_length = min(len(df) for df in dataframes.values())
aligned_data = {
    tf: df.iloc[:min_length]
    for tf, df in dataframes.items()
}

# Concatenate data from all timeframes
concatenated_data = pd.concat(aligned_data.values(), axis=1)
concatenated_data.index.name = "Date"
concatenated_data.to_csv("Diversified_Portfolio_Data_Complete_DRL.csv")

print(concatenated_data)

                     btcusdt_open  btcusdt_high  btcusdt_low  btcusdt_close  \
Date                                                                          
2018-06-11 11:30:00       6761.28       6772.99      6761.28        6765.00   
2018-06-11 11:31:00       6765.00       6765.85      6758.57        6760.00   
2018-06-11 11:32:00       6760.00       6768.27      6760.00        6765.77   
2018-06-11 11:33:00       6765.77       6765.77      6762.06        6765.75   
2018-06-11 11:34:00       6765.76       6765.77      6764.69        6765.76   
...                           ...           ...          ...            ...   
2025-02-07 22:56:00      95821.50      95826.00     95821.40       95825.30   
2025-02-07 22:57:00      95825.30      95891.20     95825.20       95891.20   
2025-02-07 22:58:00      95892.00      96058.00     95892.00       96008.60   
2025-02-07 22:59:00      96008.10      96024.00     95975.20       96014.30   
2025-02-07 23:00:00      96014.20      96014.30     