In [1]:
import pandas as pd
import numpy as np
from binance.client import Client
from datetime import datetime, timedelta, timezone
import os
import pandas_ta as ta
from tqdm import tqdm
import time 
import pytz
import os
# os.system('cls' if os.name == 'nt' else 'clear')
from IPython.display import clear_output
# clear_output(wait=True)

import importlib
import BaseFunctions
importlib.reload(BaseFunctions)
from BaseFunctions import *

import StrategyList
importlib.reload(StrategyList)
from StrategyList import *

import VariableCreation
importlib.reload(VariableCreation)
from VariableCreation import *

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Binance credentials are read from the environment; either may be None when unset.
API_KEY = os.environ.get("BinanceAPI_250502")
API_SECRET = os.environ.get("BinanceSecret_250502")

# Silence every warning category for the rest of the session.
import warnings
warnings.filterwarnings("ignore")

# Build a keyed client only when both credentials are present;
# otherwise fall back to a client constructed without arguments.
if API_KEY and API_SECRET:
    client = Client(API_KEY, API_SECRET)
else:
    client = Client()

# Backtest start, written as a UTC wall-clock string.
BackTime = "2025-05-05 00:00:00"
# Parse as a naive datetime, then pin it to UTC so that .timestamp() below
# does not depend on the machine's local timezone.
BackTime = datetime.strptime(BackTime, '%Y-%m-%d %H:%M:%S').replace(tzinfo=timezone.utc)
# Alternative start kept for reference: 1735689600000 (epoch milliseconds).

BackTime = int(BackTime.timestamp() * 1000)  # epoch milliseconds
Interval = '5m'
# NOTE: this value is in epoch *seconds*; the fetch cell reassigns
# current_time in epoch milliseconds before it is compared with BackTime.
current_time = time.time()
pair = 'BTCUSDT'

BuySellFlag = 'Hold'
BuyCounter = 0
SellCounter = 0


def epoch_to_utc(epoch):
    """Format an epoch timestamp as a 'YYYY-MM-DD HH:MM:SS' string in UTC.

    Parameters:
    - epoch: numeric epoch time. Values greater than 1e10 are treated as
      milliseconds and divided by 1000; anything else is treated as seconds.

    Returns:
    - The formatted UTC timestamp string.
    """
    seconds = epoch / 1000.0 if epoch > 1e10 else epoch
    return datetime.fromtimestamp(seconds, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

In [4]:
current_time = int(time.time() * 1000)  # "now" in epoch milliseconds
interval_ms = 5 * 60 * 1000  # one 5-minute candle in ms; keep in step with Interval

# Chunking logic for END-TIME based requests: every entry of time_chunks is
# the end time of one request for up to 1000 candles.
time_chunks = []
chunk_size = 1000 * interval_ms  # time span covered by 1000 candles
current_end = BackTime + chunk_size  # first chunk ends 1000 candles after BackTime

while current_end <= current_time:
    time_chunks.append(current_end)
    current_end += chunk_size  # move the window forward by another 1000 candles

# Add a final chunk ending "now" unless the last full chunk already ends there.
if (current_end - chunk_size) < current_time:
    time_chunks.append(current_time)

from concurrent.futures import ThreadPoolExecutor, as_completed

# Collect the per-chunk frames and concatenate once, instead of growing a
# DataFrame inside the loop (which re-copies all earlier rows every time).
candle_frames = []
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(get_candles_data, pair, Interval, 1000, end, client) for end in time_chunks]

    # A worker exception is re-raised here by future.result().
    for future in as_completed(futures):
        df1 = future.result()
        if not df1.empty:
            candle_frames.append(df1)

if candle_frames:
    # The final chunk ends at "now" and presumably reaches back a full 1000
    # candles, so it can overlap the previous chunk: keep one row per close
    # time. as_completed() yields in completion order, so sort explicitly to
    # make the row order deterministic.
    # NOTE(review): assumes 'Close Time' values sort chronologically
    # (timestamps or 'YYYY-MM-DD HH:MM:SS' strings) - confirm against get_candles_data.
    # NOTE(review): rows earlier than BackTime are not filtered out here.
    df2 = (
        pd.concat(candle_frames, ignore_index=True)
        .drop_duplicates(subset='Close Time')
        .sort_values('Close Time')
        .reset_index(drop=True)
    )
else:
    df2 = pd.DataFrame()

In [5]:
# Close times that occur more than once (an empty Series means no repeats).
time_counts = df2['Close Time'].value_counts()
print(time_counts.loc[time_counts.gt(1)])

# Earliest and latest candle open time, followed by the frame's (rows, columns).
open_times = df2['Open Time']
print(open_times.min())
print(open_times.max())
print(df2.shape)

Series([], Name: count, dtype: int64)
2025-05-04 06:10:00
2025-05-07 17:25:00
(1000, 13)


In [None]:
# Convert timestamps once
df2['Close Time'] = pd.to_datetime(df2['Close Time'])
# NOTE(review): the // 10**6 assumes the int64 view is in nanoseconds - confirm
# for the pandas version in use.
df2['epochTime'] = (df2['Close Time'].astype('int64') // 10**6)

# Sort by time and get unique epochs
df2 = df2.sort_values('epochTime')
unique_times = df2['epochTime'].unique()

# df2 is sorted, so the history available at each unique time is a prefix of
# df2. window_ends[i] is the number of rows whose epochTime is at or before
# unique_times[i]; slicing by it yields the same rows as appending each
# timestamp's rows to a growing frame, without the repeated concat.
window_ends = np.searchsorted(df2['epochTime'].to_numpy(), unique_times, side='right')

results = []        # last strategy row of each processed window
partial_paths = []  # parquet files written by the periodic flush below

# The loop variable is deliberately not called current_time, so the global
# "now" timestamp is not overwritten by the last candle's epoch.
for step, epoch_ms in enumerate(tqdm(unique_times, desc="Processing")):
    try:
        window_data = df2.iloc[:window_ends[step]]

        # Process only when we have sufficient history
        if len(window_data) > 100:  # Minimum window size
            processed = create_variablesV2(window_data.copy(), pair, client, epoch_ms)
            strategized = allstrategiesv2(processed)

            # Keep only the latest row to save memory
            if not strategized.empty:
                results.append(strategized.iloc[[-1]])

        # Flush every 1000 buffered rows. The emptiness check matters:
        # 0 % 1000 == 0, and pd.concat([]) raises, which previously produced
        # an error on every iteration that had nothing buffered.
        if results and len(results) % 1000 == 0:
            partial_path = f"partial_{epoch_ms}.parquet"
            pd.concat(results).to_parquet(partial_path)
            partial_paths.append(partial_path)  # recorded only after a successful write
            results = []

    except Exception as e:
        # Best effort: report the failing timestamp and move on to the next one.
        print(f"Error at {epoch_ms}: {str(e)}")
        continue

# Final concatenation: flushed partial files first, then whatever is still
# buffered, so rows written to disk are not lost from final_df.
final_parts = [pd.read_parquet(path) for path in partial_paths]
final_parts.extend(results)
final_df = pd.concat(final_parts, ignore_index=True) if final_parts else pd.DataFrame()

In [None]:
# Write the combined results to CSV in the working directory, without the index column.
final_df.to_csv(path_or_buf='BTCUSDT2024Onwards_AllVarsAndStats.csv', index=False)

In [None]:
def process_data(df2, pair, client, min_window_size=100, save_interval=1000):
    """
    Replay market data one timestamp at a time and collect the latest strategy row.

    For every unique close time (ascending) the rows up to and including that
    time are passed to create_variablesV2 and then allstrategiesv2; the last
    row of each non-empty result is kept.

    Parameters:
    - df2: Input DataFrame with market data; must have a 'Close Time' column.
      The caller's frame is not modified.
    - pair: Trading pair symbol, forwarded to create_variablesV2
    - client: API client object, forwarded to create_variablesV2
    - min_window_size: Minimum number of rows required before a window is processed
    - save_interval: Number of buffered result rows that triggers writing them
      to a partial_results_<n>.parquet file in the working directory

    Returns:
    - DataFrame combining all partial files and the remaining buffered rows,
      or an empty DataFrame when nothing was produced

    Errors raised while processing a timestamp are printed and that timestamp
    is skipped.
    """
    # --- Chunk 1: Convert and Prepare Timestamps ---
    print("Preparing timestamps...")
    # Work on a copy so the columns added below do not leak into the caller's frame.
    df2 = df2.copy()
    df2['Close Time'] = pd.to_datetime(df2['Close Time'])
    # NOTE(review): the // 10**6 assumes the int64 view is in nanoseconds - confirm
    # for the pandas version in use.
    df2['epochTime'] = (df2['Close Time'].astype('int64') // 10**6)

    # --- Chunk 2: Sort and Get Unique Times ---
    print("Sorting data...")
    df2 = df2.sort_values('epochTime')
    unique_times = df2['epochTime'].unique()
    # The frame is sorted, so the history at each unique time is a prefix of it.
    # window_ends[i] is the number of rows at or before unique_times[i]; slicing
    # by it gives the same rows as growing a window frame with concat each step.
    window_ends = np.searchsorted(df2['epochTime'].to_numpy(), unique_times, side='right')

    # --- Chunk 3: Initialize Processing Variables ---
    results = []
    partial_files = []  # only files that were actually written

    # --- Chunk 4-7: Main Processing Loop ---
    print("Processing data...")
    for step, current_time in enumerate(tqdm(unique_times, desc="Processing")):
        try:
            window_data = df2.iloc[:window_ends[step]]

            # Only process when we have sufficient history
            if len(window_data) >= min_window_size:
                processed = create_variablesV2(window_data.copy(), pair, client, current_time)
                strategized = allstrategiesv2(processed)

                # Store only the latest result to save memory
                if not strategized.empty:
                    results.append(strategized.iloc[[-1]])

            # Periodically move buffered results to disk to manage memory
            if results and len(results) % save_interval == 0:
                partial_path = f"partial_results_{len(partial_files) + 1}.parquet"
                pd.concat(results).to_parquet(partial_path)
                # Register the file only after the write succeeded, so a failed
                # write never leaves a missing or stale file in the read-back list.
                partial_files.append(partial_path)
                results = []

        except Exception as e:
            print(f"Error processing time {current_time}: {str(e)}")
            continue

    # --- Chunk 8: Final Result Compilation ---
    print("Compiling final results...")
    frames = [pd.read_parquet(path) for path in partial_files]
    if results:
        frames.append(pd.concat(results))
    return pd.concat(frames) if frames else pd.DataFrame()



In [None]:
# Run the replay with the default window size and save interval.
final = process_data(df2=df2, pair=pair, client=client)