In [2]:
import os
import requests
import pandas as pd
from datetime import datetime
import time

# --- CONFIGURATION ---
# Use 'r' before the path for Windows compatibility
save_path = r"D:\project tradeingbot\ReinforcementTrading_Part_1-main\lehetséges datak"
symbol = "BTC"
comparison_symbol = "USD"

# Create the directory if it doesn't exist
if not os.path.exists(save_path):
    os.makedirs(save_path)
    print(f"Created folder: {save_path}")

def fetch_btc_data(symbol="BTC", comparison_symbol="USD", start_date="2020-01-01"):
    url = "https://min-api.cryptocompare.com/data/v2/histoday"
    target_ts = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp())
    
    params = {
        "fsym": symbol,
        "tsym": comparison_symbol,
        "limit": 2000,
        "toTs": int(datetime.now().timestamp())
    }

    all_data = []
    print(f"Fetching {symbol} data from API...")
    
    while True:
        response = requests.get(url, params=params)
        data = response.json()

        if data["Response"] != "Success":
            break

        batch = data["Data"]["Data"]
        if not batch:
            break
            
        all_data.extend(batch)
        earliest_ts = batch[0]["time"]
        
        if earliest_ts <= target_ts:
            break
            
        params["toTs"] = earliest_ts - 1
        time.sleep(0.1)

    df = pd.DataFrame(all_data)
    
    # Formatting to match your EURUSD files
    df['Gmt time'] = pd.to_datetime(df['time'], unit='s').dt.strftime('%d.%m.%Y %H:%M:%S.000')
    df = df.rename(columns={'open': 'Open', 'high': 'High', 'low': 'Low', 'close': 'Close', 'volumefrom': 'Volume'})
    df = df[['Gmt time', 'Open', 'High', 'Low', 'Close', 'Volume']]
    
    # Sort and Filter
    df['dt_object'] = pd.to_datetime(df['Gmt time'], format='%d.%m.%Y %H:%M:%S.000')
    df = df.sort_values('dt_object').drop(columns=['dt_object'])
    df = df[pd.to_datetime(df['Gmt time'], format='%d.%m.%Y %H:%M:%S.000') >= start_date]
    
    return df

# Fetch full data
full_df = fetch_btc_data(symbol, comparison_symbol)

# Split into Learning (2020-2023) and Testing (2024-Present)
learning_mask = pd.to_datetime(full_df['Gmt time'], format='%d.%m.%Y %H:%M:%S.000') < '2024-01-01'
learning_df = full_df[learning_mask]
testing_df = full_df[~learning_mask]

# Define full file paths
learning_file = os.path.join(save_path, "BTC_Daily_Learning_2020_2023.csv")
testing_file = os.path.join(save_path, "BTC_Daily_Testing_2024_Present.csv")

# Save the files
learning_df.to_csv(learning_file, index=False)
testing_df.to_csv(testing_file, index=False)

print(f"\nSuccess! Files saved in: {save_path}")
print(f"1. {os.path.basename(learning_file)}")
print(f"2. {os.path.basename(testing_file)}")

Fetching BTC data from API...

Success! Files saved in: D:\project tradeingbot\ReinforcementTrading_Part_1-main\lehetséges datak
1. BTC_Daily_Learning_2020_2023.csv
2. BTC_Daily_Testing_2024_Present.csv


In [3]:
df['dt'] = pd.to_datetime(df['Gmt time'], format='%d.%m.%Y %H:%M:%S.000')
expected_range = pd.date_range(start=df['dt'].min(), end=df['dt'].max(), freq='D')
missing_days = expected_range.difference(df['dt'])

if len(missing_days) == 0:
    print("Data is continuous. No missing days!")
else:
    print(f"Warning: Missing {len(missing_days)} days!")

NameError: name 'df' is not defined