In [None]:
# BTC Binance 1-Hour Data
import pandas as pd
import requests
from datetime import datetime, timedelta
import time

def get_binance_futures_klines(symbol, interval, start_time, end_time):
    """Fetch one page of candles from the Binance futures klines endpoint.

    Args:
        symbol: Instrument name sent as the ``symbol`` query parameter.
        interval: Candle size sent as the ``interval`` query parameter.
        start_time: ``datetime`` for the lower bound of the request. It is
            converted with ``.timestamp()``, so a naive value is interpreted
            in the machine's local timezone.
        end_time: ``datetime`` for the upper bound, converted the same way.

    Returns:
        The decoded JSON payload, guaranteed to be a list of candle rows.

    Raises:
        requests.HTTPError: If the server answers with a non-success status.
        RuntimeError: If the payload is not a list (for example an error
            object), so that callers never extend their data with dict keys.
    """
    endpoint = "https://fapi.binance.com/fapi/v1/klines"

    params = {
        "symbol": symbol,
        "interval": interval,
        "startTime": int(start_time.timestamp() * 1000),
        "endTime": int(end_time.timestamp() * 1000),
        "limit": 1000,  # page size requested per call
    }

    # A timeout keeps a stalled connection from hanging the whole download.
    response = requests.get(endpoint, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()

    # Error responses are JSON objects; only a list can be candle rows.
    if not isinstance(data, list):
        raise RuntimeError(f"Unexpected Binance klines response: {data}")

    return data

def process_klines_data(klines):
    """Turn raw 12-field kline rows into a compact price DataFrame.

    Args:
        klines: Sequence of rows, each with the twelve fields named in
            ``raw_fields`` below.

    Returns:
        DataFrame with columns ``date``, ``open``, ``close``, ``high``,
        ``low`` and ``p_n_l``, where ``p_n_l`` is the open-to-close change
        expressed as a percentage of the open.
    """
    raw_fields = [
        'timestamp', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_volume', 'trades_count',
        'taker_buy_volume', 'taker_buy_quote_volume', 'ignore',
    ]
    raw = pd.DataFrame(klines, columns=raw_fields)

    # Millisecond epoch values become pandas timestamps.
    opened_at = pd.to_datetime(raw['timestamp'], unit='ms')

    # Prices (and volume) are cast to floats before any arithmetic.
    numbers = raw[['open', 'high', 'low', 'close', 'volume']].astype(float)

    result = pd.DataFrame({
        'date': opened_at,
        'open': numbers['open'],
        'close': numbers['close'],
        'high': numbers['high'],
        'low': numbers['low'],
    })
    result['p_n_l'] = ((result['close'] - result['open']) / result['open']) * 100

    return result

def download_full_history(symbol="BTCUSDT", interval="1h"):
    """Download Binance futures candles from 2023-01-01 up to now.

    The range is walked in consecutive windows of at most seven days. Each
    window is fetched with ``get_binance_futures_klines`` and followed by a
    one-second pause. The accumulated rows are then handed to
    ``process_klines_data``.

    Args:
        symbol: Instrument forwarded to the fetcher.
        interval: Candle size forwarded to the fetcher.

    Returns:
        The DataFrame produced by ``process_klines_data``.
    """
    history_end = datetime.now()
    window_span = timedelta(days=7)

    collected = []
    window_start = datetime(2023, 1, 1)

    while window_start < history_end:
        # Clamp the final window so it never runs past "now".
        window_end = window_start + window_span
        if window_end > history_end:
            window_end = history_end

        print(f"Downloading data from {window_start} to {window_end}")

        collected += get_binance_futures_klines(symbol, interval, window_start, window_end)

        window_start = window_end
        time.sleep(1)

    return process_klines_data(collected)

def clean_data(df):
    """Return a copy of ``df`` without duplicate rows, ordered by ``date``.

    Exact duplicate rows are removed, the remainder is sorted ascending on
    the ``date`` column, and the index is rebuilt as 0..n-1.
    """
    return df.drop_duplicates().sort_values('date').reset_index(drop=True)

# Download, tidy and persist the Binance BTC history.
import os

# to_csv does not create missing parent folders; create the target directory
# up front so a long download is not lost to a missing path at save time.
os.makedirs('./binance_data', exist_ok=True)

df = download_full_history()
df = clean_data(df)

df.to_csv('./binance_data/usdt_btc_1h.csv', index=False)


In [None]:
# ETH OKX 1-Hour Data
import pandas as pd
import requests
from datetime import datetime, timedelta
import time

def get_okx_klines(symbol, interval, start_time, end_time):
    """Fetch every OKX history candle with ``start_time <= ts < end_time``.

    A single request to this endpoint returns at most 100 rows, newest
    first. A window holding more candles than that (for example seven days
    of hourly bars) is therefore read page by page, walking backwards from
    ``end_time`` until the window is exhausted.

    Args:
        symbol: Instrument id sent as ``instId``.
        interval: Bar size sent as ``bar``.
        start_time: ``datetime`` lower bound (inclusive). Converted with
            ``.timestamp()``, so a naive value is read as local time.
        end_time: ``datetime`` upper bound (exclusive), converted the same way.

    Returns:
        List of raw candle rows in the order received (newest first).

    Raises:
        requests.HTTPError: If the server answers with a non-success status.
        RuntimeError: If the response body carries a non-zero OKX error code.
    """
    endpoint = "https://www.okx.com/api/v5/market/history-candles"
    page_size = 100

    start_ms = int(start_time.timestamp() * 1000)
    cursor_ms = int(end_time.timestamp() * 1000)

    candles = []
    while True:
        params = {
            "instId": symbol,
            "bar": interval,
            # `before` selects rows strictly newer than the value, so step
            # back 1 ms to keep a candle that opens exactly at start_time.
            "before": str(start_ms - 1),
            # `after` selects rows strictly older than the cursor.
            "after": str(cursor_ms),
            "limit": page_size,
        }

        response = requests.get(endpoint, params=params, timeout=30)
        response.raise_for_status()
        payload = response.json()

        # Surface API-level failures instead of treating them as "no data".
        if str(payload.get("code", "0")) != "0":
            raise RuntimeError(
                f"OKX error {payload.get('code')}: {payload.get('msg')}"
            )

        page = payload.get("data") or []
        if not page:
            break
        candles.extend(page)

        # A short page means nothing older remains inside the window.
        if len(page) < page_size:
            break

        oldest_ms = int(page[-1][0])
        if oldest_ms >= cursor_ms:
            # Defensive: never loop forever if the cursor fails to move back.
            break
        cursor_ms = oldest_ms

        # Brief pause between pages to stay clear of request rate limits.
        time.sleep(0.2)

    return candles

def process_okx_data(klines):
    """Turn raw 9-field OKX candle rows into a compact price DataFrame.

    Args:
        klines: Sequence of rows, each with the nine fields named in
            ``okx_fields`` below.

    Returns:
        DataFrame with columns ``date``, ``open``, ``close``, ``high``,
        ``low`` and ``p_n_l``, where ``p_n_l`` is the open-to-close change
        expressed as a percentage of the open.
    """
    okx_fields = [
        'timestamp', 'open', 'high', 'low', 'close',
        'vol', 'volCcy', 'volCcyQuote', 'confirm',
    ]
    float_fields = ['open', 'high', 'low', 'close', 'vol']
    kept_fields = ['timestamp', 'open', 'close', 'high', 'low', 'p_n_l']

    candles = pd.DataFrame(klines, columns=okx_fields)

    # Timestamps are cast to float milliseconds before conversion.
    epoch_ms = candles['timestamp'].astype(float)
    candles['timestamp'] = pd.to_datetime(epoch_ms, unit='ms')

    candles[float_fields] = candles[float_fields].astype(float)

    price_move = candles['close'] - candles['open']
    candles['p_n_l'] = (price_move / candles['open']) * 100

    return candles[kept_fields].rename(columns={'timestamp': 'date'})

def download_full_history(symbol="ETH-USDT", interval="1H"):
    """Download OKX candles from 2023-01-01 up to now.

    The range is split into consecutive windows of at most seven days. Each
    window is fetched with ``get_okx_klines`` and followed by a one-second
    pause. The accumulated rows are then handed to ``process_okx_data``.

    NOTE(review): every window is requested with a single ``get_okx_klines``
    call; confirm that call returns all candles inside a seven-day window.

    Args:
        symbol: Instrument id forwarded to the fetcher.
        interval: Bar size forwarded to the fetcher.

    Returns:
        The DataFrame produced by ``process_okx_data``.
    """
    def _windows(first, last):
        # Yield (lower, upper) bounds, clamping the final window to `last`.
        lower = first
        while lower < last:
            upper = min(lower + timedelta(days=7), last)
            yield lower, upper
            lower = upper

    range_start = datetime(2023, 1, 1)
    range_end = datetime.now()

    rows = []
    for window_start, window_end in _windows(range_start, range_end):
        print(f"Downloading data from {window_start} to {window_end}")
        rows.extend(get_okx_klines(symbol, interval, window_start, window_end))
        time.sleep(1)

    return process_okx_data(rows)

def clean_data(df):
    """De-duplicate rows, order them by ``date`` and renumber the index.

    The input frame is not modified; a new frame is returned.
    """
    unique_rows = df.drop_duplicates()
    chronological = unique_rows.sort_values('date')
    return chronological.reset_index(drop=True)

# Download, tidy and persist the OKX ETH history.
import os

# to_csv does not create missing parent folders; create the target directory
# up front so a long download is not lost to a missing path at save time.
os.makedirs('./okx_data', exist_ok=True)

# Download and process the data
df = download_full_history()
df = clean_data(df)

# Save to CSV
df.to_csv('./okx_data/eth_usdt_1h.csv', index=False)

In [None]:
import pandas as pd

def sync_csv_files(file1_path, file2_path, output_path1=None, output_path2=None):
    """Keep only the rows of two CSV files whose hourly timestamps occur in both.

    Each file must contain a ``date`` column. Dates are parsed, floored to
    the hour, and every row whose floored date is missing from the other
    file is dropped. Both results are sorted ascending by ``date``.

    Args:
        file1_path: Path of the first CSV.
        file2_path: Path of the second CSV.
        output_path1: If given, the filtered first frame is written here.
        output_path2: If given, the filtered second frame is written here.

    Returns:
        ``(df1_sync, df2_sync)`` on success. If any step raises, the error
        is printed and ``(None, None)`` is returned instead.
    """
    try:
        df1 = pd.read_csv(file1_path)
        df2 = pd.read_csv(file2_path)

        # Parse, then truncate to the hour so both sources share a key.
        df1['date'] = pd.to_datetime(df1['date'], format='mixed').dt.floor('h')
        df2['date'] = pd.to_datetime(df2['date'], format='mixed').dt.floor('h')

        common_timestamps = set(df1['date']).intersection(set(df2['date']))

        df1_sync = df1[df1['date'].isin(common_timestamps)].sort_values('date', ascending=True)
        df2_sync = df2[df2['date'].isin(common_timestamps)].sort_values('date', ascending=True)

        print(f"Original rows in file 1: {len(df1)}")
        print(f"Original rows in file 2: {len(df2)}")
        print(f"Rows after synchronization: {len(df1_sync)}")

        if not df1_sync.empty:
            # Frames are sorted ascending, so the first row is the earliest
            # timestamp and the last row is the latest.
            earliest = df1_sync['date'].iloc[0]
            latest = df1_sync['date'].iloc[-1]
            print(f"\nDate range: {earliest} to {latest}")

        if output_path1:
            df1_sync.to_csv(output_path1, index=False)
            print(f"Saved synchronized file 1 to: {output_path1}")

        if output_path2:
            df2_sync.to_csv(output_path2, index=False)
            print(f"Saved synchronized file 2 to: {output_path2}")

        return df1_sync, df2_sync

    except Exception as e:
        # Best-effort contract: report the problem and signal failure with
        # a (None, None) pair rather than propagating the exception.
        print(f"Error: {e}")
        return None, None

file1 = "./binance_data/usdt_btc_1h.csv"
file2 = "./okx_data/eth_usdt_1h.csv"

# The output paths equal the input paths, so both CSVs are overwritten in
# place with their synchronized versions.
df1_sync, df2_sync = sync_csv_files(
    file1,
    file2,
    output_path1=file1,
    output_path2=file2
)

# sync_csv_files signals failure with a (None, None) pair; do not report
# success in that case.
if df1_sync is None or df2_sync is None:
    print("\nSynchronization failed")
else:
    print("\nDone")