In [1]:
import pandas as pd
from duka import app
from duka.core.utils import TimeFrame
import click
from datetime import datetime, timedelta
import os

def download_asset(symbol, start_date, end_date, timeframe, folder):
    """Download data for a single asset using duka"""
    timeframes = {
        'M1': TimeFrame.M1,
        'H1': TimeFrame.H1,
        'D1': TimeFrame.D1
    }
    
    app.run(
        symbols=[symbol],
        start_date=start_date,
        end_date=end_date,
        timeframe=timeframes[timeframe],
        folder=folder,
        header=True,
        threads=10
    )

def process_file(file_path, symbol):
    """Process CSV file to format columns and normalize data"""
    df = pd.read_csv(file_path)
    df.rename(columns={
        'Open': f'{symbol}_open',
        'High': f'{symbol}_high',
        'Low': f'{symbol}_low',
        'Close': f'{symbol}_close',
        'Volume': f'{symbol}_volume'
    }, inplace=True)
    
    # Convert timestamp to datetime index
    df['time'] = pd.to_datetime(df['Timestamp'], unit='ms')
    df.set_index('time', inplace=True)
    df.drop(columns=['Timestamp'], inplace=True)
    
    return df[[f'{symbol}_open', f'{symbol}_high', 
              f'{symbol}_low', f'{symbol}_close', 
              f'{symbol}_volume']]

def merge_datasets(folder, symbols, output_file):
    """Merge multiple asset files into a single dataset"""
    merged_df = pd.DataFrame()
    
    for symbol in symbols:
        symbol_lower = symbol.lower()
        file_pattern = f"{symbol_lower}_{{}}.csv"
        latest_file = max([f for f in os.listdir(folder) if f.startswith(symbol_lower)], 
                         key=lambda x: datetime.strptime(x.split('_')[-1].replace('.csv', ''), "%Y-%m-%d"))
        
        df = process_file(os.path.join(folder, latest_file), symbol_lower)
        merged_df = pd.merge(merged_df, df, how='outer', left_index=True, right_index=True)
    
    # Forward-fill missing values and save
    merged_df.ffill().to_csv(output_file)
    return merged_df

@click.command()
@click.option('--symbols', '-s', multiple=True, required=True, 
             help='List of symbols (e.g. EURUSD GBPUSD)')
@click.option('--timeframe', '-t', default='H1',
             type=click.Choice(['M1', 'H1', 'D1']),
             help='Timeframe for OHLCV data')
@click.option('--start_date', '-sd', required=True,
             help='Start date (YYYY-MM-DD)')
@click.option('--end_date', '-ed', required=True,
             help='End date (YYYY-MM-DD)')
@click.option('--output_file', '-o', default='merged_data.csv',
             help='Output filename')
def main(symbols, timeframe, start_date, end_date, output_file):
    """Main function to execute the data pipeline"""
    download_folder = './dukadata'
    os.makedirs(download_folder, exist_ok=True)

    # Download all symbols
    for symbol in symbols:
        print(f"Downloading {symbol} data...")
        download_asset(symbol, start_date, end_date, timeframe, download_folder)
    
    # Process and merge
    merged_data = merge_datasets(download_folder, symbols, output_file)
    print(f"Merged dataset saved to {output_file}")
    print(merged_data.head())

if __name__ == '__main__':
    main()

Usage: ipykernel_launcher.py [OPTIONS]
Try 'ipykernel_launcher.py --help' for help.

Error: No such option: --f


AttributeError: 'tuple' object has no attribute 'tb_frame'