In [1]:
import pandas as pd
import os
import numpy as np
import re # Import regex for advanced filename parsing
from tqdm.auto import tqdm

# ======== SETTINGS ========
# Ensure these paths are correct
data_folder = r"C:/Users/yaman/OneDrive/سطح المكتب/project1/strategys/Big Char Trada" # Corrected: Added closing double quote here
output_folder = os.path.join(data_folder, "C:/Users/yaman/OneDrive/سطح المكتب/project1/strategys/Big Char Trada/out") # Output folder created inside data_folder for better organization
os.makedirs(output_folder, exist_ok=True)

# Strategy parameters
atr_period = 14
tp_atr_factor = 1.5
sl_atr_factor = 1.0
ema_period = 50
big_candle_factor = 1.5
volume_factor = 1.5

primary_pair = "GBP/USD"
secondary_pair = "EUR/USD"

timeframe_hierarchy = {
    'M1': 'M5',
    'M5': 'M15',
    'M15': 'M30',
    'M30': 'H1',
    'H1': 'H4',
    'H4': 'D1'
}

# ======== LOAD ALL CSV FILES ========
all_files = [os.path.join(data_folder, f) for f in os.listdir(data_folder) if f.endswith('.csv')]
data_list = []

print("Loading CSV files...")
for file in tqdm(all_files, desc="Loading CSV files"):
    filename = os.path.basename(file).upper()
    
    # Determine encoding and separator based on filename
    encoding = "utf-8-sig" # Default to utf-8-sig
    sep = ','

    if "GBPUSD" in filename: # Assuming GBPUSD files might use UTF-16
        encoding = "utf-16"
        sep = ',' 
    elif "EURUSD" in filename: # Assuming EURUSD files might use UTF-8
        encoding = "utf-8-sig"
        sep = ','

    try:
        df = pd.read_csv(file, encoding=encoding, sep=sep)
    except Exception as e:
        print(f"Error reading {filename} with encoding {encoding} and sep '{sep}': {e}")
        # Try a different encoding if the first one fails
        if encoding == "utf-16":
            try:
                df = pd.read_csv(file, encoding="utf-8-sig", sep=sep)
                print(f"Successfully read {filename} with utf-8-sig after utf-16 failed.")
            except Exception as e2:
                print(f"Error reading {filename} with utf-8-sig also: {e2}. Skipping file.")
                continue
        elif encoding == "utf-8-sig":
            try:
                df = pd.read_csv(file, encoding="utf-16", sep=sep)
                print(f"Successfully read {filename} with utf-16 after utf-8-sig failed.")
            except Exception as e2:
                print(f"Error reading {filename} with utf-16 also: {e2}. Skipping file.")
                continue
        else:
            continue # Skip file if it can't be read with either encoding

    # Clean column names
    df.columns = df.columns.str.strip()

    # Ensure datetime column exists and is parsed correctly
    if 'DateTime' not in df.columns:
        # Look for a column likely to be DateTime
        potential_dt_cols = [col for col in df.columns if 'date' in col.lower() or 'time' in col.lower()]
        if potential_dt_cols:
            df.rename(columns={potential_dt_cols[0]: 'DateTime'}, inplace=True)
        else:
            # If no obvious column name, assume the first column is DateTime
            df.rename(columns={df.columns[0]: 'DateTime'}, inplace=True)
            
    df['DateTime'] = pd.to_datetime(df['DateTime'], errors='coerce')
    df.dropna(subset=['DateTime'], inplace=True) # Drop rows where DateTime parsing failed

    # Assign currency pair
    if "EURUSD" in filename:
        df['Pair'] = "EUR/USD"
    elif "GBPUSD" in filename:
        df['Pair'] = "GBP/USD"
    else:
        # Try to infer pair from filename (e.g., "XYZ_M5.csv" -> "XYZ")
        pair_match = re.search(r'([A-Z]{3}[A-Z]{3})', filename)
        if pair_match:
            df['Pair'] = pair_match.group(1)[:3] + "/" + pair_match.group(1)[3:]
        else:
            df['Pair'] = "Other" # If no known pair found

    # Assign timeframe
    for tf in ["M1","M5","M15","M30","H1","H4","D1"]:
        if tf in filename:
            df['Timeframe'] = tf
            break
    else:
        df['Timeframe'] = "Other"
    
    data_list.append(df)

# Merge all data
data = pd.concat(data_list, ignore_index=True)
data.sort_values(['Pair','Timeframe','DateTime'], inplace=True)
data.reset_index(drop=True, inplace=True)
print("CSV files loaded and merged.")

# ======== FUNCTIONS ========
def calculate_atr(df, period):
    """Calculate ATR using exponential moving average"""
    df['High_Low'] = df['High'] - df['Low']
    df['High_PrevClose'] = abs(df['High'] - df['Close'].shift(1))
    df['Low_PrevClose'] = abs(df['Low'] - df['Close'].shift(1))
    df['True_Range'] = df[['High_Low', 'High_PrevClose', 'Low_PrevClose']].max(axis=1)
    df['ATR'] = df['True_Range'].ewm(span=period, adjust=False).mean()
    df.drop(columns=['High_Low', 'High_PrevClose', 'Low_PrevClose', 'True_Range'], inplace=True)
    return df

def check_hit(open_price, high, low, close_price, direction, tp_price, sl_price):
    """
    Check if trade hit TP or SL within the current candle's range.
    Also estimates MaxAdverseExcursion (MAE) and MaxFavorableExcursion (MFE) for the current candle.
    Returns (result, exit_price, mae, mfe).
    Note: This is a simplified MAE/MFE for a single candle. For multi-candle trades,
    MAE/MFE would need to be tracked cumulatively.
    """
    mae = 0.0 # Max Adverse Excursion (absolute value)
    mfe = 0.0 # Max Favorable Excursion (absolute value)

    if direction == 1:  # Buy Trade
        # Immediate hits at open
        if open_price >= tp_price: 
            return ("Win", tp_price, 0.0, tp_price - open_price)
        if open_price <= sl_price: 
            return ("Loss", sl_price, open_price - sl_price, 0.0)

        # Movement within the candle
        if high >= tp_price and low <= sl_price: # Both TP and SL hit within candle
            # Assume SL is hit first if both are reached, unless TP is closer to open
            if (tp_price - open_price) < (open_price - sl_price): # TP is closer
                result = "Win"
                exit_p = tp_price
            else: # SL is closer or equal distance
                result = "Loss"
                exit_p = sl_price
            mae = open_price - low # Max adverse during candle
            mfe = high - open_price # Max favorable during candle
            return (result, exit_p, mae, mfe)
        elif high >= tp_price: # Only TP hit
            mae = open_price - low
            mfe = high - open_price
            return ("Win", tp_price, mae, mfe)
        elif low <= sl_price: # Only SL hit
            mae = open_price - low
            mfe = high - open_price
            return ("Loss", sl_price, mae, mfe)
        else: # Neither hit, assume close price is exit
            mae = open_price - low
            mfe = high - open_price
            return (None, None, mae, mfe)
    
    else:  # Sell Trade (direction == -1)
        # Immediate hits at open
        if open_price <= tp_price: 
            return ("Win", tp_price, 0.0, open_price - tp_price)
        if open_price >= sl_price: 
            return ("Loss", sl_price, sl_price - open_price, 0.0)

        # Movement within the candle
        if low <= tp_price and high >= sl_price: # Both TP and SL hit within candle
            # Assume SL is hit first if both are reached, unless TP is closer to open
            if (open_price - tp_price) < (sl_price - open_price): # TP is closer
                result = "Win"
                exit_p = tp_price
            else: # SL is closer or equal distance
                result = "Loss"
                exit_p = sl_price
            mae = high - open_price
            mfe = open_price - low
            return (result, exit_p, mae, mfe)
        elif low <= tp_price: # Only TP hit
            mae = high - open_price
            mfe = open_price - low
            return ("Win", tp_price, mae, mfe)
        elif high >= sl_price: # Only SL hit
            mae = high - open_price
            mfe = open_price - low
            return ("Loss", sl_price, mae, mfe)
        else: # Neither hit, assume close price is exit
            mae = high - open_price
            mfe = open_price - low
            return (None, None, mae, mfe)


# ======== PREPARE DATA ========
processed_data_frames = []
print("Calculating technical indicators (EMA, ATR, Rolling Averages)...")
# Group data by Pair and Timeframe before processing for efficiency and correctness
grouped_data = data.groupby(['Pair', 'Timeframe'])

for (pair, tf), df in tqdm(grouped_data, desc="Processing Pairs and Timeframes"):
    if df.empty:
        continue

    # Ensure data is sorted by DateTime before indicator calculation
    df = df.sort_values('DateTime').reset_index(drop=True)

    df['EMA'] = df['Close'].ewm(span=ema_period, adjust=False).mean()
    df = calculate_atr(df, atr_period)
    df['CandleSize'] = df['Close'] - df['Open']
    df['Rolling_Avg_CandleSize'] = df['CandleSize'].abs().rolling(window=ema_period, min_periods=1).mean()
    if 'Volume' in df.columns:
        df['Rolling_Avg_Volume'] = df['Volume'].rolling(window=ema_period, min_periods=1).mean()
    processed_data_frames.append(df)

data_processed = pd.concat(processed_data_frames, ignore_index=True)
data_processed.sort_values(['Pair','Timeframe','DateTime'], inplace=True)
data_processed.reset_index(drop=True, inplace=True)
print("Technical indicators calculated.")

# Prepare primary pair data for alignment check
primary_data_dict = {}
df_primary_all_tfs = data_processed[data_processed['Pair']==primary_pair].copy()
for tf in df_primary_all_tfs['Timeframe'].unique():
    primary_data_dict[tf] = df_primary_all_tfs[df_primary_all_tfs['Timeframe']==tf].set_index('DateTime')


# ======== BACKTEST ========
trades = []
# Rebuild df_by_tf after indicator updates
df_by_tf = { (p, tf): data_processed[(data_processed['Pair']==p) & (data_processed['Timeframe']==tf)].copy()
             for p in data_processed['Pair'].unique() for tf in data_processed['Timeframe'].unique() }

print("Starting backtest...")
trade_id_counter = 0

# Calculate total iterations for a more accurate progress bar
total_iterations = sum(len(df_by_tf[(p, tf)]) for p, tf_dict in data_processed.groupby('Pair') for tf in tf_dict['Timeframe'].unique() if (p, tf) in df_by_tf)

with tqdm(total=total_iterations, desc="Backtesting Progress") as pbar:
    for pair in data_processed['Pair'].unique(): # Iterate over all unique pairs in the data
        df_pair_all_tfs = data_processed[data_processed['Pair']==pair].copy()
        
        for tf in df_pair_all_tfs['Timeframe'].unique():
            df = df_by_tf.get((pair, tf))
            if df is None or df.empty:
                continue
            
            # Ensure ATR and EMA are not NaN at the start of loop for calculations
            df_filtered = df.dropna(subset=['ATR', 'EMA']).reset_index(drop=True)

            for i in range(1, len(df_filtered)):
                pbar.update(1) # Update progress bar for each row iteration

                row = df_filtered.iloc[i]
                prev = df_filtered.iloc[i-1]

                # Initialize conditions flags
                higher_tf_condition_met = False
                volume_condition_met = False
                primary_pair_direction_str = None
                primary_pair_alignment = False
                current_higher_tf_close = np.nan
                current_higher_tf_ema = np.nan

                # Big candle check
                if abs(prev['CandleSize']) < prev['Rolling_Avg_CandleSize'] * big_candle_factor:
                    continue

                direction = 1 if prev['CandleSize'] > 0 else -1 # 1 for Buy, -1 for Sell

                # Higher timeframe check
                higher_tf_to_check = timeframe_hierarchy.get(tf, None)
                if higher_tf_to_check:
                    df_higher = df_by_tf.get((pair, higher_tf_to_check))
                    if df_higher is not None and not df_higher.empty:
                        # Find the last candle on the higher timeframe that occurred at or before the current candle's DateTime
                        df_higher_subset = df_higher[df_higher['DateTime'] <= row['DateTime']]
                        if not df_higher_subset.empty:
                            row_higher = df_higher_subset.iloc[-1]
                            current_higher_tf_close = row_higher['Close']
                            current_higher_tf_ema = row_higher['EMA']
                            if (direction == 1 and current_higher_tf_close < current_higher_tf_ema) or \
                               (direction == -1 and current_higher_tf_close > current_higher_tf_ema):
                                continue # Condition not met, skip trade
                            higher_tf_condition_met = True
                else: # If no higher timeframe to check, consider the condition met
                    higher_tf_condition_met = True
                
                # Volume check
                if 'Volume' in row.index and 'Rolling_Avg_Volume' in row.index and not pd.isna(row['Rolling_Avg_Volume']):
                    if row['Volume'] < row['Rolling_Avg_Volume'] * volume_factor:
                        continue # Condition not met, skip trade
                    volume_condition_met = True
                else: # If no volume data, consider the condition met
                    volume_condition_met = True

                # Secondary pair alignment with primary
                if pair == secondary_pair:
                    if tf in primary_data_dict:
                        # Find the corresponding primary pair candle at or before current DateTime
                        primary_row_df = primary_data_dict[tf].loc[primary_data_dict[tf].index <= row['DateTime']]
                        if not primary_row_df.empty:
                            primary_row = primary_row_df.iloc[-1]
                            primary_pair_candle_size = primary_row['Close'] - primary_row['Open']
                            primary_direction = 1 if primary_pair_candle_size > 0 else -1
                            primary_pair_direction_str = "Buy" if primary_direction == 1 else "Sell"

                            if direction != primary_direction:
                                continue # Alignment not met, skip trade
                            primary_pair_alignment = True
                    else: # If no primary pair data for this timeframe, skip for secondary pair alignment
                        continue
                
                # Entry, TP, SL calculation
                entry_price = row['Open']
                atr_at_entry = row['ATR']
                
                # Store actual ATR factors used (or "Default" if fallback applied)
                current_tp_atr_factor = tp_atr_factor
                current_sl_atr_factor = sl_atr_factor
                
                if not pd.isna(atr_at_entry):
                    tp_price = entry_price + direction * (atr_at_entry * current_tp_atr_factor)
                    sl_price = entry_price - direction * (atr_at_entry * current_sl_atr_factor)
                else: # Fallback if ATR is somehow NaN (should be handled by df_filtered, but good safeguard)
                    tp_price = entry_price + direction * 0.0004 # Example default values
                    sl_price = entry_price - direction * 0.0002 # Example default values
                    current_tp_atr_factor = "Default"
                    current_sl_atr_factor = "Default"

                result, exit_price, mae, mfe = check_hit(entry_price, row['High'], row['Low'], row['Close'], direction, tp_price, sl_price)
                
                # If TP/SL not hit within the current candle, use Close price as exit
                duration_in_candles = 1
                if result is None:
                    # Determine result based on open vs. close if no TP/SL hit
                    result = "Win" if direction * (row['Close'] - entry_price) > 0 else "Loss"
                    exit_price = row['Close']
                    # Recalculate MAE/MFE for the candle based on open/close if TP/SL wasn't decisive
                    if direction == 1: # Buy
                        mae = max(0, entry_price - row['Low'])
                        mfe = max(0, row['High'] - entry_price)
                    else: # Sell
                        mae = max(0, row['High'] - entry_price)
                        mfe = max(0, entry_price - row['Low'])

                actual_profit_loss = (exit_price - entry_price) * direction
                entry_to_close_ratio = (exit_price - entry_price) / entry_price * 100 if entry_price != 0 else 0

                trade_id_counter += 1
                trade_record = {
                    'TradeID': trade_id_counter,
                    'Pair': pair,
                    'Timeframe': tf,
                    'Direction': "Buy" if direction == 1 else "Sell",
                    'EntryDateTime': row['DateTime'],
                    'EntryPrice': entry_price,
                    'CandleSize_at_Entry': prev['CandleSize'],
                    'RollingAvgCandleSize_at_Entry': prev['Rolling_Avg_CandleSize'],
                    'Volume_at_Entry': row['Volume'] if 'Volume' in row.index else np.nan,
                    'RollingAvgVolume_at_Entry': row['Rolling_Avg_Volume'] if 'Rolling_Avg_Volume' in row.index else np.nan,
                    'EMA_at_Entry': row['EMA'],
                    'ATR_at_Entry': atr_at_entry,
                    'ATR_TP_FactorUsed': current_tp_atr_factor,
                    'ATR_SL_FactorUsed': current_sl_atr_factor,
                    'TP_Price': tp_price,
                    'SL_Price': sl_price,
                    'ExitDateTime': row['DateTime'], # For now, exit is assumed within the same candle
                    'ExitPrice': exit_price,
                    'Result': result,
                    'ActualProfitLoss': actual_profit_loss,
                    'EntryToCloseRatio': entry_to_close_ratio,
                    'DurationInCandles': duration_in_candles,
                    'MaxAdverseExcursion': mae, # Absolute value
                    'MaxFavorableExcursion': mfe, # Absolute value
                    'HigherTimeframeToMonitor': higher_tf_to_check,
                    'HigherTimeframeClose': current_higher_tf_close,
                    'HigherTimeframeEMA': current_higher_tf_ema,
                    'HigherTimeframeConditionMet': higher_tf_condition_met,
                    'VolumeConditionMet': volume_condition_met,
                    'PrimaryPairDirection': primary_pair_direction_str,
                    'PrimaryPairAlignment': primary_pair_alignment,
                    'PeakProfitDurationInCandles': np.nan, # To be calculated later
                    'PeakProfitPrice': np.nan # To be calculated later
                }
                trades.append(trade_record)

# ======== CALCULATE PEAK PROFIT DURATION FOR WINNING TRADES ========
# Convert list of trade dictionaries to DataFrame for easier processing
trades_df = pd.DataFrame(trades)

# Create a temporary indexed DataFrame for data_processed for efficient lookup of subsequent candles
data_processed_indexed = data_processed.set_index(['Pair', 'Timeframe', 'DateTime'])

print("\nCalculating Peak Profit Duration for winning trades...")
# Only iterate through winning trades
winning_trades_indices = trades_df[trades_df['Result'] == 'Win'].index

for idx in tqdm(winning_trades_indices, desc="Calculating Peak Profit"):
    trade = trades_df.loc[idx]
    pair = trade['Pair']
    tf = trade['Timeframe']
    exit_dt = trade['ExitDateTime']
    direction = 1 if trade['Direction'] == 'Buy' else -1
    exit_price = trade['ExitPrice']

    # Find subsequent candles after the trade's exit DateTime
    sub_df_after_exit = df_by_tf.get((pair, tf))
    if sub_df_after_exit is not None:
        sub_df_after_exit = sub_df_after_exit[sub_df_after_exit['DateTime'] > exit_dt].sort_values('DateTime')

        peak_price = exit_price
        peak_duration = 0
        
        for j, subsequent_candle in sub_df_after_exit.iterrows():
            candle_high = subsequent_candle['High']
            candle_low = subsequent_candle['Low']

            if direction == 1: # Buy trade: looking for higher highs
                if candle_high > peak_price:
                    peak_price = candle_high
                    peak_duration += 1
                else: # Price stopped moving favorably
                    break
            else: # Sell trade: looking for lower lows
                if candle_low < peak_price:
                    peak_price = candle_low
                    peak_duration += 1
                else: # Price stopped moving favorably
                    break
        
        trades_df.loc[idx, 'PeakProfitDurationInCandles'] = peak_duration
        trades_df.loc[idx, 'PeakProfitPrice'] = peak_price

print("Peak Profit Duration calculation complete.")


# ======== SAVE RESULTS ========
total_trades = len(trades_df)
wins = trades_df[trades_df['Result']=="Win"].copy()
losses = trades_df[trades_df['Result']=="Loss"].copy()

print(f"\nTotal trades: {total_trades}")
print(f"Winning trades: {len(wins)}")
print(f"Losing trades: {len(losses)}")

# Save to CSV
trades_df.to_csv(os.path.join(output_folder,"all_trades.csv"), index=False)
wins.to_csv(os.path.join(output_folder,"winning_trades.csv"), index=False)
losses.to_csv(os.path.join(output_folder,"losing_trades.csv"), index=False)
print(f"Results saved to {output_folder}")

print("\nBacktest complete with enhanced output files!")
print(f"All Trades (first 5 rows):\n{trades_df.head()}")
print(f"\nWinning Trades (first 5 rows):\n{wins.head()}")

Loading CSV files...


Loading CSV files:   0%|          | 0/14 [00:00<?, ?it/s]

CSV files loaded and merged.
Calculating technical indicators (EMA, ATR, Rolling Averages)...


Processing Pairs and Timeframes:   0%|          | 0/12 [00:00<?, ?it/s]

Technical indicators calculated.
Starting backtest...


Backtesting Progress:   0%|          | 0/2773713 [00:00<?, ?it/s]


Calculating Peak Profit Duration for winning trades...


Calculating Peak Profit:   0%|          | 0/32294 [00:00<?, ?it/s]

Peak Profit Duration calculation complete.

Total trades: 48174
Winning trades: 32294
Losing trades: 15880
Results saved to C:/Users/yaman/OneDrive/سطح المكتب/project1/strategys/Big Char Trada/out

Backtest complete with enhanced output files!
All Trades (first 5 rows):
   TradeID     Pair Timeframe Direction EntryDateTime  EntryPrice  \
0        1  EUR/USD        D1      Sell    2023-12-18     1.08968   
1        2  EUR/USD        D1       Buy    2023-12-22     1.10101   
2        3  EUR/USD        D1       Buy    2025-03-04     1.04854   
3        4  EUR/USD        D1       Buy    2025-03-05     1.06192   
4        5  EUR/USD        D1      Sell    2025-04-07     1.08890   

   CandleSize_at_Entry  RollingAvgCandleSize_at_Entry  Volume_at_Entry  \
0             -0.00979                       0.004032           176756   
1              0.00716                       0.004067           198705   
2              0.00826                       0.003804            86576   
3              0.0