### 1. Imports & Initialization

In [2]:
import MetaTrader5 as mt5
import pandas as pd
from datetime import datetime
import pytz

# 1. Initialize Connection
if not mt5.initialize():
    print("‚ùå initialize() failed, error code =", mt5.last_error())
    quit()
else:
    print("‚úÖ Connected to MetaTrader 5")
    
# Check account info to ensure we are live
account_info = mt5.account_info()
if account_info:
    print(f"   Login: {account_info.login}")
    print(f"   Server: {account_info.server}")
    print(f"   Currency: {account_info.currency}")

‚úÖ Connected to MetaTrader 5
   Login: 105395845
   Server: FBS-Demo
   Currency: USD


### 2: Configuration & Extraction (H1)

In [3]:
# 2. Configuration
SYMBOL = "EURUSD"
TIMEFRAME = mt5.TIMEFRAME_H1  # <--- CRITICAL CHANGE: Hourly Data
TIMEZONE = pytz.timezone("Etc/UTC")

# 3. Date Range (2020 to Present)
# We go back to Jan 1, 2020 to capture different market conditions (Volatile & Quiet)
utc_from = datetime(2020, 1, 1, tzinfo=TIMEZONE)
utc_to = datetime.now(TIMEZONE)

print(f"‚è≥ Extracting {SYMBOL} (H1) from {utc_from.date()} to {utc_to.date()}...")

# 4. Request Data
rates = mt5.copy_rates_range(SYMBOL, TIMEFRAME, utc_from, utc_to)

if rates is None or len(rates) == 0:
    print("‚ùå No data received! Check your symbol name and internet connection.")
else:
    print(f"‚úÖ Received {len(rates)} H1 candles.")

‚è≥ Extracting EURUSD (H1) from 2020-01-01 to 2025-12-23...
‚úÖ Received 37204 H1 candles.


### 3: Formatting & Validation

In [4]:
# 5. Convert to DataFrame
df = pd.DataFrame(rates)

# 6. Convert Time
# MT5 sends time in seconds (Unix timestamp). We convert to human-readable datetime.
df['time'] = pd.to_datetime(df['time'], unit='s')

# Set 'time' as the index (Standard for Time Series)
df.set_index('time', inplace=True)

# 7. Basic Cleanup
# We only need OHLC and Volume.
# 'spread' and 'real_volume' are often 0 or unreliable in historical data, but we keep them just in case.
expected_cols = ['open', 'high', 'low', 'close', 'tick_volume', 'spread', 'real_volume']
df = df[expected_cols]

# 8. Validation
print("\n--- Data Head (Start) ---")
display(df.head(3))

print("\n--- Data Tail (End) ---")
display(df.tail(3))

# Check for gaps
print(f"\nTotal Rows: {len(df)}")
print(f"Date Range: {df.index.min()} to {df.index.max()}")


--- Data Head (Start) ---


Unnamed: 0_level_0,open,high,low,close,tick_volume,spread,real_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-02 00:00:00,1.1212,1.12168,1.12071,1.12153,954,29,0
2020-01-02 01:00:00,1.12156,1.1223,1.12156,1.12196,833,21,0
2020-01-02 02:00:00,1.12191,1.122,1.12167,1.12192,615,21,0



--- Data Tail (End) ---


Unnamed: 0_level_0,open,high,low,close,tick_volume,spread,real_volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2025-12-23 07:00:00,1.17749,1.17797,1.17742,1.17749,1108,8,0
2025-12-23 08:00:00,1.17749,1.17763,1.17685,1.17711,1190,8,0
2025-12-23 09:00:00,1.17709,1.17812,1.17691,1.17785,1994,8,0



Total Rows: 37204
Date Range: 2020-01-02 00:00:00 to 2025-12-23 09:00:00


### 4: Save to Parquet

In [7]:
import os

# 9. Ensure Directory Exists
if not os.path.exists("../data"):
    os.makedirs("../data")

# 10. Save
output_path = "../data/EURUSD_H1_Raw.parquet"
df.to_parquet(output_path)

print(f"üíæ Success! Data saved to: {output_path}")
print("   Ready for Phase 2: Labeling (Triple Barrier).")

# 11. Shutdown MT5
mt5.shutdown()

üíæ Success! Data saved to: ../data/EURUSD_H1_Raw.parquet
   Ready for Phase 2: Labeling (Triple Barrier).


True