In [1]:
import pandas as pd
import json
from fyers_apiv3 import fyersModel
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import os
import time

# JSON file that get_fyers() reads for client_id / secret_key / redirect_uri and
# rewrites with the current access_token / refresh_token. Relative to the working directory.
CONFIG_FILE = "fyers_config.json"

def get_fyers():
    """Return an authenticated Fyers client, reusing or renewing cached tokens.

    The credentials and any cached tokens are read from ``CONFIG_FILE``.
    Three strategies are tried in order:

    1. Use the cached ``access_token`` if a profile request answers ``s == 'ok'``.
    2. Exchange the cached ``refresh_token`` for a new token pair.
    3. Fall back to the interactive flow: print the login URL and ask the user
       to paste the resulting auth code.

    Whenever new tokens are obtained they are written back to ``CONFIG_FILE``.

    Returns:
        fyersModel.FyersModel: a client built with a valid access token.

    Raises:
        RuntimeError: if the interactive flow does not return an access token.
        OSError / json.JSONDecodeError: if ``CONFIG_FILE`` cannot be read.
    """
    # Context managers guarantee the config file handle is closed.
    with open(CONFIG_FILE) as config_file:
        config = json.load(config_file)

    def _store_tokens_and_connect(resp):
        # Persist the fresh token pair, then build the client from it.
        config['access_token'] = resp['access_token']
        config['refresh_token'] = resp['refresh_token']
        with open(CONFIG_FILE, 'w') as config_file:
            json.dump(config, config_file, indent=4)
        return fyersModel.FyersModel(client_id=config['client_id'], token=resp['access_token'])

    # Try existing tokens
    if 'access_token' in config:
        client = fyersModel.FyersModel(client_id=config['client_id'], token=config['access_token'])
        try:
            if client.get_profile().get('s') == 'ok':
                return client
        except Exception as exc:
            # Best effort: report the reason and fall through to the next strategy.
            print(f"Cached access token check failed: {exc}")

        # Try refresh
        if 'refresh_token' in config:
            try:
                session = fyersModel.SessionModel(
                    client_id=config['client_id'],
                    secret_key=config['secret_key'],
                    redirect_uri=config['redirect_uri'],
                    response_type="code",
                    grant_type="refresh_token"
                )
                session.set_token(config['refresh_token'])
                resp = session.generate_token()
                if resp.get('s') == 'ok':
                    return _store_tokens_and_connect(resp)
            except Exception as exc:
                # Best effort: report the reason and fall back to interactive login.
                print(f"Token refresh failed: {exc}")

    # New auth
    session = fyersModel.SessionModel(
        client_id=config['client_id'],
        secret_key=config['secret_key'],
        redirect_uri=config['redirect_uri'],
        response_type="code"
    )
    print(f"Visit: {session.generate_authcode()}")
    auth_code = input("Enter auth_code: ").strip()

    session.grant_type = "authorization_code"
    session.set_token(auth_code)
    resp = session.generate_token()

    # Fail with the API response in the message instead of an opaque KeyError.
    if not isinstance(resp, dict) or 'access_token' not in resp:
        raise RuntimeError(f"Fyers authentication failed: {resp}")

    return _store_tokens_and_connect(resp)

# Module-level client shared by the download helpers (download_stock_data calls fyers.history).
# Running this cell may prompt for an auth code when no usable cached token exists.
fyers = get_fyers()

In [2]:
# Bare NSE tickers to download. Despite the name, the recorded output of the next cell
# reports 62 entries, so this is a watch-list rather than exactly the Nifty 50.
# Underscores (e.g. 'BAJAJ_AUTO') are converted to hyphens in the next cell before lookup.
nifty_50_raw = ['ITCHOTELS', 'INDIGO', 'MAXHEALTH', 'ADANIENT', 'ADANIPORTS', 'APOLLOHOSP', 'ASIANPAINT', 'AXISBANK', 'BAJAJFINSV', 'BAJAJ_AUTO', 'BAJFINANCE', 'BEL', 'BHARTIARTL', 'BPCL', 'BRITANNIA', 'CIPLA', 'COALINDIA', 'DIVISLAB', 'DRREDDY', 'EICHERMOT', 'ETERNAL', 'GAIL', 'GRASIM', 'HCLTECH', 'HDFCBANK', 'HDFCLIFE', 'HEROMOTOCO', 'HINDALCO', 'HINDUNILVR', 'ICICIBANK', 'INDUSINDBK', 'INFY', 'IOC', 'ITC', 'JIOFIN', 'JSWSTEEL', 'KOTAKBANK', 'LTIM', 'LT', 'M&M', 'MARUTI', 'NESTLEIND', 'NTPC', 'ONGC', 'POWERGRID', 'RELIANCE', 'SBILIFE', 'SBIN', 'SHREECEM', 'SHRIRAMFIN', 'SUNPHARMA', 'TATACONSUM', 'TATAMOTORS', 'TATASTEEL', 'TCS', 'TECHM', 'TITAN', 'TRENT', 'ULTRACEMCO', 'UPL', 'WIPRO', 'YESBANK']


In [3]:
# ----------------------------------- Locate the watch-list in the Fyers master list -----------------------------------#

# Load the Fyers master list and keep only the rows whose symbol contains '-EQ'.
# NOTE(review): the column label 'NSE:GOLDSTAR-SM' looks like a data value, which
# suggests the CSV has no header row and its first record became the header — confirm.
df = pd.read_csv(r"D:/Programming/Download_Backtest_Deploy_data/1__Download/1__Download_data_Fyers_via_API/fyers_master_list.csv")
df = df[df['NSE:GOLDSTAR-SM'].str.contains('-EQ', na=False)].reset_index(drop=True)

# Normalise the raw tickers ('_' -> '-') and derive the full Fyers symbols once.
nifty_50_raw = [symbol.replace("_", "-") for symbol in nifty_50_raw]
nifty_50_tokens = [f"NSE:{stock}-EQ" for stock in nifty_50_raw]

# Report every ticker that has no matching symbol; n counts the matches.
n = 0
x = 0
master_symbols = df["NSE:GOLDSTAR-SM"].values
for x, stock in enumerate(nifty_50_raw, start=1):
    if nifty_50_tokens[x - 1] in master_symbols:
        n += 1
    else:
        print(x, stock, "\tNot found")

print(f"From {len(nifty_50_raw)} Total {n} Nifty50 stocks found in the DataFrame.")

# Final symbol list, in master-list order, used by the download cells.
filtered_masterlist = df[df['NSE:GOLDSTAR-SM'].isin(nifty_50_tokens)].reset_index(drop=True)
nifty_50 = filtered_masterlist['NSE:GOLDSTAR-SM'].to_list()
print(nifty_50)

From 62 Total 62 Nifty50 stocks found in the DataFrame.
['NSE:BHARTIARTL-EQ', 'NSE:DIVISLAB-EQ', 'NSE:MARUTI-EQ', 'NSE:INDIGO-EQ', 'NSE:UPL-EQ', 'NSE:LT-EQ', 'NSE:ULTRACEMCO-EQ', 'NSE:TCS-EQ', 'NSE:NTPC-EQ', 'NSE:JSWSTEEL-EQ', 'NSE:YESBANK-EQ', 'NSE:GRASIM-EQ', 'NSE:HDFCBANK-EQ', 'NSE:HEROMOTOCO-EQ', 'NSE:TECHM-EQ', 'NSE:HINDALCO-EQ', 'NSE:HINDUNILVR-EQ', 'NSE:POWERGRID-EQ', 'NSE:ADANIPORTS-EQ', 'NSE:APOLLOHOSP-EQ', 'NSE:INFY-EQ', 'NSE:IOC-EQ', 'NSE:ITC-EQ', 'NSE:BAJAJ-AUTO-EQ', 'NSE:BAJAJFINSV-EQ', 'NSE:LTIM-EQ', 'NSE:NESTLEIND-EQ', 'NSE:JIOFIN-EQ', 'NSE:KOTAKBANK-EQ', 'NSE:TRENT-EQ', 'NSE:M&M-EQ', 'NSE:COALINDIA-EQ', 'NSE:SBILIFE-EQ', 'NSE:MAXHEALTH-EQ', 'NSE:ASIANPAINT-EQ', 'NSE:ONGC-EQ', 'NSE:ADANIENT-EQ', 'NSE:RELIANCE-EQ', 'NSE:ITCHOTELS-EQ', 'NSE:SBIN-EQ', 'NSE:SHREECEM-EQ', 'NSE:BAJFINANCE-EQ', 'NSE:SUNPHARMA-EQ', 'NSE:TATACONSUM-EQ', 'NSE:TATAMOTORS-EQ', 'NSE:TATASTEEL-EQ', 'NSE:TITAN-EQ', 'NSE:WIPRO-EQ', 'NSE:BEL-EQ', 'NSE:SHRIRAMFIN-EQ', 'NSE:HDFCLIFE-EQ', 'NSE:GAIL-EQ', 'NS

In [4]:
def download_stock_data(symbol, days, resolution, range_from, range_to):
    """Fetch candles for ``symbol`` from the Fyers history API in date windows.

    The inclusive range ``range_from``..``range_to`` (``YYYY-MM-DD`` strings) is
    split into consecutive windows spanning at most ``days`` days each, one
    request is issued per window through the module-level ``fyers`` client, and
    the candles are concatenated.

    Returns:
        DataFrame with columns Datetime, Open, High, Low, Close, Volume, where
        Datetime is naive Asia/Kolkata time (pinned to 09:15 for resolution
        "D"); or None if any exception occurs (the error is printed).
    """
    try:
        first_day = datetime.strptime(range_from, "%Y-%m-%d")
        last_day = datetime.strptime(range_to, "%Y-%m-%d")

        # Build the list of (from, to) request windows up front.
        windows = []
        window_start = first_day
        while not window_start > last_day:
            window_end = window_start + timedelta(days=days)
            if window_end > last_day:
                window_end = last_day
            windows.append((window_start.strftime("%Y-%m-%d"), window_end.strftime("%Y-%m-%d")))
            window_start = window_end + timedelta(days=1)

        candles = []
        for window_from, window_to in windows:
            request = {"symbol": symbol,
                       "resolution": resolution,
                       "date_format": "1",
                       "range_from": window_from,
                       "range_to": window_to,
                       "cont_flag": "1"}
            response = fyers.history(data=request)

            if response.get('s') == 'ok' and response.get('candles'):
                candles.extend(response['candles'])
            else:
                print(f"Error: {response}")

            time.sleep(1)  # To avoid hitting rate limits

        frame = pd.DataFrame(candles, columns=['timestamp', 'Open', 'High', 'Low', 'Close', 'Volume'])

        # Epoch seconds -> Asia/Kolkata wall-clock time with the timezone stripped.
        local_time = pd.to_datetime(frame['timestamp'], unit='s', utc=True).dt.tz_convert('Asia/Kolkata')
        frame['Datetime'] = local_time
        frame['Datetime'] = frame['Datetime'].dt.tz_localize(None)
        if resolution == "D":
            # Daily candles are stamped at the 09:15 market open.
            frame['Datetime'] = frame['Datetime'].dt.normalize() + pd.Timedelta(hours=9, minutes=15)

        return frame[['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume']]

    except Exception as e:
        print(f"Error downloading {symbol}: {str(e)}")
        return None

# data = download_stock_data('NSE:RELIANCE-EQ', 
#                            days=10, 
#                            resolution="D", 
#                            range_from="2025-09-01", 
#                            range_to="2025-09-30")
# data

In [5]:
# ----------------------------------- Helper Functions -----------------------------------

def calculate_expected_candles(resolution, trading_days):
    """Return the number of candles expected over ``trading_days`` sessions.

    A session runs 09:15-15:30 (375 minutes = 22,500 seconds). Candles start at
    09:15 and repeat every ``resolution`` interval, so when the interval does
    not divide the session evenly the final, shorter candle still counts
    (e.g. 30-minute candles: 09:15 ... 15:15 = 13). The count is therefore the
    ceiling of session length / interval, not the floor.

    Args:
        resolution: Fyers resolution code ("5S" ... "45S", "1" ... "240", "D").
        trading_days: number of trading sessions.

    Returns:
        int: ``trading_days`` multiplied by the candles per session.

    Raises:
        ValueError: if ``resolution`` is not one of the supported codes.
    """
    total_seconds = 22500  # 6h 15min in seconds

    resolution_map = {
        "5S": 5, "10S": 10, "15S": 15, "30S": 30, "45S": 45,                   # Seconds based
        "1": 60, "2": 120, "3": 180, "5": 300, "10": 600, "15": 900,           # Minutes based
        "20": 1200, "30": 1800, "60": 3600, "120": 7200, "240": 14400,
        "D": 22500                                                              # Daily
    }

    if resolution not in resolution_map:
        raise ValueError(f"Unsupported resolution: {resolution}")

    # Integer ceiling division: a trailing partial interval is still one candle.
    candles_per_day = -(-total_seconds // resolution_map[resolution])
    return trading_days * candles_per_day


def get_trading_days(start_date, end_date, stock_name=None, reference_csv="D:/Programming/Download_Backtest_Deploy_data/1__Download/TradingView_data_download/NIFTY_50_TV_D.csv"):
    """List the trading days between two dates, taken from a reference CSV.

    Every row of ``reference_csv`` whose ``Datetime`` falls on a calendar date
    within ``start_date``..``end_date`` (inclusive) contributes one entry. For
    symbols listed in ``LISTING_DATES`` the window start is moved forward to
    the listing date when that is later.

    Returns:
        list[str]: dates as ``YYYY-MM-DD`` strings, in file order; an empty
        list if anything goes wrong (the error is printed).
    """
    LISTING_DATES = {
        "NSE:ITCHOTELS-EQ": "2025-01-29"
    }

    try:
        calendar = pd.read_csv(reference_csv)
        calendar['Datetime'] = pd.to_datetime(calendar['Datetime'])

        window_start = pd.to_datetime(start_date)
        window_end = pd.to_datetime(end_date)

        # A stock cannot have candles before it was listed.
        listed_on = LISTING_DATES.get(stock_name) if stock_name else None
        if listed_on is not None:
            window_start = max(window_start, pd.to_datetime(listed_on))

        session_dates = calendar['Datetime'].dt.date
        in_window = (session_dates >= window_start.date()) & (session_dates <= window_end.date())

        return [str(session_date) for session_date in session_dates[in_window].tolist()]
    except Exception as e:
        print(f"Error reading trading days: {str(e)}")
        return []


def verify_data_completeness(data, expected_candles, trading_days, resolution, threshold=0.999):
    """Check downloaded candles against the expected trading calendar.

    Args:
        data: DataFrame with a ``Datetime`` column (or None). It is NOT
            modified; the per-row date labels are kept in a local Series so no
            helper column leaks into the caller's frame or the saved CSV.
        expected_candles: total candles expected over all ``trading_days``.
        trading_days: expected trading dates as ``YYYY-MM-DD`` strings.
        resolution: Fyers resolution code understood by
            ``calculate_expected_candles``.
        threshold: fraction of the per-day expectation below which a present
            day is reported as incomplete.

    Returns:
        dict with ``status`` ("complete", "incomplete" or "failed"),
        ``actual_candles``, ``expected_candles``, ``missing_days``,
        ``incomplete_days``, ``missing_candles_details`` and
        ``completeness_pct``; non-failed results also carry
        ``expected_per_day``. ``missing_candles_details`` has one entry for
        every present day that has fewer candles than expected, including days
        that are still above ``threshold``.

    Raises:
        ValueError: propagated from ``calculate_expected_candles`` for an
            unsupported ``resolution`` (only when ``data`` is non-empty).
    """
    if data is None or data.empty:
        return {
            "status": "failed",
            "actual_candles": 0,
            "expected_candles": expected_candles,
            "missing_days": trading_days,
            "incomplete_days": [],
            # Always present so callers can index the key for any status.
            "missing_candles_details": [],
            "completeness_pct": 0.0
        }

    actual_candles = len(data)
    timestamps = pd.to_datetime(data['Datetime'])
    day_labels = timestamps.dt.date.astype(str)
    available_days = set(day_labels.unique())

    # Check for completely missing days
    missing_days = [day for day in trading_days if day not in available_days]
    missing_lookup = set(missing_days)

    incomplete_days = []
    missing_candles_details = []
    expected_per_day = calculate_expected_candles(resolution, 1)

    if expected_per_day:
        # Group once by day; the positional key avoids index-alignment surprises.
        per_day = timestamps.groupby(day_labels.to_numpy())
        day_counts = per_day.size()

        # Candle spacing derived from the resolution code itself, so every
        # supported resolution gets the right grid. The code was validated by
        # calculate_expected_candles above, so the int() parsing cannot fail.
        if resolution == "D":
            step_seconds = 22500
        elif resolution.endswith("S"):
            step_seconds = int(resolution[:-1])
        else:
            step_seconds = int(resolution) * 60
        step = pd.Timedelta(seconds=step_seconds)

        for day in trading_days:
            if day in missing_lookup:
                continue  # Only check days that exist

            actual_day_candles = int(day_counts.get(day, 0))

            # Day is below the tolerated fraction of the expectation
            if actual_day_candles < expected_per_day * threshold:
                incomplete_days.append(day)

            # Record ANY missing candles (even if above threshold)
            if actual_day_candles < expected_per_day:
                expected_timestamps = pd.date_range(
                    start=pd.Timestamp(f"{day} 09:15:00"),
                    end=pd.Timestamp(f"{day} 15:29:55"),
                    freq=step,
                )
                actual_timestamps = pd.DatetimeIndex(per_day.get_group(day))
                missing_timestamps = expected_timestamps.difference(actual_timestamps)

                missing_candles_details.append({
                    "date": day,
                    "missing_count": len(missing_timestamps),
                    "actual_candles": actual_day_candles,
                    "expected_candles": expected_per_day,
                    "missing_timestamps": missing_timestamps.strftime('%Y-%m-%d %H:%M:%S').tolist()
                })

    completeness_pct = (actual_candles / expected_candles * 100) if expected_candles > 0 else 0

    total_problem_days = len(missing_days) + len(incomplete_days)
    status = "complete" if (total_problem_days == 0 and completeness_pct >= 99.9) else "incomplete"

    return {
        "status": status,
        "actual_candles": actual_candles,
        "expected_candles": expected_candles,
        "missing_days": missing_days,
        "incomplete_days": incomplete_days,
        "missing_candles_details": missing_candles_details,
        "completeness_pct": round(completeness_pct, 2),
        "expected_per_day": expected_per_day
    }


def load_data_status():
    """Load the per-stock status tracking JSON.

    Returns:
        dict: the stored status, or an empty dict when the file does not
        exist, cannot be read or parsed, or does not hold a JSON object. Read
        and parse failures are reported on stdout, because a later save would
        otherwise silently replace the unreadable file.
    """
    status_file = "D:/Programming/Download_Backtest_Deploy_data/1__Download/1__Download_data_Fyers_via_API/fyers_data_status.json"

    if not os.path.exists(status_file):
        return {}

    try:
        with open(status_file, 'r') as f:
            loaded = json.load(f)
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError is a ValueError subclass.
        print(f"Warning: could not read status file {status_file}: {exc}")
        return {}

    if not isinstance(loaded, dict):
        print(f"Warning: status file {status_file} does not contain a JSON object; ignoring it")
        return {}
    return loaded


def save_data_status(status_data):
    """Save the per-stock status tracking JSON.

    The data is serialised to a sibling ``.tmp`` file first and then moved over
    the real file with ``os.replace``. If serialisation fails (for example on a
    value that is not JSON-serialisable) the previous status file is left
    untouched instead of being truncated.

    Args:
        status_data: JSON-serialisable dict of status entries.

    Raises:
        OSError: if the file cannot be written or replaced.
        TypeError / ValueError: if ``status_data`` cannot be serialised.
    """
    status_file = "D:/Programming/Download_Backtest_Deploy_data/1__Download/1__Download_data_Fyers_via_API/fyers_data_status.json"
    temp_file = status_file + ".tmp"

    try:
        with open(temp_file, 'w') as f:
            json.dump(status_data, f, indent=2)
        os.replace(temp_file, status_file)
    finally:
        # After a successful replace the temp file no longer exists;
        # after a failure this removes the partial file.
        if os.path.exists(temp_file):
            os.remove(temp_file)


def fill_problem_days(symbol, resolution, problem_days, existing_data=None):
    """Re-download individual days and merge them into the existing data.

    Each day in ``problem_days`` is requested separately through
    ``download_stock_data``. Existing rows are replaced only for the days that
    actually came back from the API; a day whose re-download returned nothing
    keeps whatever rows it already had, so a failed retry never loses data.
    ``existing_data`` itself is not modified.

    Args:
        symbol: Fyers symbol, e.g. "NSE:RELIANCE-EQ".
        resolution: Fyers resolution code.
        problem_days: dates (``YYYY-MM-DD`` strings) to re-download.
        existing_data: previously downloaded DataFrame, or None.

    Returns:
        None when ``problem_days`` is empty; ``existing_data`` unchanged when
        nothing could be downloaded; otherwise the new candles, merged with
        the retained existing rows (sorted by ``Datetime``, without any
        ``Date`` helper column) when existing data was supplied.
    """
    if not problem_days:
        return None

    print(f"  → Downloading {len(problem_days)} problem days: {problem_days[:5]}{'...' if len(problem_days) > 5 else ''}")

    all_new_data = []

    for day in problem_days:
        data = download_stock_data(symbol, days=1, resolution=resolution, range_from=day, range_to=day)
        if data is not None and not data.empty:
            all_new_data.append(data)
            print(f"    ✓ {day}: {len(data)} candles")
        else:
            print(f"    ✗ {day}: No data")
        time.sleep(1)

    if not all_new_data:
        return existing_data

    new_data = pd.concat(all_new_data, ignore_index=True)

    if existing_data is None or existing_data.empty:
        return new_data

    new_data['Datetime'] = pd.to_datetime(new_data['Datetime'])

    # drop() returns a new frame, so the caller's DataFrame stays untouched.
    retained = existing_data.drop(columns=['Date'], errors='ignore')
    retained['Datetime'] = pd.to_datetime(retained['Datetime'])

    # Replace only the days for which fresh candles were actually received.
    refreshed_days = set(new_data['Datetime'].dt.date.astype(str))
    retained_days = retained['Datetime'].dt.date.astype(str)
    retained = retained[~retained_days.isin(refreshed_days)]

    merged_data = pd.concat([retained, new_data], ignore_index=True)
    return merged_data.sort_values('Datetime').reset_index(drop=True)


def download_with_verification(symbol, resolution, range_from, range_to, days=10, retries=3):
    """Download a symbol's candles and retry only the days that look wrong.

    The first attempt downloads the whole range in windows of ``days`` days.
    Every later attempt re-downloads just the missing and incomplete days
    reported by the previous verification and merges them into the data held
    so far. The loop stops early as soon as the verification is "complete".

    Returns:
        (data_df, verification_result, best_attempt) where ``best_attempt`` is
        ``{"candles": <actual candle count>, "attempt": <attempt number>}``.
    """
    trading_days = get_trading_days(range_from, range_to, symbol)
    expected_candles = calculate_expected_candles(resolution, len(trading_days))

    frame = None
    report = None

    for attempt_no in range(1, retries + 1):
        print(f"\n{'-'*60}")
        print(f"Attempt {attempt_no}/{retries} for {symbol} ({resolution})")
        print(f"{'-'*60}")

        if attempt_no == 1:
            # First pass: the whole requested range
            print(f"Downloading full range: {range_from} to {range_to}")
            frame = download_stock_data(symbol, days, resolution, range_from, range_to)
            report = verify_data_completeness(frame, expected_candles, trading_days, resolution)
        elif report:
            # Later passes: only the days the last verification flagged
            absent_days = report['missing_days']
            partial_days = report['incomplete_days']
            days_to_fix = absent_days + partial_days

            if not days_to_fix:
                print("  → No problem days to fix")
            else:
                print(f"Fixing {len(days_to_fix)} problem days:")
                print(f"  • Missing: {len(absent_days)} days")
                print(f"  • Incomplete: {len(partial_days)} days")

                frame = fill_problem_days(symbol, resolution, days_to_fix, frame)

                if frame is None:
                    print(f"  ✗ No data retrieved from API")
                else:
                    print(f"  → Total candles after merge: {len(frame)}")

                # Re-verify after merge
                report = verify_data_completeness(frame, expected_candles, trading_days, resolution)

        # Report this attempt and decide whether another one is worthwhile
        remaining_problems = 0
        if not report:
            print("✗ Download failed")
        else:
            print(f"\nResult: {report['actual_candles']}/{report['expected_candles']} candles ({report['completeness_pct']}%)")
            print(f"Status: {report['status']}")
            print(f"Missing days: {len(report['missing_days'])}")
            print(f"Incomplete days: {len(report['incomplete_days'])}")

            if report['status'] == 'complete':
                print(f"✓ COMPLETE on attempt {attempt_no}")
                return frame, report, {"candles": report['actual_candles'], "attempt": attempt_no}

            remaining_problems = len(report['missing_days']) + len(report['incomplete_days'])

        if remaining_problems > 0 and attempt_no < retries:
            print(f"\n→ Retrying to fix {remaining_problems} remaining problem days...")
            time.sleep(2)

    if not report:
        # No attempt produced a verification at all
        failed_verification = verify_data_completeness(None, expected_candles, trading_days, resolution)
        return None, failed_verification, {"candles": 0, "attempt": retries}

    # Retries exhausted: hand back the best data gathered so far
    print(f"\n{'='*60}")
    print(f"Final result after {retries} attempts:")
    print(f"  Candles: {report['actual_candles']}/{report['expected_candles']}")
    print(f"  Completeness: {report['completeness_pct']}%")
    print(f"  Missing days: {len(report['missing_days'])}")
    print(f"  Incomplete days: {len(report['incomplete_days'])}")
    print(f"{'='*60}")

    return frame, report, {"candles": report['actual_candles'], "attempt": retries}

In [6]:
def _build_status_record(range_from, range_to, trading_day_count, verification):
    """Build the JSON-serialisable status entry for one stock and resolution."""
    return {
        "last_updated": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "date_range": {"from": range_from, "to": range_to},
        "trading_days_expected": trading_day_count,
        "expected_candles": int(verification['expected_candles']),
        "actual_candles": int(verification['actual_candles']),
        "missing_days": [str(d) for d in verification['missing_days']],
        "incomplete_days": [str(d) for d in verification.get('incomplete_days', [])],
        "missing_candles_details": [
            {
                "date": str(item['date']),
                "missing_count": int(item['missing_count']),
                "missing_timestamps": ", ".join([str(ts) for ts in item['missing_timestamps']])
            }
            for item in verification.get('missing_candles_details', [])
        ],
        "status": verification['status'],
        "completeness_pct": float(verification['completeness_pct'])
    }


def _print_missing_candle_details(verification):
    """Print the per-day missing-candle summary of a verification result."""
    print(f"  missing_candles_details:")
    # .get(): a "failed" verification may not carry this key.
    for missing_candles_day in verification.get('missing_candles_details', []):
        print(f"    Date: {missing_candles_day['date']} | Missing: {missing_candles_day['missing_count']} candles")
        print(f"      Timestamps: {missing_candles_day['missing_timestamps'][:5]}")  # First 5 timestamps


def master_download(resolution, days, range_from, range_to):
    """Download, verify and store candles for every symbol in ``nifty_50``.

    For each symbol:

    * If its CSV already exists, it is verified. Complete files are skipped;
      otherwise only the missing/incomplete days are re-downloaded and the
      merged result is written back.
    * If there is no CSV, or the repair produced no data, the full range is
      downloaded through ``download_with_verification``.

    After every symbol the status JSON is updated and saved. The stored record
    always reflects the actual verification result, including the
    missing-candle details of files that pass as complete. The expected number
    of trading days is computed per symbol, so it honours listing dates.

    Args:
        resolution: Fyers resolution code (also part of the storage folder name).
        days: window size in days for the full-range download.
        range_from, range_to: inclusive date range as ``YYYY-MM-DD`` strings.

    Returns:
        (successful, incomplete, failed): three lists of stock names.
    """
    data_folder = f"D:/Programming/Download_Backtest_Deploy_data/1__Download/1__Download_data_Fyers_via_API/storage_Fyers_{resolution}"

    os.makedirs(data_folder, exist_ok=True)

    status_data = load_data_status()

    successful = []
    failed = []
    incomplete = []

    print(f"\n{'='*120}")
    print(f"{' FYERS DATA DOWNLOAD (ENHANCED) ':-^120}")
    print(f"{'='*120}")
    print(f"Resolution: {resolution}")
    print(f"Date Range: {range_from} to {range_to}")
    print(f"Total Stocks: {len(nifty_50)}")
    print(f"{'='*120}\n")

    for i, symbol in enumerate(nifty_50, 1):
        print(i, symbol)

        stock_name = symbol.replace("NSE:", "").replace("-EQ", "").replace('-', '_')
        filename = f"{data_folder}/{stock_name}_Fyers_{resolution}.csv"

        print(f"{'='*90}")
        print(f"{f' [{i}/{len(nifty_50)}] {stock_name} ':-^90}")

        # Initialize stock status if not exists
        if stock_name not in status_data:
            status_data[stock_name] = {}

        if resolution not in status_data[stock_name]:
            status_data[stock_name][resolution] = {}

        # Symbol-aware calendar, shared by verification and the status record
        trading_days = get_trading_days(range_from, range_to, symbol)

        # Check if file already exists
        if os.path.exists(filename):
            existing_data = pd.read_csv(filename)
            print(f"✓ Existing file found: {len(existing_data)} candles")

            # Verify existing data
            expected_candles = calculate_expected_candles(resolution, len(trading_days))
            verification = verify_data_completeness(existing_data, expected_candles, trading_days, resolution)

            # Printing details
            print(f"  Status: {verification['completeness_pct']}% complete")
            print(f"  Candles: {verification['actual_candles']}/{verification['expected_candles']}")
            print(f"  Missing days: {len(verification['missing_days'])}")
            print(f"  Incomplete days: {len(verification['incomplete_days'])}")
            _print_missing_candle_details(verification)

            if verification['status'] == 'complete':
                print("✓ Data is complete. Skipping.\n")
                successful.append(stock_name)
                status_data[stock_name][resolution] = _build_status_record(
                    range_from, range_to, len(trading_days), verification
                )
                save_data_status(status_data)
                continue

            problem_days = verification['missing_days'] + verification['incomplete_days']
            print(f"⚠ Total problem days: {len(problem_days)}")
            print("  → Will attempt to fix problem days...\n")

            # Fix problem days
            if len(problem_days) > 0:
                fixed_data = fill_problem_days(symbol, resolution, problem_days, existing_data)

                if fixed_data is not None and not fixed_data.empty:
                    # Re-verify
                    final_verification = verify_data_completeness(fixed_data, expected_candles, trading_days, resolution)

                    fixed_data.to_csv(filename, index=False)

                    # Update status
                    status_data[stock_name][resolution] = _build_status_record(
                        range_from, range_to, len(trading_days), final_verification
                    )
                    save_data_status(status_data)

                    if final_verification['status'] == 'complete':
                        print(f"✓ Successfully fixed all problem days!")
                        successful.append(stock_name)
                    else:
                        print(f"⚠ Still incomplete: {final_verification['completeness_pct']}%")
                        incomplete.append(stock_name)

                    continue

            # Nothing usable came out of the repair: fall through to a full download.

        # Download with enhanced verification (per-day check + incremental retry)
        data, verification, best_attempt = download_with_verification(
            symbol, resolution, range_from, range_to, days=days, retries=3
        )

        # Update status
        record = _build_status_record(range_from, range_to, len(trading_days), verification)
        record["retry_count"] = best_attempt['attempt']
        record["best_attempt"] = best_attempt
        status_data[stock_name][resolution] = record

        # Handle based on status
        if verification['status'] == 'complete':
            data.to_csv(filename, index=False)
            successful.append(stock_name)
            save_data_status(status_data)
            print(f"\n✓ SUCCESS: {stock_name} saved")

        elif verification['status'] == 'incomplete':
            data.to_csv(filename, index=False)
            incomplete.append(stock_name)
            status_data[stock_name][resolution]['user_action'] = 'used_best_data'
            save_data_status(status_data)

            print(f"\n⚠ INCOMPLETE: {stock_name}")
            print(f"  Completeness: {verification['completeness_pct']}%")
            print(f"  Missing days: {len(verification['missing_days'])}")
            print(f"  Incomplete days: {len(verification.get('incomplete_days', []))}")
            _print_missing_candle_details(verification)

            print(f"✓ Using available data: {verification['actual_candles']} candles")

        else:
            failed.append(stock_name)
            print(f"\n✗ FAILED: {stock_name}")
            save_data_status(status_data)

        time.sleep(1)

    # Final Summary
    print(f"\n{'='*120}")
    print(f"{'-- DOWNLOAD SUMMARY --':-^120}")
    print(f"{'='*120}")
    print(f"✓ Successful: {len(successful)}/{len(nifty_50)}")
    print(f"⚠ Incomplete: {len(incomplete)}/{len(nifty_50)}")
    print(f"✗ Failed: {len(failed)}/{len(nifty_50)}")

    # Calculate aggregate statistics
    total_expected = 0
    total_actual = 0
    total_missing_days = 0
    total_incomplete_days = 0

    for stock in nifty_50:
        stock_name = stock.replace("NSE:", "").replace("-EQ", "").replace('-', '_')
        if stock_name in status_data and resolution in status_data[stock_name]:
            stock_status = status_data[stock_name][resolution]
            total_expected += stock_status.get('expected_candles', 0)
            total_actual += stock_status.get('actual_candles', 0)
            total_missing_days += len(stock_status.get('missing_days', []))
            total_incomplete_days += len(stock_status.get('incomplete_days', []))

    total_missing_candles = total_expected - total_actual

    print(f"\n{'--- AGGREGATE STATISTICS ---':-^60}")
    print(f"Total Expected Candles: {total_expected:,}")
    print(f"Total Actual Candles: {total_actual:,}")
    print(f"Total Missing Candles: {total_missing_candles:,}")
    print(f"Overall Completeness: {(total_actual/total_expected*100):.2f}%" if total_expected > 0 else "N/A")
    print(f"Total Missing Days: {total_missing_days}")
    print(f"Total Incomplete Days: {total_incomplete_days}")

    if successful:
        print(f"\n✓ Complete stocks ({len(successful)}):")
        for stock in successful[:10]:
            print(f"  • {stock}")
        if len(successful) > 10:
            print(f"  ... and {len(successful) - 10} more")

    if incomplete:
        print(f"\n⚠ Incomplete stocks ({len(incomplete)}):")
        for stock in incomplete:
            stock_info = status_data.get(stock, {}).get(resolution, {})
            completeness = stock_info.get('completeness_pct', 0)
            print(f"  • {stock} ({completeness}%)")

    if failed:
        print(f"\n✗ Failed stocks ({len(failed)}):")
        for stock in failed:
            print(f"  • {stock}")

    print(f"\n{'='*120}\n")

    time.sleep(2)

    return successful, incomplete, failed

# =======================================================

# Fyers resolution code for this run ("5S" = 5-second candles). It also selects the
# storage folder inside master_download and is read again by the analysis cell below.
resolution = "5S"

# Test master download with all stocks
# `days` is the window size (in days) of each history request in the full-range download.
successful, incomplete, failed = master_download(resolution=resolution,
                                                 days=100, 
                                                 range_from="2025-01-01",  
                                                 range_to="2025-09-30")



-------------------------------------------- FYERS DATA DOWNLOAD (ENHANCED) --------------------------------------------
Resolution: 5S
Date Range: 2025-01-01 to 2025-09-30
Total Stocks: 62

1 NSE:BHARTIARTL-EQ
----------------------------------- [1/62] BHARTIARTL ------------------------------------
✓ Existing file found: 841500 candles
  Status: 100.0% complete
  Candles: 841500/841500
  Missing days: 0
  Incomplete days: 0
  missing_candles_details:
✓ Data is complete. Skipping.

2 NSE:DIVISLAB-EQ
------------------------------------ [2/62] DIVISLAB -------------------------------------
✓ Existing file found: 841500 candles
  Status: 100.0% complete
  Candles: 841500/841500
  Missing days: 0
  Incomplete days: 0
  missing_candles_details:
✓ Data is complete. Skipping.

3 NSE:MARUTI-EQ
------------------------------------- [3/62] MARUTI --------------------------------------
✓ Existing file found: 841500 candles
  Status: 100.0% complete
  Candles: 841500/841500
  Missing days: 0
  

In [7]:
import json
import pandas as pd

# Status file to analyse: per-stock, per-resolution completeness details.
STATUS_FILE = "D:/Programming/Download_Backtest_Deploy_data/1__Download/1__Download_data_Fyers_via_API/fyers_data_status.json"

# Number of "time / count" entries printed on each output row.
TIMES_PER_ROW = 5

# Load status file
with open(STATUS_FILE, 'r') as f:
    status_data = json.load(f)

# Generate all expected timestamps for resolution.
# NOTE(review): `resolution` (set in an earlier cell) is passed straight to
# pandas as a frequency alias -- confirm every resolution used here is one
# pandas accepts.
market_start = pd.to_datetime("09:15:00", format="%H:%M:%S")
market_end = pd.to_datetime("15:29:55", format="%H:%M:%S")
all_timestamps = pd.date_range(start=market_start, end=market_end, freq=resolution)
all_times = [ts.strftime('%H:%M:%S') for ts in all_timestamps]

print(f"Total expected timestamps: {len(all_times)}")

# Aggregate missing candles by timestamp across ALL stocks.
# Every expected time-of-day starts at 0 so that times with no gaps are shown too.
timestamp_counts = {slot: 0 for slot in all_times}

for stock_name, resolutions in status_data.items():
    if resolution not in resolutions:
        continue

    details = resolutions[resolution]
    # `or []` guards against an explicit null stored in the JSON.
    missing_details = details.get('missing_candles_details') or []

    for day_detail in missing_details:
        timestamps = day_detail.get('missing_timestamps') or []

        # Handle both formats: list (new) or comma-separated string (old).
        # Entries containing '...' are truncation markers, not timestamps.
        if isinstance(timestamps, str):
            timestamps = [ts.strip() for ts in timestamps.split(',') if '...' not in ts]

        # Now timestamps is always a list
        for ts in timestamps:
            if not ts:
                # Empty entry (e.g. from a trailing comma) -- nothing to count.
                continue
            # Accept both "<date> <time>" and bare "<time>" entries.
            time_only = ts.split(' ')[1] if ' ' in ts else ts
            if time_only in timestamp_counts:
                timestamp_counts[time_only] += 1

# Display aggregated results
title = f"MISSING CANDLES BY TIMESTAMP (ALL {len(all_times)} TIMESTAMPS)"
print(f"\n{'='*70}")
print(f"{title:^70}")
print(f"{'='*70}")
print(f"Resolution: {resolution}")
print(f"Total timestamps in trading day: {len(all_times)}")
print(f"\n{'Time':<9} {'Missing Count':<10}")
print(f"{'-'*70}")

total_missing = sum(timestamp_counts.values())
total_stocks = len(status_data)

# Display ALL timestamps, TIMES_PER_ROW per line. Slicing (instead of flushing
# only full groups) ensures a final, shorter row is still printed. The loop
# variable is deliberately NOT called `time`: at module level that would
# replace the imported `time` module and break later `time.sleep(...)` calls.
for row_start in range(0, len(all_times), TIMES_PER_ROW):
    for slot in all_times[row_start:row_start + TIMES_PER_ROW]:
        count = timestamp_counts[slot]
        print(f"{slot:<9} {count:<4} {'Missing' if count > 0 else '       '}", end='    ')
    print()

# Worst time-of-day; `default` keeps max() from raising on an empty mapping.
worst_time, worst_count = max(
    timestamp_counts.items(), key=lambda item: item[1], default=(None, 0)
)

print(f"\n{'='*70}")
print("Summary:")
print(f"  Total stocks analyzed: {total_stocks}")
print(f"  Total timestamps: {len(all_times)}")
print(f"  Timestamps with missing data: {sum(1 for c in timestamp_counts.values() if c > 0)}")
if worst_count > 0:
    print(f"  Most problematic time: {worst_time} ({worst_count} missing)")
else:
    # Avoid reporting an arbitrary first timestamp as "problematic" with 0 missing.
    print("  Most problematic time: none (no missing candles recorded)")
print(f"  Total missing candles (all stocks, all times): {total_missing}")

Total expected timestamps: 4500

          MISSING CANDLES BY TIMESTAMP (ALL 4500 TIMESTAMPS)          
Resolution: 5S
Total timestamps in trading day: 4500

Time      Missing Count
----------------------------------------------------------------------
09:15:00  0               09:15:05  0               09:15:10  0               09:15:15  0               09:15:20  0               
09:15:25  0               09:15:30  0               09:15:35  0               09:15:40  0               09:15:45  0               
09:15:50  0               09:15:55  0               09:16:00  0               09:16:05  0               09:16:10  0               
09:16:15  0               09:16:20  0               09:16:25  0               09:16:30  0               09:16:35  0               
09:16:40  0               09:16:45  0               09:16:50  0               09:16:55  0               09:17:00  0               
09:17:05  0               09:17:10  0               09:17:15  0               09:17:20  0   