In [1]:
# Import required modules
import os
import sys
from pathlib import Path

# Add src directory to Python path.
# The path is resolved from the current working directory, so this only finds
# src/ when the notebook is run from a folder that sits next to src/.
notebook_dir = Path.cwd()
src_dir = notebook_dir.parent / 'src'
# Guard the insert so re-running this cell does not stack duplicate entries.
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

# Import our bulk fetching modules
try:
    from hk_stock_universe import (
        get_hk_stock_list_static,
        get_hk_stocks_by_sector,
        get_comprehensive_hk_stock_list,
        get_top_hk_stocks,
        MAJOR_HK_STOCKS
    )
    from bulk_data_fetcher import (
        fetch_hk_stocks_bulk,
        fetch_all_major_hk_stocks,
        fetch_top_50_hk_stocks,
        fetch_hk_tech_stocks,
        create_bulk_fetch_summary,
        save_bulk_data
    )
except ImportError as e:
    # Print enough context to diagnose a wrong working directory or sys.path.
    print(f"Import error: {e}")
    print(f"Current working directory: {os.getcwd()}")
    print(f"Python path: {sys.path[:3]}...")  # Show first 3 entries
    # Re-raise: every later cell needs these names, and falling through would
    # print the success banner below even though the imports failed.
    raise

# Standard libraries
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Only reached when every import above succeeded.
print("✅ All modules imported successfully!")
print("🚀 Ready for bulk data collection!")


✅ All modules imported successfully!
🚀 Ready for bulk data collection!


In [2]:
# Explore available stock categories.
# MAJOR_HK_STOCKS is iterated as a mapping of sector name -> sequence of tickers.
PREVIEW_COUNT = 3  # how many example tickers to show per sector

print("📊 Available Stock Categories:")
print("=" * 50)

for sector, stocks in MAJOR_HK_STOCKS.items():
    print(f"🏢 {sector.upper()}: {len(stocks)} stocks")
    # Append "..." only when some tickers were actually left out of the preview.
    ellipsis = "..." if len(stocks) > PREVIEW_COUNT else ""
    print(f"   Examples: {', '.join(stocks[:PREVIEW_COUNT])}{ellipsis}")
    print()

# Get all major stocks (deduplicated)
all_major_stocks = get_hk_stock_list_static()
print(f"📈 Total unique major stocks: {len(all_major_stocks)}")

# Show some examples
print(f"🔍 Sample tickers: {all_major_stocks[:10]}")


📊 Available Stock Categories:
==================================================
🏢 BLUE_CHIPS: 15 stocks
   Examples: 0700.HK, 0005.HK, 0941.HK...

🏢 TECH_STOCKS: 10 stocks
   Examples: 0700.HK, 9988.HK, 3690.HK...

🏢 FINANCE: 10 stocks
   Examples: 0005.HK, 1398.HK, 0939.HK...

🏢 PROPERTY: 10 stocks
   Examples: 0016.HK, 0017.HK, 1113.HK...

📈 Total unique major stocks: 34
🔍 Sample tickers: ['0700.HK', '0005.HK', '0941.HK', '0388.HK', '1299.HK', '2318.HK', '1398.HK', '0939.HK', '3988.HK', '2388.HK']


In [3]:
# Set up date range (last 3 months for demo)
LOOKBACK_DAYS = 90     # calendar days of history to request
DEMO_MAX_STOCKS = 20   # cap on the number of tickers fetched in the demo

# Take a single timestamp so both dates come from the same instant; two
# separate now() calls could land on different days around midnight.
run_time = datetime.now()
end_date = run_time.strftime('%Y-%m-%d')
start_date = (run_time - timedelta(days=LOOKBACK_DAYS)).strftime('%Y-%m-%d')

# Weekday count for the window. Exchange holidays are not excluded, so the
# number of records actually returned can be lower than this estimate.
expected_trading_days = len(pd.bdate_range(start=start_date, end=end_date))

print(f"📅 Fetching data from {start_date} to {end_date}")
print(f"📊 Expected trading days: ~{expected_trading_days}")

# Example 1: Fetch top stocks (for demo purposes)
print(f"\n🚀 DEMO: Fetching top {DEMO_MAX_STOCKS} HK stocks...")

demo_stocks = fetch_all_major_hk_stocks(
    start_date=start_date,
    end_date=end_date,
    max_stocks=DEMO_MAX_STOCKS,  # Limit for demo
    batch_size=5,                # Small batches for demo
    delay_between_batches=1.0    # 1 second between batches
)

print(f"\n✅ Demo completed! Fetched {len(demo_stocks)} stocks")

# Show summary (skipped when nothing was fetched)
if demo_stocks:
    summary_df = create_bulk_fetch_summary(demo_stocks)
    print("\n📊 Summary of fetched stocks:")
    display(summary_df.head(10))


📅 Fetching data from 2025-03-19 to 2025-06-17
📊 Expected trading days: ~65

🚀 DEMO: Fetching top 20 HK stocks...
🏢 Fetching all major HK stocks...
📈 Loaded 34 major HK stocks
📈 Using all major stocks
🎯 Limited to 20 stocks
📋 Total stocks to fetch: 20
🚀 Starting bulk fetch for 20 HK stocks
⚙️  Batch size: 5, Delay: 1.0s
📅 Date range: 2025-03-19 to 2025-06-17

📦 Processing batch 1/4: 5 stocks
   Tickers: 0700.HK, 0005.HK, 0941.HK, 0388.HK, 1299.HK
🚀 Fetching data for 5 tickers from 2025-03-19 to 2025-06-17


Processing tickers:   0%|          | 0/5 [00:00<?, ?it/s]


📊 Processing 0700.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 0005.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 0941.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 0388.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 1299.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

🎉 Successfully processed 5 out of 5 tickers

📈 Summary:
  0700.HK: 59 records, $509.50 (latest close)
  0005.HK: 59 records, $92.75 (latest close)
  0941.HK: 59 records, $87.40 (latest close)
  0388.HK: 59 records, $419.60 (latest close)
  1299.HK: 59 record

Processing tickers:   0%|          | 0/5 [00:00<?, ?it/s]


📊 Processing 2318.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 1398.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 0939.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 3988.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 2388.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

🎉 Successfully processed 5 out of 5 tickers

📈 Summary:
  2318.HK: 59 records, $48.15 (latest close)
  1398.HK: 59 records, $6.15 (latest close)
  0939.HK: 59 records, $7.78 (latest close)
  3988.HK: 59 records, $4.61 (latest close)
  2388.HK: 59 records, $3

Processing tickers:   0%|          | 0/5 [00:00<?, ?it/s]


📊 Processing 0823.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 0883.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 0016.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 0017.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 1113.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

🎉 Successfully processed 5 out of 5 tickers

📈 Summary:
  0823.HK: 59 records, $42.05 (latest close)
  0883.HK: 59 records, $18.66 (latest close)
  0016.HK: 59 records, $87.20 (latest close)
  0017.HK: 59 records, $5.32 (latest close)
  1113.HK: 59 records, 

Processing tickers:   0%|          | 0/5 [00:00<?, ?it/s]


📊 Processing 9988.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 3690.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 1024.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 9618.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

📊 Processing 9999.HK...
  📡 No cached data, fetching from Yahoo Finance...
  💾 Saved 59 records to cache
  ✅ Final dataset: 59 records

🎉 Successfully processed 5 out of 5 tickers

📈 Summary:
  9988.HK: 59 records, $112.90 (latest close)
  3690.HK: 59 records, $138.80 (latest close)
  1024.HK: 59 records, $60.00 (latest close)
  9618.HK: 59 records, $130.90 (latest close)
  9999.HK: 59 recor

Unnamed: 0,Ticker,Records,Start_Date,End_Date,Latest_Close,Min_Price,Max_Price,Avg_Volume,Total_Volume,Missing_Data,Data_Quality
0,0700.HK,59,2025-03-19,2025-06-16,509.5,431.632111,535.326965,25690220.0,1515722832,0,100.0
1,0388.HK,59,2025-03-19,2025-06-16,419.600006,297.0,420.399994,8536652.0,503662439,0,100.0
2,9999.HK,59,2025-03-19,2025-06-16,205.800003,135.107635,208.800003,8001125.0,472066372,0,100.0
3,3690.HK,59,2025-03-19,2025-06-16,138.800003,127.0,175.899994,52197210.0,3079635428,0,100.0
4,9618.HK,59,2025-03-19,2025-06-16,130.899994,123.800003,175.626053,16777600.0,989878353,0,100.0
5,9988.HK,59,2025-03-19,2025-06-16,112.900002,101.188087,140.944122,105814500.0,6243057814,0,100.0
6,0005.HK,59,2025-03-19,2025-06-16,92.75,71.618378,93.849998,21960490.0,1295668842,0,100.0
7,0941.HK,59,2025-03-19,2025-06-16,87.400002,76.454498,88.300003,21198890.0,1250734645,0,100.0
8,0016.HK,59,2025-03-19,2025-06-16,87.199997,66.150002,87.199997,3803998.0,224435896,0,100.0
9,1299.HK,59,2025-03-19,2025-06-16,68.75,48.946556,70.150002,39123440.0,2308283025,0,100.0
