In [1]:
# src/data_ingestion/downloader.py
import yfinance as yf
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import numpy as np  
import datetime
import ta  # 技术分析库

In [2]:
# Helper that derives the indicator columns for a single ticker's price frame
def _add_indicators(prices):
    """
    Return a copy of ``prices`` with return, moving-average and ``ta`` indicator columns appended.

    :param prices: DataFrame with flat ``Open``/``High``/``Low``/``Close``/``Volume`` columns
    :return: new DataFrame; ``Volume`` is rewritten in place (zeros forward-filled),
        all other added columns are appended after the price columns
    """
    out = prices.copy()
    out.columns.name = None
    close = out['Close']

    # Per-bar simple return in percent (the column name says "Daily" but the
    # value is computed between consecutive rows, whatever the bar size is).
    out['Daily Return'] = close.pct_change() * 100
    out['Log Return'] = np.log(close / close.shift(1))
    # High-low range of each bar relative to its low, in percent.
    out['Range Pct'] = (out['High'] - out['Low']) / out['Low'] * 100
    out['MA10'] = close.rolling(10).mean()
    out['MA50'] = close.rolling(50).mean()
    # NOTE(review): sqrt(252) annualises *daily* returns; with intraday bars
    # (the default interval is '1h') the scaling factor is probably wrong - confirm.
    out['Volatility'] = out['Log Return'].rolling(20).std() * np.sqrt(252)
    out['RSI'] = ta.momentum.RSIIndicator(close).rsi()

    # Treat zero volume as missing and carry the previous bar's volume forward.
    # Series.ffill() replaces the deprecated fillna(method='ffill').
    out['Volume'] = out['Volume'].replace(0, np.nan).ffill()

    out['MACD'] = ta.trend.MACD(close).macd()
    # Build the Bollinger indicator once and read both bands from it.
    bollinger = ta.volatility.BollingerBands(close)
    out['Bollinger_High'] = bollinger.bollinger_hband()
    out['Bollinger_Low'] = bollinger.bollinger_lband()
    # VWAP accumulated from the first row of the frame (not reset per session).
    out['VWAP'] = (out['Volume'] * (out['High'] + out['Low'] + close) / 3).cumsum() / out['Volume'].cumsum()
    out['ADX'] = ta.trend.ADXIndicator(out['High'], out['Low'], close).adx()
    # The column is named "Volume Profile" but holds on-balance volume.
    out['Volume Profile'] = ta.volume.OnBalanceVolumeIndicator(close, out['Volume']).on_balance_volume()
    return out


def download_stock_data(tickers, start_date, end_date, period=None, interval=None, auto_adjust=True, prepost=False):
    """
    Download price history with yfinance and append technical indicators.

    :param tickers: list of ticker symbols passed to ``yf.download``
    :param start_date: start of the date range; used only when ``period`` is not given
    :param end_date: end of the date range; used only when ``period`` is not given
    :param period: look-back period string such as ``'3mo'``; when truthy it
        takes precedence over ``start_date`` / ``end_date``
    :param interval: bar size; falls back to ``'1h'`` when omitted
    :param auto_adjust: forwarded to ``yf.download``
    :param prepost: forwarded to ``yf.download``
    :return: for a single ticker, a DataFrame with flat columns (prices followed
        by the indicator columns); for several tickers, the same columns per
        ticker under a top column level keyed by ticker symbol
    """
    download_kwargs = dict(
        tickers=tickers,
        group_by='ticker',
        auto_adjust=auto_adjust,
        prepost=prepost,
        threads=True,   # download tickers in parallel
        progress=True,  # show the progress bar
        interval=interval if interval else '1h',
    )
    # Honour the caller's period; otherwise fall back to the explicit date range
    # so that `df` is always defined.
    if period:
        download_kwargs['period'] = period
    else:
        download_kwargs['start'] = start_date
        download_kwargs['end'] = end_date

    df = yf.download(**download_kwargs)

    if isinstance(df.columns, pd.MultiIndex):
        # group_by='ticker' puts the ticker on the outer level; prefer the
        # level name when it is present and fall back to position 0 otherwise.
        ticker_level = 'Ticker' if 'Ticker' in df.columns.names else 0
        symbols = df.columns.get_level_values(ticker_level).unique()
        if len(symbols) > 1:
            # Indicators must be computed per ticker; flattening several
            # tickers into one frame would yield duplicate column names.
            per_ticker = {
                symbol: _add_indicators(
                    df.xs(symbol, axis=1, level=ticker_level).dropna(how='all')
                )
                for symbol in symbols
            }
            return pd.concat(per_ticker, axis=1)
        df.columns = df.columns.droplevel(ticker_level)

    return _add_indicators(df)

In [7]:
# Fetch hourly QS bars for a three-month period and preview the first rows.
# NOTE: a date range is supplied together with `period`; the preview shown
# below starts before `start_date`, so the request is driven by `period`.
ticker = ['QS']
start_date, end_date = '2025-09-01', '2025-12-31'
period, interval = '3mo', '1h'

df = download_stock_data(
    tickers=ticker,
    start_date=start_date,
    end_date=end_date,
    period=period,
    interval=interval,
)
df.head()

[*********************100%***********************]  1 of 1 completed
  df['Volume'] = df['Volume'].replace(0, np.nan).fillna(method='ffill')  # 处理成交量为0的情况


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Daily Return,Log Return,Range Pct,MA10,MA50,Volatility,RSI,MACD,Bollinger_High,Bollinger_Low,VWAP,ADX,Volume Profile
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2025-08-13 13:30:00+00:00,9.43,9.7,9.24,9.2658,6707312,,,4.978356,,,,,,,,9.401933,0.0,6707312
2025-08-13 14:30:00+00:00,9.265,9.46,9.14,9.22,3457186,-0.494293,-0.004955,3.501091,,,,,,,,9.358194,0.0,3250126
2025-08-13 15:30:00+00:00,9.2271,9.3,9.18,9.28,1504139,0.650753,0.006486,1.307188,,,,,,,,9.344677,0.0,4754265
2025-08-13 16:30:00+00:00,9.275,9.29,9.17,9.1733,1587435,-1.149784,-0.011564,1.308614,,,,,,,,9.328681,0.0,3166830
2025-08-13 17:30:00+00:00,9.175,9.33,9.15,9.33,1726659,1.70822,0.016938,1.967217,,,,,,,,9.321918,0.0,4893489
