In [2]:
# src/data_ingestion/downloader.py
import yfinance as yf
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import numpy as np  
import datetime
import ta  # 技术分析库

In [3]:
# 定义下载数据的函数
import ta.volume


def _add_indicators(prices):
    """Return a copy of a single-ticker OHLCV frame with indicator columns appended.

    :param prices: DataFrame with flat columns including 'High', 'Low',
        'Close' and 'Volume'.
    :return: new DataFrame; the input frame is not modified.
    """
    out = prices.copy()
    out.columns.name = None

    close = out['Close']

    # Per-bar percentage return. The column is called "Daily Return" for
    # historical reasons; it is only truly daily when interval is '1d'.
    out['Daily Return'] = close.pct_change() * 100
    out['Log Return'] = np.log(close / close.shift(1))
    # Bar range relative to the low, in percent.
    out['Range Pct'] = (out['High'] - out['Low']) / out['Low'] * 100
    out['MA10'] = close.rolling(10).mean()
    out['MA50'] = close.rolling(50).mean()
    # NOTE(review): sqrt(252) annualises *daily* log returns; for intraday
    # intervals this scaling factor is presumably not what is wanted — confirm.
    out['Volatility'] = out['Log Return'].rolling(20).std() * np.sqrt(252)
    out['RSI'] = ta.momentum.RSIIndicator(close).rsi()

    # Treat zero-volume bars as missing and carry the last known volume
    # forward before computing the volume-based indicators below.
    out['Volume'] = out['Volume'].replace(0, np.nan).ffill()

    out['MACD'] = ta.trend.MACD(close).macd()
    # Build the Bollinger indicator once and read both bands from it.
    bands = ta.volatility.BollingerBands(close)
    out['Bollinger_High'] = bands.bollinger_hband()
    out['Bollinger_Low'] = bands.bollinger_lband()
    out['VWAP'] = ta.volume.VolumeWeightedAveragePrice(
        out['High'], out['Low'], close, out['Volume']
    ).volume_weighted_average_price()
    out['ADX'] = ta.trend.ADXIndicator(out['High'], out['Low'], close).adx()
    out['On-balance volume (OBV)'] = ta.volume.OnBalanceVolumeIndicator(
        close, out['Volume']
    ).on_balance_volume()

    return out


def download_stock_data(tickers, start_date, end_date, period=None, interval=None, auto_adjust=True, prepost=False):
    """
    Download price data with yfinance and append technical indicators.

    :param tickers: ticker symbol or list of ticker symbols
    :param start_date: start of the date range; used only when ``period`` is not given
    :param end_date: end of the date range; used only when ``period`` is not given
    :param period: yfinance period string (e.g. '1mo'); when given it takes
        precedence and start_date / end_date are not sent to yfinance
    :param interval: bar interval (e.g. '5m'); defaults to '1h' when falsy
    :param auto_adjust: forwarded to ``yf.download``
    :param prepost: forwarded to ``yf.download``
    :return: for a single ticker, a DataFrame with flat columns (price fields
        followed by the indicator columns); for several tickers, a DataFrame
        whose columns are a MultiIndex of (ticker, field)
    :raises ValueError: if the download returns no rows
    """
    request = dict(
        tickers=tickers,
        group_by='ticker',
        auto_adjust=auto_adjust,
        prepost=prepost,
        threads=True,   # download in parallel
        progress=True,  # show the progress bar
        interval=interval if interval else '1h',
    )
    if period:
        request['period'] = period
    else:
        # Previously this path left the frame undefined; fall back to the
        # explicit date range instead.
        request['start'] = start_date
        request['end'] = end_date

    raw = yf.download(**request)
    if raw is None or raw.empty:
        raise ValueError(f"No price data returned for {tickers!r}")

    if isinstance(raw.columns, pd.MultiIndex):
        # With group_by='ticker' the ticker is the outer level; prefer the
        # level name when the installed yfinance version provides one.
        level = 'Ticker' if 'Ticker' in raw.columns.names else 0
        symbols = list(dict.fromkeys(raw.columns.get_level_values(level)))

        if len(symbols) > 1:
            # Flattening would create duplicate column names, so compute the
            # indicators per ticker and keep the ticker as the outer level.
            per_symbol = {
                symbol: _add_indicators(raw.xs(symbol, axis=1, level=level))
                for symbol in symbols
            }
            combined = pd.concat(per_symbol, axis=1)
            combined.columns = combined.columns.set_names('Ticker', level=0)
            return combined

        raw.columns = raw.columns.droplevel(level)

    return _add_indicators(raw)

In [6]:
# Download price data and preview the first rows of the enriched frame.
# NOTE: because `period_` is set, the request is driven by the period;
# start_date / end_date are not forwarded to yfinance in that case.
start_date, end_date = '2025-09-01', '2025-12-31'
period_, interval_ = '1mo', '5m'
ticker = ['QS']

df = download_stock_data(
    tickers=ticker,
    start_date=start_date,
    end_date=end_date,
    period=period_,
    interval=interval_,
)
df.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Daily Return,Log Return,Range Pct,MA10,MA50,Volatility,RSI,MACD,Bollinger_High,Bollinger_Low,VWAP,ADX,On-balance volume (OBV)
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2025-10-14 14:50:00+00:00,16.799999,16.805,16.41,16.5112,,,,2.407072,,,,,,,,,0.0,
2025-10-14 14:55:00+00:00,16.52,16.549999,16.42,16.445,393830.0,-0.400942,-0.004017,0.791712,,,,,,,,,0.0,-393830.0
2025-10-14 15:00:00+00:00,16.445,16.594999,16.434999,16.565599,342677.0,0.733352,0.007307,0.973531,,,,,,,,,0.0,-51153.0
2025-10-14 15:05:00+00:00,16.565001,16.68,16.495001,16.530001,419980.0,-0.214896,-0.002151,1.121549,,,,,,,,,0.0,-471133.0
2025-10-14 15:10:00+00:00,16.530001,16.58,16.465,16.514999,311017.0,-0.090752,-0.000908,0.69845,,,,,,,,,0.0,-782150.0
