In [4]:
# src/data_ingestion/downloader.py
import yfinance as yf
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import numpy as np  
import datetime
import ta  # 技术分析库

In [8]:
# Download price history and enrich it with analysis indicators
def download_stock_data(tickers, start_date, end_date, period=None, interval=None, auto_adjust=True, prepost=False):
    """
    Download price history via yfinance and append derived indicator columns.

    The column MultiIndex returned by yfinance is flattened by dropping its
    'Ticker' level, so the indicator columns are only well-defined when a
    single ticker is requested.

    :param tickers: list of ticker symbols (effectively a single symbol, see above)
    :param start_date: start of the date range; used only when ``period`` is not given
    :param end_date: end of the date range; used only when ``period`` is not given
    :param period: look-back window passed to yfinance (e.g. '3mo'); when given it
                   is used instead of ``start_date`` / ``end_date``
    :param interval: bar size passed to yfinance; defaults to '1h' when not given
    :param auto_adjust: forwarded to ``yf.download``
    :param prepost: forwarded to ``yf.download``
    :return: pandas DataFrame with the downloaded price columns plus
             'Daily Return', 'Log Return', 'Range Pct', 'MA10', 'MA50',
             'Volatility' and 'RSI'
    """
    # Arguments shared by both request modes
    request = dict(
        tickers=tickers,
        group_by='ticker',
        auto_adjust=auto_adjust,
        prepost=prepost,
        threads=True,   # enable multi-threaded download
        progress=True,  # show the progress bar
        interval=interval if interval else '1h',
    )

    # Honour the caller's period when supplied; otherwise fall back to the
    # explicit date range so that `df` is always defined.
    if period:
        request['period'] = period
    else:
        request['start'] = start_date
        request['end'] = end_date

    df = yf.download(**request)

    # Flatten the column MultiIndex by removing the ticker level
    df.columns = df.columns.droplevel('Ticker')
    df.columns.name = None

    close = df['Close']

    # Derived indicators
    df['Daily Return'] = close.pct_change() * 100  # bar-over-bar percentage change of the close
    df['Log Return'] = np.log(close / close.shift(1))  # logarithmic return
    df['Range Pct'] = (df['High'] - df['Low']) / df['Low'] * 100  # high-low range as a percentage of the low
    df['MA10'] = close.rolling(10).mean()
    df['MA50'] = close.rolling(50).mean()
    # NOTE(review): sqrt(252) is the usual daily-bar annualisation factor; with
    # intraday intervals (the default here is '1h') the scale differs - confirm.
    df['Volatility'] = df['Log Return'].rolling(20).std() * np.sqrt(252)
    df['RSI'] = ta.momentum.RSIIndicator(close).rsi()  # RSI with the library's default window
    # Treat zero volume as missing and carry the previous value forward.
    # `.ffill()` replaces the deprecated `fillna(method='ffill')`.
    df['Volume'] = df['Volume'].replace(0, np.nan).ffill()

    return df

In [9]:
# Fetch hourly data for a single ticker and preview the first rows
ticker = ['QS']
start_date = '2025-09-01'
end_date = '2025-12-31'
period = '3mo'
interval = '1h'

df = download_stock_data(
    ticker,
    start_date,
    end_date,
    period=period,
    interval=interval,
)
df.head()

[*********************100%***********************]  1 of 1 completed
  df['Volume'] = df['Volume'].replace(0, np.nan).fillna(method='ffill')  # 处理成交量为0的情况


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Daily Return,Log Return,Range Pct,MA10,MA50,Volatility,RSI
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-08-12 13:30:00+00:00,9.0,9.4,9.0,9.22,,,,4.44444,,,,
2025-08-12 14:30:00+00:00,9.229,9.27,8.97,9.1123,4281981.0,-1.168117,-0.01175,3.344484,,,,
2025-08-12 15:30:00+00:00,9.115,9.28,9.105,9.2119,4018075.0,1.093026,0.010871,1.922023,,,,
2025-08-12 16:30:00+00:00,9.215,9.265,9.14,9.1476,1519743.0,-0.698006,-0.007005,1.367615,,,,
2025-08-12 17:30:00+00:00,9.15,9.2399,9.08,9.195,1719223.0,0.518163,0.005168,1.76101,,,,
