In [24]:

import yfinance as yf
import pandas as pd
import pandas_market_calendars as mcal
import numpy as np
import clickhouse_connect as Client
import os

In [3]:
def download_single_ticker_data(ticker, output_file=None):
    """
    下载单个Ticker（股票或ETF）的历史数据。
    
    :param ticker: 股票或ETF代码（如 "SPY"）
    :param start_date: 数据开始日期（如 "2022-01-01"）
    :param end_date: 数据结束日期（如 "2023-01-01"）
    :param output_file: 可选，保存文件路径（如 "ticker_data.csv"）
    :return: 包含数据的Pandas DataFrame
    """
    print(f"正在下载 {ticker} 的数据...")
    
    # 使用 yfinance 下载数据
    data = yf.download(ticker)
    
    # 检查数据是否下载成功
    if data.empty:
        print(f"无法下载 {ticker} 的数据，请检查代码或日期范围。")
        return None
    
    # 如果需要保存到文件
    if output_file:
        data.to_csv(output_file)
        print(f"数据已保存到文件：{output_file}")
    
    return data


In [20]:
def init_db_df(symbol, start, end, freq='1D'):
    
    # Create a calendar
    nasdaq = mcal.get_calendar('NASDAQ')
    trading_hours = pd.DataFrame()
    trading_hours['start'] = nasdaq.schedule(start_date=start, end_date=end)['market_open'].dt.tz_convert('America/New_York')
    trading_hours['end'] = nasdaq.schedule(start_date=start, end_date=end)['market_close'].dt.tz_convert('America/New_York')
    # trading_hours['duration'] = (trading_hours['end'] - trading_hours['start']).dt.total_seconds() / 60
    ETH = []
    for _, row in trading_hours.iterrows():
        ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))
    ETH = pd.DatetimeIndex(np.concatenate(ETH))
    # trading_hours = trading_hour[trading_hour['duration'] == 390]
    # minute_ranges = []
    # for _, row in trading_hour.iterrows():
    #     # Generate minute-frequency datetime range
    #     time_range = pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None)
    #     minute_ranges.append(time_range)
    # minute_ranges = pd.DatetimeIndex(np.concatenate(minute_ranges))

    columns = 'symbol, open_time_local, open, high, low, close, volume, average'.split(', ')
    sql = f"SELECT symbol, open_time_local, open, high, low, close, volume, average from us_stocks.pg_klines_1m WHERE symbol = '{symbol}' AND open_time_local >= '{start}' AND open_time_local < '{end}';"
    client = Client.get_client(host='192.168.1.19', username='root', password='flab777')
    result = client.query(sql)
    df = pd.DataFrame(result.result_rows, columns=columns).sort_values('open_time_local')
    df['open_time'] = pd.to_datetime(df['open_time_local'], format='%Y%m%d %H:%M:%S')
    df['open'] = df['open'].astype(float)
    df['high'] = df['high'].astype(float)
    df['low'] = df['low'].astype(float)
    df['close'] = df['close'].astype(float)
    df['volume'] = df['volume'].astype(float)
    df.drop_duplicates(inplace=True)
    df.set_index('open_time', inplace=True, drop=False)
    df.dropna(inplace=True)
    df = df[df.index.isin(ETH)]

    df = df.resample(freq).agg({
        'symbol': 'first',
        'open_time': 'first',
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    })
    df.dropna(inplace=True)

    # Group by date
    # start_time = pd.Timestamp("09:30:00").time()
    # end_time = pd.Timestamp("15:59:00").time()

    df['date'] = df.index.date
    # df = df[df.date.isin(trading_hours.index.date)]
    df.set_index('date', inplace=True, drop=False)
    # df = df.reindex(minute_ranges)

    return df



In [27]:
ETFs=pd.read_csv("../data/ETF1s.csv")
for etf in ETFs['Symbol']:
    if os.path.exists(f"../data/daily_data/{etf}_daily.csv"):
        continue
    print(etf)
    etfKline=init_db_df(etf,"2010-01-01","2025-02-01")
    etfKline.to_csv(f"../data/daily_data/{etf}_daily.csv")

SPTL


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SCHR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SOXL


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


GLDM


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


VGLT


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SPIB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IJJ


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


QYLD


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SPSB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SDVY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


CALF


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SCHI


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FNGU


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IJS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


JNK


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


QLD


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SPHY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IAGG


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TLH


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SPYD


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


JGLO


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IOO


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


ARKK


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SHYG


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IBB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


XBI


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SPTS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TSLL


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SSO


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BOND


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


JCPB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IHI


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SJNK


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


KRE


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


PPA


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


LMBS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FIXD


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BSCQ


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IEUR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BSCP


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IDV


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SLYG


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IYR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BBEU


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


URA


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


AIRR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FEZ


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IBDQ


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


CGXU


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IBDR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


EWU


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IBDS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


VAW


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IHDG


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


XAR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


GNR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


ITB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


COPX


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


EWC


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BSCR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IBDT


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


ASHR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FTSL


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


DFIS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


PFFD


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


NVD


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


PHO


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


GRID


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TDTT


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


PFXF


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


VRP


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FLJP


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


MLPA


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


JBBB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FLIN


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FUTY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FPEI


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


DFNM


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


PFFA


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


DBC


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BCI


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TIPX


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


PCY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


RWO


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


GSG


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


NLR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


INDY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


DLS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TAN


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FMHI


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


AIA


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


HAUZ


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FWD


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


MLN


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


MMIN


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


NBOS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FLMI


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TMFG


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FHEQ


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


NTSI


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


LEMB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


AGOX


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


WIP


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


PSP


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TBFG


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


UVXY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TBFC


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IGHG


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FCAL


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SPSK


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


DGT


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BSMP


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


CEFS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


QQQY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BSMQ


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SVXY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TACK


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


BSMR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IBND


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


VIXY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TDSC


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


IBMR


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


WTMF


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FMF


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


DGP


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


CVY


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


QIS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


HIPS


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FXA


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FXC


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


SJB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


FXB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


TPMN


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


CATF


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


MARB


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


ZSL


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


EAOA


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


MMCA


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


LIAX


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


LIAO


  ETH.append(pd.date_range(start=row['start'], end=row['end'], freq='T', inclusive="left").tz_localize(None))


In [6]:
ETFs=pd.read_csv('../data/ETFs.csv')

download_single_ticker_data('SPY', 'SPY.csv')
# for etf in ETFs['Symbol']:
    # download_single_ticker_data(etf,  f"{etf}.csv")

正在下载 SPY 的数据...


Failed to get ticker 'SPY' reason: Expecting value: line 1 column 1 (char 0)
[*********************100%***********************]  1 of 1 completed

1 Failed download:
['SPY']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


无法下载 SPY 的数据，请检查代码或日期范围。
