In [33]:
import os
import time
from datetime import datetime, timedelta

import pandas as pd
from alpha_vantage.techindicators import TechIndicators
from alpha_vantage.timeseries import TimeSeries
from dateutil.relativedelta import relativedelta
# Alpha Vantage API key.
# Prefer supplying it through the ALPHAVANTAGE_API_KEY environment variable so the
# credential does not live in the notebook. The literal below is kept only as a
# backward-compatible fallback.
# NOTE(review): this literal has been committed in plain text; consider rotating it
# and removing the fallback.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY', '64P4VM1TFHK6PN2B')



In [35]:

# Build the TimeSeries and TechIndicators clients, in that order, with the same
# API key and output_format='pandas'.
ts, ti = (
    client_cls(key=api_key, output_format='pandas')
    for client_cls in (TimeSeries, TechIndicators)
)





# Fetch minute-level (intraday) data one calendar month at a time
def get_intraday_data(symbol, interval, start_date, end_date, filename,
                      max_requests=25, max_retries=5):
    """Download intraday data month by month and checkpoint it to a CSV file.

    Starting at ``start_date`` the function requests one month per call from the
    module-level ``ts`` client (``ts.get_intraday(..., month='YYYY-MM')``) and
    advances by one calendar month until ``end_date`` is reached, the request
    budget is used up, or the API reports its rate limit.

    Every successfully fetched month is appended to ``f'{filename}_temp.csv'``
    (only that month's rows, with a ``date`` column copied from the index). The
    header is written only when the file does not exist yet or is empty, so the
    file stays a well-formed CSV even when a previous run already wrote to it.
    Use a distinct ``filename`` for each data series.

    Args:
        symbol: Ticker symbol passed through to the API.
        interval: Bar interval passed through to the API (e.g. ``'1min'``).
        start_date: ``datetime`` of the first month to request.
        end_date: ``datetime``; the loop stops once the cursor is no longer
            before this value.
        filename: Path prefix of the checkpoint file (``_temp.csv`` is appended).
        max_requests: Maximum number of successful requests per call.
        max_retries: Attempts per month before that month is skipped.

    Returns:
        A ``DataFrame`` with all months fetched by this call concatenated in
        request order, or an empty ``DataFrame`` if nothing was fetched.

    NOTE(review): the output recorded with this notebook shows
    ``get_intraday()`` rejecting the ``month`` keyword; presumably the installed
    alpha_vantage release predates that parameter - confirm and upgrade.
    """
    temp_path = f'{filename}_temp.csv'
    frames = []  # one frame per fetched month; concatenated once at the end
    current_date = start_date
    request_count = 0
    rate_limited = False

    while current_date < end_date and request_count < max_requests:
        success = False
        for attempt in range(1, max_retries + 1):
            try:
                month_str = current_date.strftime('%Y-%m')
                # Fetch one month of data
                data, meta_data = ts.get_intraday(symbol=symbol, interval=interval, outputsize='full', month=month_str)
                data['date'] = pd.to_datetime(data.index)

                # Checkpoint only the new month; writing the accumulated frame
                # here would duplicate every earlier month in the file.
                write_header = not os.path.exists(temp_path) or os.path.getsize(temp_path) == 0
                data.to_csv(temp_path, mode='a', header=write_header, index=False)
            except Exception as e:
                print(f"Error fetching data for {current_date}: {e}, retrying ({attempt}/{max_retries})")
                if "standard API rate limit" in str(e):
                    # Retrying cannot help once the daily quota is exhausted.
                    print("Reached daily request limit. Stopping data collection.")
                    rate_limited = True
                    break
                time.sleep(10)  # wait a little before the next attempt
            else:
                frames.append(data)
                success = True
                request_count += 1
                current_date += relativedelta(months=1)
                # Throttle: 12 s between calls keeps us at <= 5 requests per minute
                time.sleep(12)
                break

        if rate_limited:
            break
        if not success:
            print(f"Failed to fetch data for {current_date} after {max_retries} retries, skipping this period")
            # Skip by one calendar month, the same step as the success path, so
            # that no month is jumped over (a fixed 30-day step can skip February).
            current_date += relativedelta(months=1)

    return pd.concat(frames) if frames else pd.DataFrame()


In [30]:

# Fetch RSI data one calendar month at a time, newest month first
def get_rsi_data(symbol, interval, filename, months=60, max_requests=25, max_retries=5):
    """Download 14-period RSI values month by month and checkpoint them to CSV.

    Walks backwards from the current month for up to ``months`` calendar months,
    requesting each one from the module-level ``ti`` client
    (``ti.get_rsi(..., time_period=14, series_type='close', month='YYYY-MM')``).
    Collection stops early when ``max_requests`` successful requests have been
    made or when the API reports its rate limit.

    Every successfully fetched month is appended to ``f'{filename}_temp.csv'``
    (only that month's rows, with a ``date`` column copied from the index). The
    header is written only when the file does not exist yet or is empty. Use a
    distinct ``filename`` for each series/interval so that unrelated data does
    not end up in the same file.

    Args:
        symbol: Ticker symbol passed through to the API.
        interval: Bar interval passed through to the API (e.g. ``'5min'``).
        filename: Path prefix of the checkpoint file (``_temp.csv`` is appended).
        months: How many calendar months to walk back, starting with the
            current month.
        max_requests: Maximum number of successful requests per call.
        max_retries: Attempts per month before that month is skipped.

    Returns:
        A ``DataFrame`` with all months fetched by this call concatenated in
        request order, or an empty ``DataFrame`` if nothing was fetched.
    """
    temp_path = f'{filename}_temp.csv'
    frames = []  # one frame per fetched month; concatenated once at the end
    current_date = datetime.now()
    request_count = 0
    rate_limited = False

    for i in range(months):
        if request_count >= max_requests:
            break
        # Step back by whole calendar months; a fixed 30-day step drifts and can
        # repeat or skip a month.
        month_str = (current_date - relativedelta(months=i)).strftime('%Y-%m')
        success = False
        for attempt in range(1, max_retries + 1):
            try:
                rsi_data, meta_data = ti.get_rsi(symbol=symbol, interval=interval, time_period=14, series_type='close', month=month_str)
                rsi_data['date'] = pd.to_datetime(rsi_data.index)

                # Checkpoint only the new month; writing the accumulated frame
                # here would duplicate every earlier month in the file.
                write_header = not os.path.exists(temp_path) or os.path.getsize(temp_path) == 0
                rsi_data.to_csv(temp_path, mode='a', header=write_header, index=False)
            except Exception as e:
                print(f"Error fetching RSI data for {month_str}: {e}, retrying ({attempt}/{max_retries})")
                if "standard API rate limit" in str(e):
                    # Record the condition in a flag: the exception variable is
                    # unbound once the except clause ends, so it cannot be
                    # inspected after the retry loop.
                    print("Reached daily request limit. Stopping RSI data collection.")
                    rate_limited = True
                    break
                time.sleep(60)  # wait before the next attempt
            else:
                frames.append(rsi_data)
                success = True
                request_count += 1
                time.sleep(12)  # throttle to stay under the per-minute limit
                break

        if rate_limited:
            break
        if not success:
            print(f"Failed to fetch RSI data for {month_str} after {max_retries} retries, skipping this period")

    return pd.concat(frames) if frames else pd.DataFrame()


In [36]:
# Parameters
symbol = 'QQQ'  # Nasdaq-100 ETF ticker
interval = '1min'
end_date = datetime.now()
start_date = end_date - timedelta(days=365*5)  # roughly five years back from the same instant
filename = 'QQQ_1min_data'

# Fetch the minute-level data. The call also appends what it fetches to
# f'{filename}_temp.csv', which is read back below.
data = get_intraday_data(symbol, interval, start_date, end_date, filename)

# RSI for several intervals (disabled).
# NOTE: get_rsi_data writes to f'{filename}_temp.csv' as well; pass a distinct
# filename per series before enabling these, otherwise RSI rows end up in the
# same file as the price data.
#rsi_1min = get_rsi_data(symbol, '1min', filename)
#rsi_5min = get_rsi_data(symbol, '5min', filename)
#rsi_30min = get_rsi_data(symbol, '30min', filename)
#rsi_60min = get_rsi_data(symbol, '60min', filename)

# Load the checkpoint file; this replaces the frame returned above.
data = pd.read_csv(f'{filename}_temp.csv')

# The checkpoint file is written in append mode and can therefore contain
# repeated rows; keep one copy of each exact duplicate.
data = data.drop_duplicates()

# Forward-fill missing values (ffill() replaces the deprecated fillna(method='ffill'))
data = data.ffill()

# Save the final data set to CSV
data.to_csv(f'{filename}_final.csv', index=False)
print(f'Data saved to {filename}_final.csv')
print(data.head())

Error fetching data for 2019-07-20 04:07:38.802203: TimeSeries.get_intraday() got an unexpected keyword argument 'month', retrying (1/5)


KeyboardInterrupt: 