In [2]:
import tushare as ts
from dotenv import load_dotenv
import os
Tushare_API_Key=os.getenv("Tushare_API_Key")



In [None]:


import tushare as ts
import pandas as pd
import time
from datetime import datetime
import os
CSI300_index_code = '399300.SZ'
CSI500_index_code = '399905.SZ'
pro = ts.pro_api(Tushare_API_Key) 


index_code = CSI500_index_code

file_name = "data/csi500_weights_201401_202509.csv"

# 如果文件存在则不写表头，否则写表头
write_header = not os.path.exists(file_name)

start_year, start_month = 2014, 1
end_year, end_month = 2025, 9

for year in range(start_year, end_year + 1):
    m_start = start_month if year == start_year else 1
    m_end   = end_month   if year == end_year else 12

    for month in range(m_start, m_end + 1):
        # 计算当月第一天与最后一天
        start_date = f"{year}{month:02d}01"
        if month == 12:
            next_month = datetime(year + 1, 1, 1)
        else:
            next_month = datetime(year, month + 1, 1)
        last_day = (next_month - pd.Timedelta(days=1)).day
        end_date = f"{year}{month:02d}{last_day:02d}"

        print(f"获取 {year}-{month:02d} 数据...")
        try:
            df = pro.index_weight(index_code=index_code,
                                  start_date=start_date,
                                  end_date=end_date)
            if not df.empty:
                df["year"] = year
                df["month"] = month
                # 追加保存
                df.to_csv(file_name,
                          mode="a",
                          index=False,
                          encoding="utf-8-sig",
                          header=write_header)
                # 第一次写入后，之后都不写表头
                write_header = False
        except Exception as e:
            print(f"{year}-{month:02d} 获取失败：", e)

        # 防止接口调用过快
        time.sleep(1)

print(f"数据获取完成，已保存到: {file_name}")


获取 2014-01 数据...
获取 2014-02 数据...


KeyboardInterrupt: 

In [None]:
import pandas as pd
from typing import List, Tuple

def get_listed_periods(
    stocks_date_list: pd.Series,
    all_date_list: pd.Series
) -> List[Tuple[str, str]]:
    """
    Find the consecutive intervals of a stock in the CSI300.
    stocks_date_list: Sequence of stock dates in the CSI300 (ascending order)
    all_date_list: All CSI300 trading dates (ascending order)
    return: [(start_date, end_date), ...]
    """
    date_to_idx = {d: i for i, d in enumerate(all_date_list)}

    idx = stocks_date_list.map(date_to_idx)

    breaks = idx.diff().fillna(1) != 1 

    periods = []
    start = None
    for date, is_break in zip(stocks_date_list, breaks):
        if is_break:
            if start is not None: 
                periods.append((start, prev))
            start = date  
        prev = date
    periods.append((start, prev))  

    return periods
import pandas as pd

file_in = "data/csi300_weights_201401_202509.csv"
file_out = "data/csi300_entry_exit.txt"   # 输出为txt文件，制表符分隔

# 1. 读取数据
df = pd.read_csv(file_in, dtype={'trade_date': str})

# 2. 处理代码格式: 600000.SH → SH600000
df['stock'] = df['con_code'].apply(lambda x: x[-2:] + x[:-3])

# 3. 准备全市场日期列表（升序且唯一）
all_dates = df['trade_date'].drop_duplicates().sort_values().reset_index(drop=True)
all_min, all_max = all_dates.min(), all_dates.max()

# 4. 对每只股票获取不连续区间，并补全缺失边界
records = []
for stock, g in df.groupby('stock'):
    stock_dates = g['trade_date'].drop_duplicates().sort_values().reset_index(drop=True)
    periods = get_listed_periods(stock_dates, all_dates)
    if len(periods) != 1:
        print(f"{stock} 共 {len(periods)} 个区间：{periods}")

    for start, end in periods:
        # 填充缺失值
        s = start if pd.notna(start) else all_min
        e = end   if pd.notna(end)   else all_max
        records.append((stock, s, e))

# 5. 保存结果
res = pd.DataFrame(records, columns=['stock', 'start_date', 'end_date'])
res = res.sort_values(['stock', 'start_date'])
res.to_csv(file_out, sep='\t', header=False, index=False)
print(f"已生成：{file_out}")


SH600027 共 2 个区间：[('20181228', '20210601'), ('20240628', '20250901')]
SH600219 共 2 个区间：[('20171229', '20201201'), ('20220630', '20250901')]
SH601117 共 2 个区间：[('20170630', '20210601'), ('20220630', '20250901')]
SH601238 共 2 个区间：[('20151231', '20160601'), ('20180629', '20250901')]
SH601872 共 2 个区间：[('20201231', '20211201'), ('20230630', '20250901')]
SH601898 共 2 个区间：[('20171229', '20201201'), ('20211231', '20250901')]
SH601991 共 2 个区间：[('20150630', '20161201'), ('20171229', '20190603')]
SZ000723 共 2 个区间：[('20191231', '20210601'), ('20221230', '20231201')]
SZ000983 共 2 个区间：[('20161230', '20190603'), ('20230630', '20250901')]
SZ002422 共 2 个区间：[('20181228', '20210601'), ('20241231', '20250901')]
已生成：data/csi300_entry_exit.txt
