In [20]:
from jqdata import *
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta

In [None]:
# Get the small-cap / large-cap market-value boundaries for the past year
def ensure_dir(p):
    """Create directory ``p`` (including missing parents) if it does not exist.

    Uses ``exist_ok=True`` instead of a separate existence check, so there is
    no window between "check" and "create" in which another process can create
    the directory and make ``os.makedirs`` fail.

    Raises:
        FileExistsError: if ``p`` exists but is not a directory.
    """
    os.makedirs(p, exist_ok=True)

def get_valid_codes(d):
    """Return the stock codes kept for date ``d``.

    Starting from ``get_all_securities(types=['stock'], date=d)``, the list is
    narrowed in three steps:

    1. codes whose ``is_st`` flag (from ``get_extras``) is not ``False`` are dropped;
    2. codes whose ``paused`` flag (from ``get_price``) is truthy are dropped;
    3. codes whose ``end_date`` is not strictly after ``d`` are dropped.

    Step 2 is best-effort: if the price request fails, a warning is emitted and
    the paused filter is skipped for that day.

    Args:
        d: the date to evaluate (anything ``pd.to_datetime`` accepts).

    Returns:
        list of code strings.
    """
    import warnings

    sec = get_all_securities(types=['stock'], date=d)
    codes = sec.index.tolist()

    # Keep only the columns (codes) whose single returned is_st row is False.
    st = get_extras('is_st', codes, count=1, end_date=d)
    codes = st.columns[st.iloc[0].eq(False).values].tolist()

    try:
        # panel=False asks for a long-format frame with a 'code' column, which
        # the set_index('code') below relies on. Without it the multi-code
        # request did not have that column and the paused filter was silently
        # skipped by the exception handler.
        p = get_price(codes, start_date=d, end_date=d, frequency='daily',
                      fields=['paused'], panel=False)
        paused_map = p.set_index('code')['paused'].astype(bool).to_dict()
        codes = [c for c in codes if not paused_map.get(c, False)]
    except Exception as exc:
        # Deliberately best-effort, but no longer silent, and no longer
        # swallowing KeyboardInterrupt / SystemExit.
        warnings.warn('paused filter skipped for %s: %r' % (d, exc))

    sec = sec.loc[codes]
    end_mask = pd.to_datetime(sec['end_date']) > pd.to_datetime(d)
    codes = sec[end_mask].index.tolist()
    return codes

def get_daily_groups(d, out_root='jq_output'):
    """Compute the small-cap / large-cap boundary values for date ``d``.

    The valid codes for ``d`` are ranked by market cap (circulating market cap,
    falling back to total market cap where the circulating value is missing).
    The bottom ``k`` and top ``k`` rows form the small and large groups, with
    ``k = max(1, floor(0.2 * n))``.

    Args:
        d: the date passed to ``get_valid_codes`` and ``get_fundamentals``.
        out_root: not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        ``None`` when there are no valid codes or no usable valuation rows,
        otherwise a dict with keys ``trade_date``, ``small_upper`` (largest cap
        in the small group), ``large_lower`` (smallest cap in the large group)
        and ``count`` (number of ranked stocks).
    """
    codes = get_valid_codes(d)
    if not codes:
        return None
    q = query(
        valuation.code, valuation.circulating_market_cap, valuation.market_cap
    ).filter(valuation.code.in_(codes))
    f = get_fundamentals(q, date=d)
    if f is None or f.empty:
        return None

    # Read BOTH source columns before overwriting 'market_cap'. Previously the
    # column was overwritten first and then used to fill its own gaps, so the
    # fallback to total market cap never took effect.
    circulating = pd.to_numeric(f['circulating_market_cap'], errors='coerce')
    total = pd.to_numeric(f['market_cap'], errors='coerce')
    f['market_cap'] = circulating.fillna(total)

    f = f.dropna(subset=['market_cap']).drop_duplicates(subset=['code'])
    if f.empty:
        return None
    f = f.sort_values('market_cap')
    n = len(f)
    k = max(1, int(np.floor(n * 0.2)))
    small = f.head(k)[['code', 'market_cap']]
    large = f.tail(k)[['code', 'market_cap']]
    small_upper = float(small['market_cap'].max()) if len(small) else float('nan')
    large_lower = float(large['market_cap'].min()) if len(large) else float('nan')
    return {'trade_date': d, 'small_upper': small_upper, 'large_lower': large_lower, 'count': n}

# Inclusive date range over which the daily boundaries are computed.
start = '2025-01-01'
end = '2025-12-29'
days = get_trade_days(start_date=start, end_date=end)

# One record per trading day; days for which nothing could be computed are skipped.
rows = []
for trade_day in days:
    day_info = get_daily_groups(trade_day, out_root='jq_output')
    if day_info is None:
        continue
    rows.append(day_info)

# Write the collected boundaries only when at least one day produced a record.
if len(rows) > 0:
    out_groups = os.path.join('jq_output')
    ensure_dir(out_groups)
    boundary_columns = ['trade_date', 'small_upper', 'large_lower']
    out_df = pd.DataFrame(rows)[boundary_columns]
    boundaries_path = os.path.join(out_groups, 'boundaries.csv')
    out_df.to_csv(boundaries_path, index=False, encoding='utf-8')


In [15]:
# Get the large-cap and small-cap constituent stocks for the past year (ST stocks removed)
from jqdata import *
import pandas as pd
import numpy as np
import os

def ensure_dir(p):
    """Create directory ``p`` (including missing parents) if it does not exist.

    Uses ``exist_ok=True`` instead of a separate existence check, so there is
    no window between "check" and "create" in which another process can create
    the directory and make ``os.makedirs`` fail.

    Raises:
        FileExistsError: if ``p`` exists but is not a directory.
    """
    os.makedirs(p, exist_ok=True)

def get_valid_codes(d):
    """Return the stock codes kept for date ``d``.

    Starting from ``get_all_securities(types=['stock'], date=d)``, the list is
    narrowed in three steps:

    1. codes whose ``is_st`` flag (from ``get_extras``) is not ``False`` are dropped;
    2. codes whose ``paused`` flag (from ``get_price``) is truthy are dropped;
    3. codes whose ``end_date`` is not strictly after ``d`` are dropped.

    Step 2 is best-effort: if the price request fails, a warning is emitted and
    the paused filter is skipped for that day.

    Args:
        d: the date to evaluate (anything ``pd.to_datetime`` accepts).

    Returns:
        list of code strings.
    """
    import warnings

    sec = get_all_securities(types=['stock'], date=d)
    codes = sec.index.tolist()

    # Keep only the columns (codes) whose single returned is_st row is False.
    st = get_extras('is_st', codes, count=1, end_date=d)
    codes = st.columns[st.iloc[0].eq(False).values].tolist()

    try:
        p = get_price(codes, start_date=d, end_date=d, frequency='daily',
                      fields=['paused'], panel=False)
        paused_map = p.set_index('code')['paused'].astype(bool).to_dict()
        codes = [c for c in codes if not paused_map.get(c, False)]
    except Exception as exc:
        # Still best-effort, but the skipped filter is now visible, and a bare
        # `except:` no longer swallows KeyboardInterrupt / SystemExit during a
        # long multi-day run.
        warnings.warn('paused filter skipped for %s: %r' % (d, exc))

    sec = sec.loc[codes]
    end_mask = pd.to_datetime(sec['end_date']) > pd.to_datetime(d)
    codes = sec[end_mask].index.tolist()
    return codes

def get_daily_groups(d, out_root='jq_output'):
    """Write the small-cap and large-cap constituent lists for date ``d``.

    The valid codes for ``d`` are ranked by market cap (circulating market cap,
    falling back to total market cap where the circulating value is missing).
    The bottom ``k`` and top ``k`` rows, with ``k = max(1, floor(0.2 * n))``,
    are written as ``<out_root>/small/<d>.csv`` and ``<out_root>/large/<d>.csv``
    with columns ``code`` and ``market_cap``.

    Nothing is written when there are no valid codes or no usable valuation rows.

    Args:
        d: the date passed to ``get_valid_codes`` and ``get_fundamentals``;
            its string form is used as the file name.
        out_root: directory under which the ``small`` and ``large`` folders
            are created.

    Returns:
        None.
    """
    codes = get_valid_codes(d)
    if not codes:
        return
    q = query(
        valuation.code, valuation.circulating_market_cap, valuation.market_cap
    ).filter(valuation.code.in_(codes))
    f = get_fundamentals(q, date=d)
    if f is None or f.empty:
        return

    # Read BOTH source columns before overwriting 'market_cap'. Previously the
    # column was overwritten first and then used to fill its own gaps, so the
    # fallback to total market cap never took effect.
    circulating = pd.to_numeric(f['circulating_market_cap'], errors='coerce')
    total = pd.to_numeric(f['market_cap'], errors='coerce')
    f['market_cap'] = circulating.fillna(total)

    f = f.dropna(subset=['market_cap']).drop_duplicates(subset=['code'])
    if f.empty:
        return
    f = f.sort_values('market_cap')
    n = len(f)
    k = max(1, int(np.floor(n * 0.2)))
    small = f.head(k)[['code', 'market_cap']]
    large = f.tail(k)[['code', 'market_cap']]

    out_dir_small = os.path.join(out_root, 'small')
    out_dir_large = os.path.join(out_root, 'large')
    ensure_dir(out_dir_small)
    ensure_dir(out_dir_large)
    small.to_csv(os.path.join(out_dir_small, f'{d}.csv'), index=False, encoding='utf-8')
    large.to_csv(os.path.join(out_dir_large, f'{d}.csv'), index=False, encoding='utf-8')

# Inclusive date range for which the daily small/large constituent files are written.
start = '2025-01-01'
end = '2025-12-29'
days = get_trade_days(start_date=start, end_date=end)

# One pair of CSV files (small / large) per trading day.
for trade_day in days:
    get_daily_groups(trade_day, out_root='jq_output')

In [16]:
# Add each stock's same-day percentage change to the past year's constituent files
def _pctFromCloses(closes):
    """Percent change between the last two values of a close series (NaN if not computable)."""
    if closes is None or len(closes) < 2:
        return np.nan
    c1 = closes.iloc[-2]
    c2 = closes.iloc[-1]
    if pd.notna(c2) and pd.notna(c1) and c1 != 0:
        return (c2 / c1 - 1.0) * 100.0
    return np.nan


def calcPctChange(date, codes, batch_size=400):
    """Compute the close-to-close percent change on ``date`` for each code.

    For every code the last two daily closes ending at ``date`` are requested
    with ``skip_paused=True`` and the change is ``(last / previous - 1) * 100``.

    Codes are requested in batches of ``batch_size``. The batch is requested
    with ``panel=False`` so that the result is a long-format frame (one row per
    bar with a ``code`` column), which is handled directly. The earlier
    accepted shapes (a ``{code: DataFrame}`` dict and a frame with
    ``(field, code)`` MultiIndex columns) are still recognised. If the batch
    request fails or returns none of these shapes, each code of the batch is
    requested on its own.

    Args:
        date: end date of the two-bar window.
        codes: list of security codes.
        batch_size: number of codes per batch request.

    Returns:
        dict mapping each code to its percent change, or NaN when fewer than
        two closes are available, a close is missing, or the previous close is 0.
    """
    ret = {}
    if not codes:
        return ret

    for i in range(0, len(codes), batch_size):
        batch = codes[i:i+batch_size]
        try:
            data = get_price(
                batch, end_date=date, count=2, frequency='daily',
                fields=['close'], skip_paused=True, panel=False
            )
        except Exception:
            data = None

        # 1) dict: {code: DataFrame}
        if isinstance(data, dict):
            for code in batch:
                df = data.get(code)
                if isinstance(df, pd.DataFrame) and 'close' in df.columns:
                    ret[code] = _pctFromCloses(df['close'])
                else:
                    ret[code] = np.nan
            continue

        if isinstance(data, pd.DataFrame) and not data.empty:
            # 2) MultiIndex DataFrame: columns=(field, code)
            if isinstance(data.columns, pd.MultiIndex):
                lv0 = data.columns.get_level_values(0)
                if 'close' in lv0:
                    close_df = data.loc[:, lv0 == 'close']
                    for code in batch:
                        col = ('close', code)
                        if col in close_df.columns:
                            ret[code] = _pctFromCloses(close_df[col])
                        else:
                            ret[code] = np.nan
                    continue

            # 3) long format: one row per (time, code) with a 'close' column
            elif 'code' in data.columns and 'close' in data.columns:
                ordered = data.sort_values('time') if 'time' in data.columns else data
                closes_by_code = {
                    code: grp['close'] for code, grp in ordered.groupby('code', sort=False)
                }
                for code in batch:
                    ret[code] = _pctFromCloses(closes_by_code.get(code))
                continue

        # 4) fallback: request each code of the batch individually
        for code in batch:
            try:
                df = get_price(code, end_date=date, count=2, frequency='daily',
                               fields=['close'], skip_paused=True)
            except Exception:
                df = None
            if isinstance(df, pd.DataFrame) and 'close' in df.columns:
                ret[code] = _pctFromCloses(df['close'])
            else:
                ret[code] = np.nan

    return ret

def addPctChangeToFile(csv_path):
    """Add (or overwrite) a ``pct_change`` column in a per-day constituent CSV.

    The date is taken from the file name without its extension, the codes from
    the ``code`` column, and the values from ``calcPctChange``. The file is
    rewritten in place. Files that do not exist or have no ``code`` column are
    left untouched.

    Args:
        csv_path: path of the CSV file to update.

    Returns:
        ``csv_path``, unchanged.
    """
    if not os.path.isfile(csv_path):
        return csv_path
    df = pd.read_csv(csv_path)
    if 'code' not in df.columns:
        return csv_path
    date = os.path.splitext(os.path.basename(csv_path))[0]
    # Use the same string form for the request and for the lookup; mapping the
    # raw column would miss every key if pandas parsed the codes as numbers.
    code_keys = df['code'].astype(str)
    m = calcPctChange(date, code_keys.tolist())
    df['pct_change'] = code_keys.map(m)
    df.to_csv(csv_path, index=False)
    return csv_path

def addPctChangeToFolder(root_dir):
    """Run ``addPctChangeToFile`` on every ``.csv`` file in the group folders.

    The ``small`` folder under ``root_dir`` is processed first, then ``large``.
    A group folder that does not exist is skipped; entries that do not end in
    ``.csv`` are ignored.
    """
    group_dirs = (os.path.join(root_dir, group) for group in ['small', 'large'])
    for group_dir in group_dirs:
        if os.path.isdir(group_dir):
            for entry in os.listdir(group_dir):
                if entry.endswith('.csv'):
                    addPctChangeToFile(os.path.join(group_dir, entry))

# Add the pct_change column to every per-day CSV under jq_output/small and jq_output/large.
addPctChangeToFolder('jq_output')

In [21]:
# Get 15-minute K-line 'open','close','high','low','volume','money' data for the large-cap and small-cap constituents
def readCodes(csv_path):
    """Return the distinct non-null values of a CSV's ``code`` column as strings.

    Values are returned in order of first appearance.
    """
    code_column = pd.read_csv(csv_path)['code']
    as_text = code_column.dropna().astype(str)
    return as_text.unique().tolist()

def dayTimeWindow(date):
    """Return ``(start, end)`` timestamp strings for ``date``: 09:30:00 and 15:00:00.

    ``date`` is expected to be a ``YYYY-MM-DD`` string; the times are appended
    with a single space.
    """
    window_start = date + " 09:30:00"
    window_end = date + " 15:00:00"
    return window_start, window_end

def toFrame(code, df, fields):
    """Normalise one security's price frame to columns ``time, code, *fields``.

    The index is moved into a column and that column is renamed to ``time``
    when it is called ``datetime``, ``date`` or ``index``. If no such column
    exists, ``time`` is built from the original index as strings. Requested
    fields that are missing are added as NaN columns.

    Args:
        code: value written into the ``code`` column.
        df: the source frame; anything that is not a non-empty DataFrame
            yields ``None``.
        fields: names of the value columns to return, in order.

    Returns:
        A new DataFrame with columns ``['time', 'code'] + fields``, or ``None``.
        The input frame is not modified.
    """
    # Check the type first: non-DataFrame inputs may not have ``.empty`` at all.
    if not isinstance(df, pd.DataFrame) or df.empty:
        return None
    fields = list(fields)

    # reset_index() raises if the index name is already a column; in that case
    # the existing column is kept and the index is dropped instead.
    name_clash = df.index.name is not None and df.index.name in df.columns
    df2 = df.reset_index(drop=bool(name_clash))

    if 'time' not in df2.columns:
        for candidate in ('datetime', 'date', 'index'):
            if candidate in df2.columns:
                df2 = df2.rename(columns={candidate: 'time'})
                break
        else:
            df2['time'] = pd.to_datetime(df.index).astype(str)

    for f in fields:
        if f not in df2.columns:
            df2[f] = np.nan
    df2['code'] = code
    return df2[['time', 'code'] + fields]

def _framesFromBatch(data, batch, date, fields):
    """Turn one batch result into per-code frames restricted to ``date``.

    Returns a list of frames (possibly empty) when ``data`` has a recognised
    shape, or ``None`` when it does not, so the caller can try something else.
    """
    cols = ['time', 'code'] + fields
    frames = []

    # 1) dict: {code: DataFrame indexed by bar time}
    if isinstance(data, dict):
        if len(data) == 0:
            return None
        for code in batch:
            df = data.get(code, pd.DataFrame())
            if df is None or df.empty:
                continue
            df = df.loc[(df.index.astype(str).str[:10] == date)]
            fr = toFrame(code, df, fields)
            if fr is not None and not fr.empty:
                frames.append(fr)
        return frames

    if not isinstance(data, pd.DataFrame):
        return None

    # 2) wide frame: columns are a (field, code) MultiIndex, index is bar time
    if isinstance(data.columns, pd.MultiIndex):
        idx_vals = data.index
        for code in batch:
            df2 = pd.DataFrame({'time': idx_vals})
            ok = False
            for f in fields:
                col = (f, code)
                if col in data.columns:
                    df2[f] = data[col].values
                    ok = True
                else:
                    df2[f] = np.nan
            if ok:
                df2['code'] = code
                df2 = df2.loc[(pd.Series(df2['time']).astype(str).str[:10] == date)]
                if not df2.empty:
                    frames.append(df2[cols])
        return frames

    # 3) long frame: one row per (time, code), as returned with panel=False
    if 'code' in data.columns and not data.empty:
        long_df = data
        if 'time' not in long_df.columns:
            long_df = long_df.reset_index()
            for candidate in ('datetime', 'date', 'index'):
                if candidate in long_df.columns:
                    long_df = long_df.rename(columns={candidate: 'time'})
                    break
            else:
                return None
        long_df = long_df.loc[long_df['time'].astype(str).str[:10] == date]
        wanted = set(batch)
        for code, sub in long_df.groupby('code', sort=False):
            if code not in wanted:
                continue
            sub = sub.copy()
            for f in fields:
                if f not in sub.columns:
                    sub[f] = np.nan
            frames.append(sub[cols])
        return frames

    return None


def collectMin15(date, codes, batch_size=100):
    """Collect the 15-minute bars of ``date`` for all ``codes`` into one frame.

    Codes are requested in batches of ``batch_size``. For each batch, two
    requests are tried in order: the ``dayTimeWindow(date)`` start/end window,
    then the last 16 bars ending at the window end. Both are made with
    ``panel=False`` so the result is a long-format frame. Without it the batch
    request returned a Panel (see the deprecation warning in this cell's
    output), which matched no branch, so every batch cost two wasted requests
    before falling back to one request per code.

    Dict and MultiIndex-column results are still recognised. Only if neither
    batch request yields a recognised shape is each code requested on its own.
    Only rows whose timestamp starts with ``date`` are kept.

    Args:
        date: trading day as a ``YYYY-MM-DD`` string.
        codes: list of security codes.
        batch_size: number of codes per batch request.

    Returns:
        DataFrame with columns ``time, code, open, close, high, low, volume,
        money``, de-duplicated on ``(time, code)`` and sorted by code then
        time. Empty (with those columns) when nothing was collected.
    """
    fields = ['open','close','high','low','volume','money']
    start_dt, end_dt = dayTimeWindow(date)
    out = []

    if not codes:
        return pd.DataFrame(columns=['time','code']+fields)

    batch_requests = (
        {'start_date': start_dt, 'end_date': end_dt},
        {'end_date': end_dt, 'count': 16},
    )

    for i in range(0, len(codes), batch_size):
        batch = codes[i:i+batch_size]

        frames = None
        for window in batch_requests:
            try:
                data = get_price(batch, frequency='15m', fields=fields, panel=False, **window)
            except Exception:
                data = None
            frames = _framesFromBatch(data, batch, date, fields)
            if frames is not None:
                break
        if frames is not None:
            out.extend(frames)
            continue

        # Fallback: request each code of the batch individually.
        for code in batch:
            df = pd.DataFrame()
            try:
                df = get_price(code, start_date=start_dt, end_date=end_dt, frequency='15m', fields=fields)
            except Exception:
                pass
            if df is None or df.empty:
                try:
                    df = get_price(code, end_date=end_dt, count=16, frequency='15m', fields=fields)
                except Exception:
                    df = pd.DataFrame()
            if df is None or df.empty:
                continue
            df = df.loc[(df.index.astype(str).str[:10] == date)]
            fr = toFrame(code, df, fields)
            if fr is not None and not fr.empty:
                out.append(fr)

    if not out:
        return pd.DataFrame(columns=['time','code']+fields)
    ret = pd.concat(out, ignore_index=True)
    ret = ret.drop_duplicates(subset=['time','code']).sort_values(['code','time'])
    return ret

def dateList(start_date, end_date):
    """List every calendar day from ``start_date`` to ``end_date`` inclusive.

    Both arguments and all returned items are ``YYYY-MM-DD`` strings. The
    result is empty when ``start_date`` is after ``end_date``.
    """
    day_format = "%Y-%m-%d"
    first = datetime.strptime(start_date, day_format)
    last = datetime.strptime(end_date, day_format)
    span = (last - first).days
    return [
        (first + timedelta(days=offset)).strftime(day_format)
        for offset in range(span + 1)
    ]

def runMin15(root_dir='jq_output', start_date='2025-12-01', end_date='2025-12-29', out_root='jq_output_min15'):
    """Write per-day 15-minute bar files for the small and large groups.

    For every calendar day in ``[start_date, end_date]`` and each group, the
    codes are read from ``<root_dir>/<group>/<day>.csv``, their bars are
    collected with ``collectMin15`` and written to
    ``<out_root>/<group>/<day>.csv``. Days without a source file, and days for
    which no bars were collected, produce no output file. Both output folders
    are created up front.
    """
    groups = ['small', 'large']
    for group in groups:
        os.makedirs(os.path.join(out_root, group), exist_ok=True)

    for day in dateList(start_date, end_date):
        file_name = f"{day}.csv"
        for group in groups:
            source_path = os.path.join(root_dir, group, file_name)
            if os.path.isfile(source_path):
                bars = collectMin15(day, readCodes(source_path))
                if not bars.empty:
                    bars.to_csv(os.path.join(out_root, group, file_name), index=False)
            
# Collect 15-minute bars for the constituent files dated 2025-10-08 through 2025-11-28.
runMin15(root_dir='jq_output', start_date='2025-10-08', end_date='2025-11-28', out_root='jq_output_min15')

Panel is deprecated and will be removed in a future version.
The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method
Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/.
Pandas provides a `.to_xarray()` method to help automate this conversion.

  round=round)


In [None]:
# Get 15-minute K-line 'open','close','high','low','volume','money' data for the CSI 300 and CSI 1000 indices
def fetchIndexMin15(out_dir='indices_min15', start_date=None, end_date=None):
    """Download 15-minute bars for ``000300.XSHG`` and ``000852.XSHG`` to CSV.

    One file ``<code>_15m_last_month.csv`` is written per index that returned
    data, plus a combined ``indices_15m_last_month.csv``. Every file has the
    columns ``time, code, open, close, high, low, volume, money``.

    Args:
        out_dir: output directory; created if missing.
        start_date: start of the range; defaults to 31 days before now.
        end_date: end of the range; defaults to now. A date-only string
            (``YYYY-MM-DD``) is extended to 15:00:00 of that day, capped at
            the current time. Per the JoinQuant ``get_price`` documentation,
            a date-only ``end_date`` on a minute-frequency request is read as
            00:00:00, which would leave out every bar of the end date itself.
            Strings in any other format are passed through unchanged.

    Returns:
        Absolute path of ``out_dir``.
    """
    import warnings

    codes = ['000300.XSHG','000852.XSHG']
    fields = ['open','close','high','low','volume','money']
    stamp_format = '%Y-%m-%d %H:%M:%S'
    now = datetime.now()

    if end_date is None:
        end_date = now.strftime(stamp_format)
    elif isinstance(end_date, str):
        try:
            session_close = datetime.strptime(end_date.strip(), '%Y-%m-%d') + timedelta(hours=15)
        except ValueError:
            session_close = None  # already carries a time, or uses another format
        if session_close is not None:
            end_date = min(session_close, now).strftime(stamp_format)
    if start_date is None:
        start_date = (now - timedelta(days=31)).strftime('%Y-%m-%d')

    os.makedirs(out_dir, exist_ok=True)
    frames = []
    for code in codes:
        try:
            df = get_price(code, start_date=start_date, end_date=end_date, frequency='15m', fields=fields)
        except Exception as exc:
            # There is no fallback for an index, so report the failure instead
            # of silently producing no file.
            warnings.warn('15m request failed for %s: %r' % (code, exc))
            df = None
        if df is None or df.empty:
            continue

        # Move the bar timestamps from the index into a 'time' column.
        orig_index = df.index
        df = df.reset_index()
        if 'time' not in df.columns:
            if orig_index.name and orig_index.name in df.columns:
                df = df.rename(columns={orig_index.name: 'time'})
            elif 'datetime' in df.columns:
                df = df.rename(columns={'datetime': 'time'})
            elif 'date' in df.columns:
                df = df.rename(columns={'date': 'time'})
            elif 'index' in df.columns:
                df = df.rename(columns={'index': 'time'})
            else:
                df['time'] = pd.to_datetime(orig_index).astype(str)
        df['code'] = code
        out_df = df[['time','code'] + fields]
        out_df.to_csv(os.path.join(out_dir, f"{code}_15m_last_month.csv"), index=False)
        frames.append(out_df)

    if frames:
        all_df = pd.concat(frames, ignore_index=True)
        all_df.to_csv(os.path.join(out_dir, "indices_15m_last_month.csv"), index=False)
    return os.path.abspath(out_dir)

# Example run
fetchIndexMin15(out_dir='indices_min15',start_date='2025-10-08', end_date='2025-12-29')

'/home/jquser/data/indices_min15'