In [1]:
import akshare as aks
from datetime import datetime


In [15]:
from datetime import datetime, timedelta
import pandas as pd

def future_history_bar(symbol, length=100, start_date="20141231", end_date="20250228"):
    # Convert the string dates to datetime objects for future reference
    start_date = datetime.strptime(start_date, "%Y%m%d")
    end_date = datetime.strptime(end_date, "%Y%m%d")

    # Fetch the futures data
    data = aks.futures_main_sina(
        symbol=symbol, start_date=start_date.strftime("%Y%m%d"), end_date=end_date.strftime("%Y%m%d")
    )
    print(data)
    # Rename the columns for consistency
    data = data.rename(
        columns={
            "日期": "date",
            "开盘价": "open",
            "最高价": "high",
            "最低价": "low",
            "收盘价": "close",
            "成交量": "volume",
            "持仓量": "open_interest",
            "动态结算价": "settle_price",
        }
    )
    
    # Convert relevant columns to floats
    data["open"] = data["open"].astype(float)
    data["high"] = data["high"].astype(float)
    data["low"] = data["low"].astype(float)
    data["close"] = data["close"].astype(float)
    data["volume"] = data["volume"].astype(float)

    # Calculate VWAP (Volume Weighted Average Price)
    data['vwap'] = data['settle_price'] * data['volume'] / data['volume'].sum()

    # Calculate the daily percentage change in close price
    data['change'] = data['close'].pct_change()

    # Add a formatted date column (YYYYMMDD format)
    data['date'] = pd.to_datetime(data['date'])
    data['date'] = data['date'].dt.strftime('%Y/%m/%d')

    # Add stock_code column and convert it to lowercase
    # Use the lowercase version of symbol (fe) for 'code' column
    data['code'] = [symbol.lower()] * len(data)
    # Ensure the date range is continuous from start_date to end_date
    all_dates = pd.date_range(start=start_date, end=end_date, freq='D')
    data['date'] = pd.to_datetime(data['date'])
    
    # Reindex to ensure every date is covered, filling missing dates
    data = data.set_index('date').reindex(all_dates, fill_value=0).reset_index()

    # Rename the index column to 'date'
    data = data.rename(columns={"index": "date"})

    # Fill missing data: Option 1: Forward fill the missing values, or you can use backward fill or interpolation
    # In this case, since we are setting missing values as 0, we don't need further filling
    # However, we could also choose to use 'ffill' or 'bfill' if needed for specific columns

    # Ensure 'code' column values are all lowercase, just to be safe (though it's already done above)
    data['code'] = data['code'].apply(lambda x: symbol.lower())

    # Move 'stock_code' to the first column
    data.insert(1, 'code', data.pop('code'))
    data['factor'] = [1] * len(data)

    dir_name = './feature_data'
    
    # Convert the file name to lowercase
    file_name = dir_name + '/' + ''.join(symbol.split('.')).lower() + '.csv'
    
    # Save the dataframe to a CSV (with lowercase file name)
    data.to_csv(file_name, index=False)

    return data


# List of symbols
List = ['PX0', 'PX2505', 'PX2509', 'PX2506', 'PX2507', 'PX2601', 'PX2504', 'PX2508', 'PX2510', 'PX2511', 'PX2512', 'PX2602', 'PR0', 'PR2505', 'PR2506', 'PR2509', 'PR2504', 'PR2508', 'PR2507', 'PR2601', 'PR2511', 'PR2512', 'SH0', 'SH2505', 'SH2509', 'SH2506', 'SH2507', 'SH2504', 'SH2508', 'SH2601', 'SH2510', 'SH2511', 'SH2512', 'SH2602', 'PK0', 'PK2505', 'PK2510', 'PK2504', 'PK2511', 'PK2601', 
        'PK2512', 'PF0', 'PF2505', 'PF2506', 'PF2507', 'PF2504', 'PF2509', 'PF2508', 'TA0', 'TA2505', 'TA2509', 'TA2601', 'TA2504', 'TA2506', 'TA2508', 'TA2510', 'TA2511', 'TA2507', 'P0', 'V0', 'P2505', 'P2506', 'P2509', 'P2507', 'B0', 'M0', 'I0', 'JD0', 'L0', 'BB2504', 'Y0', 'C0', 'A0', 'J0', 'JM0', 'CS0', 'EG0', 'RR0', 'LH0', 'LG0', 'EB0', 'RR0', 'IF0', 'TF0', 'T2506', 'IH0', 'IC0', 'TS0', 'IM0',
        'SN0', 'NI0', 'SP0', 'NR0', 'SS0', 'BC0', 'AO0', 'BR0', 'EC0']

dir_name = './feature_data'

# Create a dictionary to hold each symbol and its time span
symbol_duration = {}

# Loop through each symbol and calculate the duration
for fe in List:
    data = future_history_bar(symbol=fe)
    
    # Calculate the number of days between start_date and end_date for each symbol
    start_date = datetime.strptime("20141231", "%Y%m%d")
    end_date = datetime.strptime("20250228", "%Y%m%d")
    duration = (end_date - start_date).days
    
    # Store the symbol and its duration
    symbol_duration[fe] = duration

# Sort symbols based on duration (longest duration first)
sorted_symbols = sorted(symbol_duration, key=symbol_duration.get, reverse=True)

# Now process the symbols in the order of longest duration first
for fe in sorted_symbols:
    data = future_history_bar(symbol=fe)
    data.to_csv(dir_name + '/' + fe.lower() + '.csv', index=False)


ValueError: Expected object or value

In [None]:
print(data)

Empty DataFrame
Columns: [date, code, open, high, low, close, volume, open_interest, settle_price, vwap, change]
Index: []


In [14]:
import numpy as np

# 假设您的 .bin 文件存储在当前目录下，文件名为 'data.bin'
data = np.fromfile('high.day.bin', dtype=np.float32)  # 根据实际数据类型修改 dtype


In [15]:
print(data.shape)

(2469,)
