In [1]:
import pandas as pd
import numpy as np
import os
import warnings

# Suppress every warning category for the rest of the session.
# NOTE(review): this is a blanket filter, so library deprecation warnings raised
# by later cells are hidden as well.
warnings.filterwarnings('ignore')

# 股票数据

In [2]:
# One entry per file found in the per-stock price folder (order as returned by os.listdir).
stock_names = os.listdir('./股价数据/')

# Columns kept from every per-stock CSV; 'Date' later becomes the index.
columns = [
    'Date',
    '开盘价',    # open price
    '收盘价',    # close price
    '最低价',    # low price
    '最高价',    # high price
    '成交量',    # trading volume
    '成交笔数',  # number of trades
    '成交额',    # turnover amount
    '换手率',    # turnover rate
]

In [3]:
# Persist every stock's cleaned price history into a single HDF5 file.
# The store is opened through a context manager so the file is closed even when
# reading or cleaning one of the CSVs raises part-way through the loop.
with pd.HDFStore('stock_price.h5', 'w') as stock_price_h5:
    for name in stock_names:
        tmp_df = (
            pd.read_csv('./股价数据/' + name, encoding='gbk')[columns]
            # assign() builds a new frame instead of writing into a column subset
            .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
            # keep the first row seen for each date
            .drop_duplicates(subset=['Date'])
            .set_index('Date', drop=True)
            # discard dates on which every remaining column is missing
            .dropna(how='all')
        )
        # Key = file name minus its last four characters (presumably the '.csv' suffix).
        stock_price_h5[name[:-4]] = tmp_df

In [4]:
# Demo: how to read one table back from the store.
# The key is derived from `stock_names` rather than from the loop variable left
# behind by the writing cell, so this cell still works when it is re-run after
# a later cell has rebound `name`.
demo_key = stock_names[-1][:-4]
print(demo_key)
pd.read_hdf('stock_price.h5', key=demo_key)

XOM.N


Unnamed: 0_level_0,开盘价,收盘价,最低价,最高价,成交量,成交笔数,成交额,换手率
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-01-04,68.72,69.15,68.190,69.260,27811317.0,,,0.4872
2010-01-05,69.19,69.42,68.800,69.450,30178588.0,,,0.5287
2010-01-06,69.45,70.02,69.340,70.600,35047453.0,,,0.6140
2010-01-07,69.90,69.80,69.415,70.060,27194133.0,,,0.4764
2010-01-08,69.69,69.52,69.220,69.750,24899483.0,,,0.4362
...,...,...,...,...,...,...,...,...
2022-04-25,82.37,82.26,79.290,82.655,37698264.0,,3.057357e+09,0.8905
2022-04-26,82.73,82.29,82.180,84.720,28941487.0,,2.407514e+09,0.6836
2022-04-27,83.39,84.64,82.280,85.475,32773024.0,,2.762729e+09,0.7741
2022-04-28,84.88,87.20,84.070,88.135,33683758.0,,2.908711e+09,0.7956


# 国内指数

In [5]:
# Load every Excel file in the domestic-index folder and stack them into one frame.
# The frames are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0, and appending inside a loop re-copies the data each time.
file_names = os.listdir('./国内指数/')
frames = []
for name in file_names:
    frames.append(pd.read_excel('./国内指数/' + name))
# Default ignore_index=False keeps each file's own row labels, as append() did.
df0 = pd.concat(frames)
df0

Unnamed: 0,交易所指数代码,交易日期,开盘指数,最高指数,最低指数,收盘指数
0,16,2007-02-05,1943.060,1956.790,1899.680,1906.030
1,16,2007-02-06,1901.580,1955.710,1824.600,1954.160
2,16,2007-02-07,1964.260,2029.990,1964.260,2001.370
3,16,2007-02-08,2006.750,2031.540,1980.320,2023.180
4,16,2007-02-09,2024.900,2027.850,1987.010,2003.320
...,...,...,...,...,...,...
2923,399905,2019-01-28,4341.492,4370.898,4297.963,4307.220
2924,399905,2019-01-29,4301.469,4301.469,4188.594,4254.288
2925,399905,2019-01-30,4235.275,4266.881,4211.191,4211.623
2926,399905,2019-01-31,4203.977,4240.716,4151.414,4176.474


In [6]:
# Split the stacked domestic-index frame by index code and store each series
# under its own key in domestic_idx.h5.

# Convert the dates before sorting so the order is chronological even if the
# column was read as text.
df0['交易日期'] = pd.to_datetime(df0['交易日期'])
df0 = df0.sort_values(['交易所指数代码', '交易日期'])

# Store key per index code; any code not listed here is written to 'zz500'.
key_by_code = {16: 'sz50', 300: 'hs300'}

# Context manager: the store is closed even if a write fails.
with pd.HDFStore('domestic_idx.h5', 'w') as domestic_idx:
    # sort=False visits the codes in order of first appearance (the sorted order
    # established above); rows with a missing code are skipped by groupby.
    for idx, tmp in df0.groupby('交易所指数代码', sort=False):
        tmp = tmp.set_index('交易日期', drop=True)
        tmp = tmp.drop('交易所指数代码', axis=1)
        domestic_idx[key_by_code.get(idx, 'zz500')] = tmp

In [7]:
# Read-back check: load the table stored under key 'hs300' in domestic_idx.h5.
pd.read_hdf(path_or_buf='domestic_idx.h5', key='hs300')

Unnamed: 0_level_0,开盘指数,最高指数,最低指数,收盘指数
交易日期,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-02-05,2282.770,2304.320,2247.930,2271.800
2007-02-06,2271.370,2316.700,2198.900,2316.040
2007-02-07,2330.430,2390.870,2330.430,2369.790
2007-02-08,2376.630,2415.960,2354.700,2410.600
2007-02-09,2412.640,2417.640,2376.020,2397.250
...,...,...,...,...
2023-01-30,4267.625,4268.152,4197.860,4201.345
2023-01-31,4200.581,4212.735,4153.048,4156.858
2023-02-01,4170.132,4195.933,4137.766,4195.933
2023-02-02,4207.412,4207.412,4170.557,4181.149


# 国外指数

In [8]:
# Split the international-index CSV by index code and store each series in
# international_idx.h5 under a key equal to the code itself.
indexcds = ['DJI', 'SPX', 'IXIC', 'NDX']
df0 = pd.read_csv('国际指数.csv')
df0['Trddt'] = pd.to_datetime(df0['Trddt'])

# Context manager: the store is closed even if a write fails.
with pd.HDFStore('international_idx.h5', 'w') as international_idx:
    for idx in indexcds:
        tmp = df0[df0['Indexcd'] == idx]
        tmp = tmp.set_index('Trddt', drop=True)
        tmp = tmp.drop('Indexcd', axis=1)
        # Every code in `indexcds` maps to an identically named key, so the
        # code is used directly as the store key.
        international_idx[idx] = tmp

In [9]:
# Read-back check: load the table stored under key 'SPX' in international_idx.h5.
pd.read_hdf(path_or_buf='international_idx.h5', key='SPX')

Unnamed: 0_level_0,Opnidx,Highidx,Lowidx,Clsidx
Trddt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2009-01-02,902.99,934.73,899.35,931.80
2009-01-05,929.17,936.63,919.53,927.45
2009-01-06,931.17,943.85,927.28,934.70
2009-01-07,927.45,927.45,902.37,906.65
2009-01-08,905.73,910.00,896.81,909.73
...,...,...,...,...
2023-01-27,4053.81,4094.21,4048.70,4070.56
2023-01-30,4049.45,4063.85,4015.55,4017.77
2023-01-31,4020.29,4077.16,4020.29,4076.60
2023-02-01,4070.20,4148.95,4037.20,4119.21
