In [3]:
import matplotlib
import warnings
import pandas as pd
import os


# 忽略 FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# 设置matplotlib支持中文显示
matplotlib.rcParams['font.family'] = 'SimHei'  # 设置字体为黑体
matplotlib.rcParams['axes.unicode_minus'] = False  # 正确显示负号

# 定义文件夹路径
market_info_path = 'StockData/market_info'
market_data_path = 'StockData/market_data'

# 读取所有 market_info 文件并合并
info_dfs = []
for file_name in sorted(os.listdir(market_info_path)):
    if file_name.endswith('.csv'):
        file_path = os.path.join(market_info_path, file_name)
        info_df = pd.read_csv(file_path)
        info_dfs.append(info_df)
market_info = pd.concat(info_dfs, ignore_index=True)

# 读取所有 market_data 文件并合并
data_dfs = []
for file_name in sorted(os.listdir(market_data_path)):
    if file_name.endswith('.csv'):
        file_path = os.path.join(market_data_path, file_name)
        data_df = pd.read_csv(file_path)
        data_dfs.append(data_df)
market_data = pd.concat(data_dfs, ignore_index=True)

# 将日期列格式化，自动推断日期格式
market_info['date'] = pd.to_datetime(market_info['date'], errors='coerce')  # 如果有错误，设置为NaT
market_data['time'] = pd.to_datetime(market_data['time'], format='%Y/%m/%d %H:%M', errors='coerce')

# 合并数据：使用股票代码 'code' 以及时间/日期 'date' 和 'time' 进行合并
combined_data = pd.merge(
    market_data,
    market_info,
    left_on=['code', 'time'],
    right_on=['code', 'date'],
    how='left'
)

# 删除合并后多余的 'date' 列
combined_data = combined_data.drop(columns=['date'])

# # 填充缺失值（可以使用填充零或插值等方法）
# combined_data.fillna(method='ffill', inplace=True)  # 前向填充
# combined_data.fillna(method='bfill', inplace=True)  # 后向填充
# 或使用填充 0： combined_data.fillna(0, inplace=True)

# 打印处理后的数据
print(combined_data.head())

   Unnamed: 0_x time         code      open      high       low     close  \
0             0  NaT  szse.000001  5.662246  5.746339  5.629543  5.713636   
1             1  NaT  szse.000001  5.676262  5.680934  5.503404  5.573482   
2             2  NaT  szse.000001  5.554794  5.606184  5.372593  5.452014   
3             3  NaT  szse.000001  5.386609  5.494061  5.377265  5.433327   
4             4  NaT  szse.000001  5.437999  5.582825  5.386609  5.494061   

     volume        amount  open_interest  Unnamed: 0_y name block_name  weight  
0  489910.0  5.962237e+08            NaN           NaN  NaN        NaN     NaN  
1  551114.0  6.566313e+08            NaN           NaN  NaN        NaN     NaN  
2  582118.0  6.792804e+08            NaN           NaN  NaN        NaN     NaN  
3  338407.0  3.939776e+08            NaN           NaN  NaN        NaN     NaN  
4  457768.0  5.384362e+08            NaN           NaN  NaN        NaN     NaN  
