In [25]:
import pandas as pd
import numpy as np
import re
import math
from collections import defaultdict
import matplotlib.pyplot as plt
import datetime
import os
import logging
from scipy.optimize import minimize
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [3]:
import warnings
from IPython.core.interactiveshell import InteractiveShell
warnings.filterwarnings('ignore')
InteractiveShell.ast_node_interactivity = "all" 

# 数据读取

In [26]:
data = pd.read_excel("SPX.xlsx")
data['日期'] = pd.to_datetime(data['日期']).dt.date
data.columns = ['Code', 'Name', 'Date', 'Open', 'High', 'Low', 'Close', 'Change', 'Volume', 'Turnover_Mil']
data = data[data['Date'] >=  datetime.date(2020, 1, 1)]
# data['Date'] = data['Date'].dt.date
data

Unnamed: 0,Code,Name,Date,Open,High,Low,Close,Change,Volume,Turnover_Mil
23368,SPX.GI,标普500,2020-01-02,3244.67,3258.14,3235.53,3257.85,0.0084,5.902449e+09,0.00
23369,SPX.GI,标普500,2020-01-03,3226.36,3246.15,3222.34,3234.85,-0.0071,5.637120e+09,0.00
23370,SPX.GI,标普500,2020-01-06,3217.55,3246.84,3214.64,3246.28,0.0035,6.048874e+09,0.00
23371,SPX.GI,标普500,2020-01-07,3241.86,3244.91,3232.43,3237.18,-0.0028,5.512627e+09,0.00
23372,SPX.GI,标普500,2020-01-08,3238.59,3267.07,3236.67,3253.05,0.0049,5.806283e+09,0.00
...,...,...,...,...,...,...,...,...,...,...
24722,SPX.GI,标普500,2025-04-28,5529.22,5553.66,5468.64,5528.75,0.0006,2.750592e+09,314036.58
24723,SPX.GI,标普500,2025-04-29,5508.87,5571.95,5505.70,5560.83,0.0058,2.852734e+09,295142.68
24724,SPX.GI,标普500,2025-04-30,5499.44,5581.84,5433.24,5569.06,0.0015,3.745265e+09,401116.95
24725,SPX.GI,标普500,2025-05-01,5625.14,5658.91,5597.35,5604.14,0.0063,3.216172e+09,386018.52


# 数据处理

In [None]:
folder = 'text_data'
filenames = os.listdir(folder)
date_pattern = re.compile(r'(\d{8})')
fomc_dates = []

for f in filenames:
    match = date_pattern.search(f)
    if match:
        try:
            fomc_dates.append(pd.to_datetime(match.group(1), format='%Y%m%d'))
        except Exception as e:
            print(f"Error parsing {f}: {e}")

fomc_dates = sorted(fomc_dates)
fomc_dates = [d.date() for d in fomc_dates]
fomc_dates

In [22]:
results = []

for fomc_date in fomc_dates:
    print("fomc_date:",fomc_date)
    after_dates = data[data['Date'] > fomc_date] # find next tradingdate
    print("after_dates:",after_dates)
    if not after_dates.empty:
        next_trading_day = after_dates.iloc[0]
        previous_day = data[data['Date'] <= fomc_date].iloc[-1]
        
        change = (next_trading_day['Close'] - previous_day['Close']) / previous_day['Close']
        
        results.append({
            'FOMC_Date': fomc_date.date(),
            'Prev_Close': previous_day['Close'],
            'Next_Trading_Date': next_trading_day['Date'].date(),
            'Next_Close': next_trading_day['Close'],
            'Change_%': round(change * 100, 2)
        })

# 4. 转为 DataFrame
fomc_change_df = pd.DataFrame(results)


fomc_date: 2020-01-29 00:00:00
after_dates:          Code   Name                Date     Open     High      Low    Close  \
23387  SPX.GI  标普500 2020-01-29 01:00:00  3289.46  3293.47  3271.89  3273.40   
23388  SPX.GI  标普500 2020-01-30 01:00:00  3256.45  3285.91  3242.80  3283.66   
23389  SPX.GI  标普500 2020-01-31 01:00:00  3282.33  3282.33  3214.68  3225.52   
23390  SPX.GI  标普500 2020-02-03 01:00:00  3235.66  3268.44  3235.66  3248.92   
23391  SPX.GI  标普500 2020-02-04 01:00:00  3280.61  3306.92  3280.61  3297.59   
...       ...    ...                 ...      ...      ...      ...      ...   
24722  SPX.GI  标普500 2025-04-28 00:00:00  5529.22  5553.66  5468.64  5528.75   
24723  SPX.GI  标普500 2025-04-29 00:00:00  5508.87  5571.95  5505.70  5560.83   
24724  SPX.GI  标普500 2025-04-30 00:00:00  5499.44  5581.84  5433.24  5569.06   
24725  SPX.GI  标普500 2025-05-01 00:00:00  5625.14  5658.91  5597.35  5604.14   
24726  SPX.GI  标普500 2025-05-02 00:00:00  5645.88  5700.70  5642.28  5686.67

In [20]:
fomc_change_df

Unnamed: 0,FOMC_Date,Prev_Close,Next_Trading_Date,Next_Close,Change_%
0,2020-01-29,3276.24,2020-01-29,3273.4,-0.09
1,2020-03-15,2711.02,2020-03-16,2386.13,-11.98
2,2020-04-29,2939.51,2020-04-30,2912.43,-0.92
3,2020-06-10,3190.14,2020-06-11,3002.1,-5.89
4,2020-07-29,3258.44,2020-07-30,3246.22,-0.38
5,2020-09-16,3385.49,2020-09-17,3357.01,-0.84
6,2020-11-05,3443.44,2020-11-05,3510.45,1.95
7,2020-12-16,3694.62,2020-12-16,3701.17,0.18
8,2021-01-27,3849.62,2021-01-27,3750.77,-2.57
9,2021-03-17,3974.12,2021-03-18,3915.46,-1.48
