
## Package Import

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt
import numpy as np
import pandas as pd
import pickle


In [2]:
# Ask TensorFlow which GPUs it can see and, for each one, switch on
# memory growth so memory is claimed on demand instead of all at once.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("GPU does not exist")
else:
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
    print("GPU exists ")


GPU exists 


## Data Engineering

In [None]:
# Read the panel data set; the first line of the file supplies the column names.
df = pd.read_csv(
    '调整后月频回归的面板数据.csv',
    header=0,
)

In [None]:
# Look-back length: every sample is assembled from this many consecutive rows.
n_steps = 3

# Model input columns: the 30 sequentially numbered columns
# CI005001 .. CI005030, followed by 20 named factor columns.
features = [f'CI005{idx:03d}' for idx in range(1, 31)] + [
    'Analyst Sentiment',
    'Beta',
    'Book-to-Price',
    'Dividend Yield',
    'Earnings Quality',
    'Earnings Variability',
    'Earnings Yield',
    'Growth',
    'Industry Momentum',
    'Investment Quality',
    'Leverage',
    'Liquidity',
    'Long-Term Reversal',
    'Mid Capitalization',
    'Momentum',
    'Profitability',
    'Residual Volatility',
    'Seasonality',
    'Short-Term Reversal',
    'Size',
]

# Column used as the regression label.
target = 'next_Rtn'


In [None]:
# Chronological cut-off: a window whose label row is dated on or before this
# day goes to the training set, anything later goes to the test set.
split_date = pd.to_datetime('2022-12-31')

# read_csv does not parse dates on its own, so TRADE_DT may still be str/int
# here; comparing such values with a Timestamp raises TypeError. Normalise the
# column to datetime up front (on a copy, so `df` itself is left untouched and
# the cell stays re-runnable).
if pd.api.types.is_datetime64_any_dtype(df['TRADE_DT']):
    trade_dates = df['TRADE_DT']
else:
    # Going through str lets compact integer dates such as 20221231 be read as
    # calendar dates rather than as epoch offsets.
    trade_dates = pd.to_datetime(df['TRADE_DT'].astype(str))
panel = df.assign(TRADE_DT=trade_dates)

# Plain numpy scalar for the comparison below (same instant as split_date).
split_cutoff = split_date.to_datetime64()

X_train_list, y_train_list = [], []
X_test_list, y_test_list = [], []

for stock, group in panel.groupby('S_INFO_WINDCODE'):
    # Windows are built per stock, in date order.
    group = group.sort_values('TRADE_DT').reset_index(drop=True)
    arr_features = group[features].values
    arr_dates = group['TRADE_DT'].values
    arr_target = group[target].values

    # Decide train/test membership for every row once, outside the inner loop.
    is_train_row = arr_dates <= split_cutoff

    for i in range(n_steps, len(group)):
        # Inputs are rows i-n_steps .. i-1 (row i's own features are NOT part
        # of the window); the label is the target value stored on row i.
        # NOTE(review): if `next_Rtn` on row i is the return realised after
        # date i, row i's features would also be known at prediction time --
        # confirm that this alignment is intended.
        window_x = arr_features[i - n_steps:i, :]
        window_y = arr_target[i]

        if is_train_row[i]:
            X_train_list.append(window_x)
            y_train_list.append(window_y)
        else:
            X_test_list.append(window_x)
            y_test_list.append(window_y)


X_train = np.array(X_train_list)
y_train = np.array(y_train_list)
X_test = np.array(X_test_list)
y_test = np.array(y_test_list)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)


# Cache both splits on disk so the modelling section can reload them without
# rebuilding the windows.
with open('train_data.pkl', 'wb') as f:
    pickle.dump((X_train, y_train), f)

with open('test_data.pkl', 'wb') as f:
    pickle.dump((X_test, y_test), f)

(426410, 3, 50) (426410,)
(72263, 3, 50) (72263,)


## Model Building

In [82]:
# Reload the (X, y) tuples that the data-engineering section wrote to disk.
# pickle.load executes whatever the file encodes, so only point this at files
# produced by this notebook.
with open('train_data.pkl', 'rb') as train_file:
    X_train, y_train = pickle.load(train_file)

with open('test_data.pkl', 'rb') as test_file:
    X_test, y_test = pickle.load(test_file)

In [None]:
def build_model(hp, input_shape=None):
    """Build and compile a two-layer stacked LSTM regressor.

    Architecture: LSTM (returning the full sequence) -> Dropout -> LSTM ->
    Dropout -> Dense(1). The model is compiled with Adam (gradient norm
    clipped at 1.0), MSE loss and MAE as an additional metric.

    Hyperparameters registered on ``hp``:
        lstm_units_1, lstm_units_2: int, 32..128 in steps of 32.
        dropout_1, dropout_2: float, 0.1..0.5 in steps of 0.1.
        learning_rate: choice from ``[1e-4]``.

    Args:
        hp: a ``keras_tuner.HyperParameters`` instance supplying the values above.
        input_shape: optional ``(timesteps, n_features)`` tuple for one sample.
            When omitted, the notebook-level ``(n_steps, len(features))`` is
            used, so the function still works as a one-argument builder.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    if input_shape is None:
        # Fall back to the window configuration defined earlier in the notebook.
        input_shape = (n_steps, len(features))

    model = keras.Sequential()
    # An explicit Input declares the sample shape up front, so the model is
    # built (and can be summarised) before it sees any data.
    model.add(keras.Input(shape=input_shape))
    model.add(layers.LSTM(units=hp.Int('lstm_units_1', min_value=32, max_value=128, step=32),
                          return_sequences=True))
    model.add(layers.Dropout(rate=hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(layers.LSTM(units=hp.Int('lstm_units_2', min_value=32, max_value=128, step=32)))
    model.add(layers.Dropout(rate=hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)))
    model.add(layers.Dense(1))

    lr = hp.Choice('learning_rate', values=[1e-4])
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr, clipnorm=1.0), loss='mse', metrics=['mae'])
    # Was `model.summary` without parentheses, a no-op attribute lookup.
    model.summary()
    return model


In [84]:
# Fix the NumPy and TensorFlow global seeds so repeated runs are comparable.
np.random.seed(42)
tf.random.set_seed(42)

# patience must stay below the epoch budget: with patience == epochs (10/10 as
# before) the callback could never stop training early.
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# An empty HyperParameters container is passed in, so build_model runs without
# a tuner. NOTE(review): each hp.* call should then return its default value --
# confirm against the installed keras_tuner version.
hp = kt.HyperParameters()
model = build_model(hp)

# NOTE(review): validation_split holds out the trailing 10% of X_train. The
# windows were appended stock by stock, so that slice is "the last stocks",
# not "the latest dates" -- consider a date-based validation set.
history = model.fit(X_train, y_train, epochs=10, batch_size=32,
                    validation_split=0.1, callbacks=[early_stop])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [88]:
# Score the trained model on the held-out windows. The model was compiled with
# loss='mse' and metrics=['mae'], so evaluate() yields those two numbers in
# that order.
test_loss, test_mae = model.evaluate(X_test, y_test)
for label, score in (("MSE:", test_loss), ("MAE:", test_mae)):
    print(label, score)

MSE: 0.015331553295254707
MAE: 0.08507969975471497


In [None]:
# Persist the trained network to disk; the file name is kept as-is so anything
# that loads 'lstm_model.h5' keeps working.
model.save('lstm_model.h5')
# Emit the confirmation shown in the recorded cell output ("model saved to
# lstm_model.h5"), so a fresh run reproduces it.
print('模型已保存至 lstm_model.h5')

模型已保存至 lstm_model.h5
