In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
import numpy as np
import pandas as pd
import pickle
import keras_tuner as kt

In [3]:
with open('train_data.pkl', 'rb') as f:
    X_train, y_train = pickle.load(f)
with open('test_data.pkl', 'rb') as f:
    X_test, y_test = pickle.load(f)

In [None]:
n_steps = 3  
features = ['CI005001', 'CI005002',
       'CI005003', 'CI005004', 'CI005005', 'CI005006', 'CI005007', 'CI005008',
       'CI005009', 'CI005010', 'CI005011', 'CI005012', 'CI005013', 'CI005014',
       'CI005015', 'CI005016', 'CI005017', 'CI005018', 'CI005019', 'CI005020',
       'CI005021', 'CI005022', 'CI005023', 'CI005024', 'CI005025', 'CI005026',
       'CI005027', 'CI005028', 'CI005029', 'CI005030', 'Analyst Sentiment',
       'Beta', 'Book-to-Price', 'Dividend Yield', 'Earnings Quality',
       'Earnings Variability', 'Earnings Yield', 'Growth', 'Industry Momentum',
       'Investment Quality', 'Leverage', 'Liquidity', 'Long-Term Reversal',
       'Mid Capitalization', 'Momentum', 'Profitability',
       'Residual Volatility', 'Seasonality', 'Short-Term Reversal', 'Size']  
target = 'next_Rtn'

In [4]:
def build_gru_model(hp):
    model = keras.Sequential()
    model.add(layers.GRU(
        units=hp.Int('gru_units_1', min_value=64, max_value=256, step=32),
        return_sequences=True,
        input_shape=(n_steps, len(features))
    ))
    model.add(layers.Dropout(
        rate=hp.Float('dropout_1', min_value=0.2, max_value=0.5, step=0.1)
    ))
    model.add(layers.GRU(
        units=hp.Int('gru_units_2', min_value=32, max_value=128, step=32)
    ))
    model.add(layers.Dropout(
        rate=hp.Float('dropout_2', min_value=0.1, max_value=0.4, step=0.1)
    ))
    model.add(layers.Dense(1))
    lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    optimizer = keras.optimizers.Adam(learning_rate=lr, clipnorm=1.0) 
    model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
    return model

In [9]:
tuner = kt.RandomSearch(
    build_gru_model,
    objective='val_loss',
    max_trials=10, 
    executions_per_trial=1,
    directory='gru_tuning',
    project_name='stock_prediction'
)

In [10]:
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

In [12]:
tuner.search(
    X_train, y_train,
    epochs=5,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)

Trial 10 Complete [00h 06m 39s]
val_loss: 0.0168549045920372

Best val_loss So Far: 0.016798270866274834
Total elapsed time: 01h 07m 53s


In [13]:
best_model = tuner.get_best_models(num_models=1)[0]
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [14]:
print("\nBest parameters:")
print(f"GRU 1: {best_hps.get('gru_units_1')}")
print(f"GRU 2: {best_hps.get('gru_units_2')}")
print(f"Dropout 1: {best_hps.get('dropout_1')}")
print(f"Dropout 2: {best_hps.get('dropout_2')}")
print(f"lr: {best_hps.get('learning_rate')}")


Best parameters:
GRU 1: 64
GRU 2: 32
Dropout 1: 0.30000000000000004
Dropout 2: 0.2
lr: 0.001


In [15]:
history = best_model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    callbacks=[early_stop, reduce_lr],
    verbose=1
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [16]:
def calculate_metrics(y_test, y_pred):
    n = len(y_test)
    
    mse = sum((y_test[i] - y_pred[i]) ** 2 for i in range(n)) / n
    
    rmse = mse ** 0.5  
    
    mae = sum(abs(y_test[i] - y_pred[i]) for i in range(n)) / n
    
    mean_y = sum(y_test) / n
    std_y=np.std(y_test)
    ss_tot = sum((y - mean_y) ** 2 for y in y_test)
    ss_res = sum((y_test[i] - y_pred[i]) ** 2 for i in range(n))
    r2 = 1 - (ss_res / ss_tot) if ss_tot != 0 else 0
    
    return mse, rmse, mae, r2, mean_y,std_y




In [20]:
y_pred = best_model.predict(X_test)
mse, rmse, mae, r2,mean_y,std_y= calculate_metrics(y_test, y_pred)
print("\nFinal results:")
print(f"MSE: {mse.item():.6f}")
print(f"RMSE: {rmse.item():.6f}")
print(f"MAE: {mae.item():.6f}")
print(f"R²: {r2.item():.6f}")
print(f"y_mean :{mean_y},y_std:{std_y}")


Final results:
MSE: 0.015842
RMSE: 0.125866
MAE: 0.086968
R²: -0.077416
y_mean :-0.007134583690839367,y_std:0.12126015501029316


In [21]:
best_model.save('GRU_model.h5')
print("模型已保存至 GRU_model.h5")

模型已保存至 GRU_model.h5
