# Training

In [None]:
import tensorflow as tf
import pandas as pd
import os
import matplotlib.pyplot as plt
from alphanet import AlphaNetV3
from alphanet.data import TimeSeriesData, TrainValData
from alphanet.metrics import UpDownAccuracy

## Data Processing

In [None]:
# Load the CSI 500 table; the stock code ("代码") and short name ("简称")
# columns are parsed as categoricals, and the short name is dropped right away.
csi = pd.read_csv("./data/CSI500.zip", dtype={"代码": "category",
                                              "简称": "category"})
csi = csi.drop(columns=["简称"])
# Keep rows dated 20110131 or later. The explicit copy makes the filtered
# frame independent of the original, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning.
csi = csi.loc[csi["日期"] >= 20110131, :].copy()

# Derived ratio features.
# NOTE(review): a zero in any denominator column produces inf/NaN here --
# confirm the source data or the downstream code accounts for that.
csi["close/free_turn"] = csi["收盘价(元)"] / csi["换手率(基准.自由流通股本)"]
csi["open/turn"] = csi["开盘价(元)"] / csi["换手率(%)"]
csi["volume/low"] = csi["成交量(股)"] / csi["最低价(元)"]
csi["vwap/high"] = csi["均价"] / csi["最高价(元)"]
csi["low/high"] = csi["最低价(元)"] / csi["最高价(元)"]
csi["vwap/close"] = csi["均价"] / csi["收盘价(元)"]

# Ten-trading-day forward return.
# Each trading date is mapped to the date ten positions earlier in the sorted
# list of dates, so that a row's close can be re-labelled as the "close ten
# trading days later" of that earlier date and joined back onto it.
trading_dates = csi["日期"].unique()
trading_dates.sort()
dates_shift_dictionary = dict(zip(trading_dates[10:], trading_dates[:-10]))
csi_slice = csi[["代码", "日期", "收盘价(元)"]].copy()
csi_slice_date_shift = csi_slice.copy()
# Dates that are not keys of the dictionary (the first ten trading dates)
# become NaN and are removed by dropna() below.
csi_slice_date_shift["日期"] = \
    csi_slice_date_shift["日期"].map(dates_shift_dictionary)
# After the NaN rows are gone, cast the shifted dates back to the dtype of the
# original date column so both merge keys have the same dtype.
csi_slice_date_shift = (
    csi_slice_date_shift
    .rename(columns={"收盘价(元)": "10交易日后收盘价(元)"})
    .dropna()
    .astype({"日期": csi["日期"].dtype})
)
csi_slice = csi_slice.merge(csi_slice_date_shift,
                            how="inner",
                            on=["代码", "日期"])
close_price = csi_slice["收盘价(元)"]
future_close_price = csi_slice["10交易日后收盘价(元)"]
csi_slice["10日回报率"] = future_close_price / close_price - 1
csi_slice = csi_slice.drop(columns=["收盘价(元)", "10交易日后收盘价(元)"])
# csi_slice is the left frame, so the merged table starts with the columns
# code, date, ten-day return, followed by the remaining columns of csi.
csi = csi_slice.merge(csi,
                      how="inner",
                      on=["代码", "日期"])

## Build TimeSeriesData

In [None]:
# Build one TimeSeriesData per category of the stock-code column, then wrap
# the whole list in a TrainValData.
codes = csi["代码"].cat.categories
stock_data = [
    TimeSeriesData(
        dates=rows["日期"].values,
        # Every column from position 3 onwards is passed as model input.
        data=rows.iloc[:, 3:].values,
        labels=rows["10日回报率"].values,
    )
    for rows in (csi.loc[csi["代码"] == stock_code, :] for stock_code in codes)
]
data = TrainValData(stock_data)

## Define Training Functions

In [None]:
def try_mkdirs(path):
    """Create directory ``path`` (and any missing parents) if it is absent.

    Calling it on a directory that already exists is a no-op.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True lets makedirs handle the "already there" case itself,
    # which removes the window between a separate existence check and the
    # creation call.
    os.makedirs(path, exist_ok=True)

# Create the top-level ./models output folder up front.
try_mkdirs("./models")


def do_training(beginning_date, training_id):
    """Train one AlphaNetV3 model for a given start date and run id.

    The train/validation datasets are fetched from the module-level ``data``
    object with ``data.get(beginning_date)``. Weights are checkpointed after
    every epoch into ``./models/<training_id>/<beginning_date>/`` and training
    stops early when ``val_loss`` has not improved for 10 epochs.

    Args:
        beginning_date: Integer start date; zero-padded to 8 digits in the
            checkpoint folder name.
        training_id: Integer run id; zero-padded to 2 digits in the
            checkpoint folder name.

    Returns:
        A tuple ``(history, net)``: the object returned by ``fit`` and the
        AlphaNetV3 instance that was trained.
    """
    tracked_metrics = [tf.keras.metrics.RootMeanSquaredError(),
                       UpDownAccuracy()]
    net = AlphaNetV3(metrics=tracked_metrics)
    train, val = data.get(beginning_date)
    keras_model = net.model()

    # Per-run, per-start-date checkpoint directory.
    checkpoint_dir = "./models/{training_id:02d}/{beginning_date:08d}/".format(
        training_id=training_id,
        beginning_date=beginning_date,
    )
    try_mkdirs(checkpoint_dir)

    # The {epoch}/{val_loss} placeholders are left unformatted here; they are
    # handed to ModelCheckpoint as part of the file path template.
    checkpoint_template = checkpoint_dir + "{epoch:04d}-{val_loss:.4f}.hdf5"
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_template,
        save_freq="epoch",
        save_weights_only=True,
    )
    early_stop_cb = tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=10,
        restore_best_weights=True,
    )

    train_batches = train.prefetch(50000).batch(500)
    val_batches = val.batch(500)
    history = keras_model.fit(
        train_batches,
        validation_data=val_batches,
        epochs=100,
        callbacks=[early_stop_cb, checkpoint_cb],
    )
    return history, net

## Training

In [None]:
# Start dates of the rolling training windows, as eight-digit integers
# (presumably YYYYMMDD, like the value used to filter the date column).
ROLLING_BEGINNING_LIST = [20110131,
                          20110731,
                          20120131,
                          20120731,
                          20130131,
                          20130731,
                          20140131,
                          20140731,
                          20150131]

# Run ids; every id trains one model per start date.
TRAINING_ID = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

try_mkdirs("./models/best/")
model_path_str = "./models/best/{training_id:02d}_{beginning:08d}"

for tid in TRAINING_ID:
    for beginning in ROLLING_BEGINNING_LIST:
        model_path = model_path_str.format(training_id=tid,
                                           beginning=beginning)
        # Pass by keyword: do_training's positional order is
        # (beginning_date, training_id), the reverse of this loop's nesting,
        # so a positional call here would swap the two values.
        history, model = do_training(beginning_date=beginning,
                                     training_id=tid)
        model.save(model_path)
        # Plot the per-epoch metrics recorded during training.
        pd.DataFrame(history.history).plot()
        plt.title(model_path)
        plt.show()