In [1]:
import pandas as pd
import numpy as np
import altair as alt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Data preprocessing part for LSTM to get data frame with features of different time periods.
def data_process(price_dict, stock_code):
    """Build lagged close-price features for one stock.

    Three overlapping look-back windows are generated from the 'close' column:
    short (lags 2-4), medium (lags 2-8) and long (lags 2-15).

    Parameters
    ----------
    price_dict : dict
        Maps stock code -> DataFrame containing at least a 'close' column.
        The frame is reversed before lagging, so it is presumably stored
        newest-first -- TODO confirm against the data source.
    stock_code : str
        Key of the stock to process.

    Returns
    -------
    (feature_df, stock_close)
        feature_df holds only the 24 lag columns, in short/medium/long order;
        stock_close is the 'close' Series aligned row-for-row with it.
        Rows containing a NaN in any column are dropped before the split.
    """
    # Reversing + reset_index yields a new frame, so the caller's data is not mutated.
    stock_df = price_dict[stock_code][::-1].reset_index(drop=True)

    # (column prefix, largest lag); every window starts at lag 2.
    for prefix, max_lag in (("short", 4), ("medium", 8), ("long", 15)):
        for lag in range(2, max_lag + 1):
            stock_df["{}_close{}".format(prefix, lag)] = stock_df['close'].shift(lag)

    stock_df = stock_df.dropna(axis=0).reset_index(drop=True)
    stock_close = stock_df['close']
    # Anchored pattern: the previous "short*|medium*|long*" used glob-style stars,
    # which as a regex matches ANY column containing "shor", "mediu" or "lon".
    stock_df = stock_df.filter(regex=r"^(?:short|medium|long)_close\d+$")

    return stock_df, stock_close

In [3]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

# Input features and stock close prices to train the model.
# Return trained LSTM model.
def train(feature_df, stock_close):
    """Fit a small LSTM regressor that maps lagged closes to the close price.

    Parameters
    ----------
    feature_df : DataFrame, shape (n_samples, n_lags)
        One lagged close price per column.
    stock_close : Series or array, length n_samples
        Regression target for each row of ``feature_df``.

    Returns
    -------
    The fitted Keras ``Sequential`` model. It expects inputs shaped
    (batch, n_lags, 1).
    """
    n_lags = feature_df.shape[1]

    # The LSTM is declared with input_shape=(n_lags, 1), i.e. every lag column is
    # one timestep carrying a single feature. Make that layout explicit instead
    # of handing Keras a 2-D frame and relying on it to adjust the rank.
    inputs = np.asarray(feature_df, dtype="float32").reshape(-1, n_lags, 1)
    targets = np.asarray(stock_close, dtype="float32").reshape(-1, 1)

    # NOTE(review): ExponentialDecay computes lr * decay_rate ** (step / decay_steps);
    # decay_rate=1e-6 looks like a value meant for the legacy `decay` optimizer
    # argument -- confirm this is the intended schedule.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.005,
        decay_steps=10000,
        decay_rate=1e-6)

    model = Sequential()
    model.add(LSTM(64, return_sequences=False, input_shape=(n_lags, 1)))
    model.add(Dense(32))
    model.add(Dense(1))
    model.compile(loss="mean_squared_error", optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule))

    model.fit(inputs, targets, batch_size=30, epochs=5)

    return model

2023-03-08 15:45:27.736124: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Predict the future stock values.
def predict(model, predict_time, features=None, window_starts=(0, 3, 10)):
    """Roll the model forward ``predict_time`` steps, feeding forecasts back in.

    Parameters
    ----------
    model : object with ``predict(batch, verbose=0)``
        Model that accepts inputs shaped (1, n_features, 1).
    predict_time : int
        Number of consecutive steps to forecast.
    features : DataFrame, optional
        Feature frame whose last row seeds the forecast. When omitted, the
        notebook-global ``feature_df`` is used, as the original implementation did.
    window_starts : sequence of int, optional
        Index of the first column of each look-back window inside a feature
        row. The default matches the 3/7/14-column short/medium/long layout
        built by ``data_process``.

    Returns
    -------
    list of float
        The ``predict_time`` forecasts, oldest first.

    NOTE(review): the column names produced by ``data_process`` start at lag 2,
    while every forecast is inserted at the head of each window; confirm that
    this one-step offset is intended.
    """
    if features is None:
        features = feature_df  # backward compatible: rely on the notebook global

    n_features = features.shape[1]
    window = list(features.iloc[-1])
    bounds = list(window_starts) + [n_features]

    forecasts = []
    for _ in range(predict_time):
        batch = np.asarray(window, dtype="float32").reshape(1, n_features, 1)
        next_close = float(np.asarray(model.predict(batch, verbose=0)).flatten()[-1])
        forecasts.append(next_close)

        # Age every window by one step: drop its oldest value and put the new
        # forecast in front. The previous loop indexed with the outer counter,
        # so values were never shifted and predict_time > 23 raised IndexError.
        for start, stop in zip(bounds[:-1], bounds[1:]):
            window[start:stop] = [next_close] + window[start:stop - 1]

    return forecasts
            

In [5]:
import numpy as np

def update_dict(pred_close, price_dict, stock_code, newest_first=True):
    """Extend a stock's price frame in place with one row per predicted close.

    Each new row copies the most recent existing row and overwrites 'pre_close',
    'close', 'change', 'pct_chg' and 'log_rtn'. All other columns (including
    'trade_date') keep the copied values.

    Parameters
    ----------
    pred_close : iterable of float
        Predicted closes, oldest first.
    price_dict : dict
        Maps stock code -> DataFrame; ``price_dict[stock_code]`` is replaced.
    stock_code : str
        Key of the frame to extend.
    newest_first : bool, optional
        Row order of the stored frame. ``data_process`` and
        ``get_efficient_frontier`` both reverse the frames to obtain
        chronological order, so the stored frames are newest-first and the
        latest row is ``iloc[0]``. The previous implementation seeded from
        ``iloc[-1]`` (the oldest row) and appended at the tail. Pass ``False``
        for oldest-first frames.

    Returns
    -------
    None
    """
    history = price_dict[stock_code]
    pred_close = list(pred_close)
    if not pred_close:
        return

    latest = history.iloc[0] if newest_first else history.iloc[-1]
    template = dict(latest)
    prev_close = template['close']

    new_rows = []
    for close in pred_close:
        row = dict(template)
        row['pre_close'] = prev_close
        row['close'] = close
        row['change'] = close - prev_close
        row['pct_chg'] = row['change'] / prev_close * 100
        row['log_rtn'] = np.log(close / prev_close)
        new_rows.append(row)
        prev_close = close

    # Build the frame once instead of concatenating inside the loop (quadratic).
    predicted = pd.DataFrame(new_rows)
    if newest_first:
        frames = [predicted.iloc[::-1], history]
    else:
        frames = [history, predicted]

    # ignore_index avoids duplicate index labels, which break index-aligned
    # column assignment later on.
    price_dict[stock_code] = pd.concat(frames, axis=0, ignore_index=True)



In [6]:
import pandas as pd
import numpy as np
from pypfopt.efficient_frontier import EfficientFrontier
from pypfopt import expected_returns
from pypfopt import risk_models
from pypfopt.black_litterman import BlackLittermanModel
import random
from tqdm import tqdm

# efficient frontier
def get_efficient_frontier(d, method="sample"):
    """Estimate expected returns and covariance, and build an EfficientFrontier.

    Parameters
    ----------
    d : dict
        Maps stock code -> DataFrame with a 'close' column. The assembled price
        table is reversed before estimation, so the frames are presumably
        stored newest-first -- TODO confirm.
    method : {"sample", "time_exp", "CAPM"}
        "sample"   -> mean historical return + sample covariance
        "time_exp" -> exponentially-weighted return + exponentially-weighted covariance
        "CAPM"     -> CAPM return estimate + sample covariance

    Returns
    -------
    (ef, mu, cov_matrix)

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names (this previously
        surfaced as an UnboundLocalError on ``mu``).
    """
    supported = ("sample", "time_exp", "CAPM")
    if method not in supported:
        raise ValueError(
            "Unknown method {!r}; expected one of {}".format(method, supported)
        )

    stock_prices = pd.DataFrame(columns=list(d.keys()))
    for ts_code, df in d.items():
        stock_prices[ts_code] = df["close"]
    stock_prices = stock_prices.iloc[::-1]

    if method == "sample":
        mu = expected_returns.mean_historical_return(stock_prices) # Calculate annualised mean (daily) historical return from input (daily) asset prices.
        cov_matrix = risk_models.risk_matrix(stock_prices, method="sample_cov") # Calculate the annualised sample covariance matrix of (daily) asset returns.
    elif method == "time_exp":
        mu = expected_returns.ema_historical_return(stock_prices) # Calculate the exponentially-weighted mean of (daily) historical returns, giving higher weight to more recent data.
        cov_matrix = risk_models.risk_matrix(stock_prices, method="exp_cov") # Estimate the exponentially-weighted covariance matrix, which gives greater weight to more recent data.
    else:  # "CAPM"
        mu = expected_returns.capm_return(stock_prices) # Compute a return estimate using the Capital Asset Pricing Model.
        cov_matrix = risk_models.risk_matrix(stock_prices, method="sample_cov") # Sample covariance, same risk model as the "sample" branch.

    ef = EfficientFrontier(mu, cov_matrix)
    return ef, mu, cov_matrix

In [7]:
rf = 0.02 # risk free return
freq = 252 # trading days in a year

def bl_build(predicted_views, filtered_stocks=None):
    """Combine a max-Sharpe prior with per-stock views via Black-Litterman.

    Parameters
    ----------
    predicted_views : sequence of float
        One absolute view (expected return) per stock, in the same order as the
        keys of ``filtered_stocks``.
    filtered_stocks : dict, optional
        Maps stock code -> price DataFrame. When omitted, the daily pickle is
        loaded from ``GOOGLE_DRIVE_PATH`` (expected to be defined elsewhere in
        the notebook), as the original implementation did.

    Returns
    -------
    (prior_weight_dict1, returns)
        The prior max-Sharpe weights and the posterior expected returns
        (Series indexed by stock code).

    Raises
    ------
    ValueError
        If the number of views differs from the number of stocks.
    """
    if filtered_stocks is None:
        import os  # local import: the visible notebook cells never import `os`
        # NOTE: unpickling can execute arbitrary code -- only load trusted files.
        filtered_stocks = pd.read_pickle(os.path.join(GOOGLE_DRIVE_PATH, "Data/filtered_stocks_daily.pickle"))

    filtered_ts_code = list(filtered_stocks.keys())
    n_assets = len(filtered_ts_code)

    Q = np.asarray(predicted_views, dtype=float).reshape(-1, 1) # view matrix (to be changed)
    if Q.shape[0] != n_assets:
        # The picking matrix below is the identity, so every stock needs exactly one view.
        raise ValueError(
            "Got {} predicted views for {} stocks; one view per stock is required".format(
                Q.shape[0], n_assets
            )
        )

    ef1, mu1, cov_matrix1 = get_efficient_frontier(filtered_stocks, method="time_exp")
    prior_weight_dict1 = ef1.max_sharpe(risk_free_rate=rf)
    print(ef1.portfolio_performance(verbose=True, risk_free_rate=rf))

    Sigma = np.array(cov_matrix1)
    market_weights = np.array(list(ef1.clean_weights().values())).reshape(-1, 1)
    # Element [2] of portfolio_performance is the Sharpe ratio; dividing it by the
    # portfolio volatility sqrt(w' Sigma w) gives the implied risk aversion.
    risk_aversion = ef1.portfolio_performance(risk_free_rate=rf)[2] / np.sqrt(np.dot(np.dot(market_weights.T, Sigma), market_weights))
    Pi = risk_aversion * np.dot(Sigma, market_weights)
    P = np.eye(n_assets) # picking matrix: one absolute view per stock
    tau = 1 / (freq * 10) # 1/T
    Omega = np.diag(np.diag(np.dot(np.dot(P, tau * Sigma), P.T)))

    bl = BlackLittermanModel(cov_matrix=Sigma, pi=Pi, Q=Q, P=P, omega=Omega, tau=tau)
    returns = bl.bl_returns()
    returns.index = filtered_ts_code
    cov = bl.bl_cov()
    ef1_post = EfficientFrontier(returns, cov)
    # The posterior optimisation is run so that its performance can be printed;
    # its weights are not returned.
    ef1_post.max_sharpe(risk_free_rate=rf)
    print(ef1_post.portfolio_performance(verbose=True, risk_free_rate=rf))

    return prior_weight_dict1, returns



In [8]:
def true_annual_return(weight_dict, filtered_stocks_val_dict, returns=None): # weight data type: ordered dict
    """Annualised portfolio return realised on the validation data.

    Parameters
    ----------
    weight_dict : dict (ordered)
        Maps stock code -> portfolio weight.
    filtered_stocks_val_dict : dict
        Maps stock code -> validation DataFrame with a 'pct_chg' column
        (daily change; the notebook's data shows it in percent).
    returns : optional
        Unused; kept so existing calls still work. The previous body overwrote
        the validation averages with this argument, so the validation data
        never influenced the result.

    Returns
    -------
    float
        Weighted mean daily 'pct_chg' scaled by 2.52.
    """
    stocks = list(weight_dict.keys())
    weight = np.array([weight_dict[ts_code] for ts_code in stocks], dtype=float)
    mean_daily_pct = np.array(
        [np.average(filtered_stocks_val_dict[ts_code]["pct_chg"]) for ts_code in stocks],
        dtype=float,
    )

    # 252 working days for a year; dividing by 100 turns percent into a fraction.
    return float(np.dot(weight, mean_daily_pct)) * 2.52

In [None]:
# Train one LSTM per stock and turn its 20-step forecast into a single view:
# the mean step-to-step simple return of the forecast path.
price_dict = pd.read_pickle('data/filtered_stocks_daily.pickle')
stock_codes = list(price_dict.keys())
predicted_views = []

for index, stock_code in enumerate(tqdm(stock_codes)):
    # NOTE(review): this stock is skipped, so predicted_views ends up one entry
    # shorter than stock_codes -- confirm downstream consumers account for that.
    if stock_code == '600260.SH':
        continue

    feature_df, stock_close = data_process(price_dict, stock_code)
    model = train(feature_df, stock_close)
    pred_close = predict(model, 20)
    # update_dict(pred_close, price_dict, stock_code)

    step_returns = [
        (later - earlier) / earlier
        for earlier, later in zip(pred_close[:-1], pred_close[1:])
    ]
    rtn = sum(step_returns) / (len(pred_close) - 1)
    predicted_views.append(rtn)



# with open(os.path.join(GOOGLE_DRIVE_PATH, 'Data/predicted_filtered_stocks_daily.pickle'), "wb") as f:
#     pickle.dump(price_dict, f) 
    

In [24]:
# Persist the per-stock views, one value per line.
with open('predictions.txt', "w") as txt_file:
    txt_file.writelines(str(pred) + "\n" for pred in predicted_views)

In [1]:
# Build the Black-Litterman model from the LSTM views; bl_build returns the
# prior max-Sharpe weights and the posterior expected returns.
prior_weight_dict1, returns = bl_build(predicted_views)


# Score the prior weights against the validation data.
# NOTE(review): `os` and `GOOGLE_DRIVE_PATH` are not defined in the visible cells;
# the recorded NameError presumably comes from that -- confirm.
filtered_stocks_val_dict = pd.read_pickle(os.path.join(GOOGLE_DRIVE_PATH, "Data/filtered_stocks_val.pickle"))
true_annual_return(prior_weight_dict1, filtered_stocks_val_dict, returns)

NameError: ignored

In [9]:
import pandas as pd
# Load the validation data (dict of stock code -> DataFrame, judging by how
# true_annual_return indexes it).
# NOTE(review): the recorded output is an AttributeError; the cause is not visible
# here -- possibly a pickle / pandas version mismatch, confirm.
filtered_stocks_val_dict = pd.read_pickle(os.path.join(GOOGLE_DRIVE_PATH, "Data/filtered_stocks_val.pickle"))

AttributeError: ignored

In [None]:
# Show one stock's history with the stored row order reversed and a fresh index.
df = (
    pd.read_pickle(os.path.join(GOOGLE_DRIVE_PATH, 'Data/filtered_stocks_daily.pickle'))['600029.SH']
    [::-1]
    .reset_index(drop=True)
)
df

Unnamed: 0,ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount,log_rtn,scaled_log_rtn,scaled_rtn
0,600029.SH,2020-01-02,7.27,7.34,7.22,7.28,7.18,0.10,1.3928,676055.75,492855.980,0.013831,0.647769,0.641604
1,600029.SH,2020-01-03,7.30,7.35,7.16,7.18,7.28,-0.10,-1.3736,757360.48,548532.445,-0.013831,-0.650377,-0.656507
2,600029.SH,2020-01-06,7.05,7.05,6.88,6.94,7.18,-0.24,-3.3426,1310285.46,909705.962,-0.033998,-1.596717,-1.580444
3,600029.SH,2020-01-07,6.97,7.07,6.95,7.05,6.94,0.11,1.5850,636492.12,445156.096,0.015726,0.736666,0.731792
4,600029.SH,2020-01-08,6.90,6.97,6.87,6.94,7.05,-0.11,-1.5603,725097.13,501913.098,-0.015726,-0.739274,-0.744114
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
739,600029.SH,2023-01-18,7.68,7.88,7.65,7.70,7.74,-0.04,-0.5168,282444.58,219129.373,-0.005181,-0.244451,-0.254460
740,600029.SH,2023-01-19,7.71,7.77,7.60,7.70,7.70,0.00,0.0000,332677.23,255303.867,0.000000,-0.001304,-0.011956
741,600029.SH,2023-01-20,7.72,7.87,7.66,7.85,7.70,0.15,1.9481,410048.16,320163.154,0.019293,0.904072,0.902174
742,600029.SH,2023-01-30,7.88,7.90,7.56,7.58,7.85,-0.27,-3.4395,723431.23,553040.009,-0.035000,-1.643772,-1.625913


In [None]:
# Display the stored (un-reversed) frame for one stock.
# NOTE(review): the recorded output ends with rows that repeat the 2020-01-02 date
# with different close values -- presumably rows appended by update_dict in an
# earlier session; confirm.
price_dict['600029.SH']

Unnamed: 0,ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount,log_rtn,scaled_log_rtn,scaled_rtn
0,600029.SH,2023-01-31,7.54,7.61,7.28,7.330000,7.580000,-0.250000,-3.298200,698147.41,518059.976,-0.033538,-1.575134,-1.559609
1,600029.SH,2023-01-30,7.88,7.90,7.56,7.580000,7.850000,-0.270000,-3.439500,723431.23,553040.009,-0.035000,-1.643772,-1.625913
2,600029.SH,2023-01-20,7.72,7.87,7.66,7.850000,7.700000,0.150000,1.948100,410048.16,320163.154,0.019293,0.904072,0.902174
3,600029.SH,2023-01-19,7.71,7.77,7.60,7.700000,7.700000,0.000000,0.000000,332677.23,255303.867,0.000000,-0.001304,-0.011956
4,600029.SH,2023-01-18,7.68,7.88,7.65,7.700000,7.740000,-0.040000,-0.516800,282444.58,219129.373,-0.005181,-0.244451,-0.254460
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,600029.SH,2020-01-02,7.27,7.34,7.22,6.678808,6.683117,-0.004309,-0.064479,676055.75,492855.980,-0.000645,0.647769,0.641604
0,600029.SH,2020-01-02,7.27,7.34,7.22,6.673977,6.678808,-0.004831,-0.072331,676055.75,492855.980,-0.000724,0.647769,0.641604
0,600029.SH,2020-01-02,7.27,7.34,7.22,6.668396,6.673977,-0.005581,-0.083629,676055.75,492855.980,-0.000837,0.647769,0.641604
0,600029.SH,2020-01-02,7.27,7.34,7.22,6.659369,6.668396,-0.009027,-0.135363,676055.75,492855.980,-0.001355,0.647769,0.641604
