In [None]:
# QuantBook research scratchpad.
# Docs: https://www.quantconnect.com/docs/v2/our-platform/research/getting-started
qb = QuantBook()
spy = qb.AddEquity("SPY")

# 360 daily bars for every security currently registered on the QuantBook.
history = qb.History(qb.Securities.Keys, 360, Resolution.Daily)

# Indicator analysis: BollingerBands(30, 2) on SPY over the same 360 daily bars.
# The 'standarddeviation' column is dropped before plotting the remaining columns.
bbdf = qb.Indicator(BollingerBands(30, 2), spy.Symbol, 360, Resolution.Daily)
bbdf.drop(columns='standarddeviation').plot()

In [None]:
# Rebind `qb` to a fresh QuantBook instance for the cells that follow.
qb = QuantBook()
import pandas as pd
import numpy as np
from scipy import optimize
import pandas as pd



def efficient_frointier_on_sharp(cov_mat,exp_vec,rf):
    """
    Maximise the Sharpe ratio over weights that sum to one.

    The objective handed to ``scipy.optimize.minimize`` is the negative of
    ``(exp_vec @ w - rf) / sqrt(w @ cov_mat @ w)``; the search starts from
    equal weights and the only constraint is ``sum(w) == 1`` (no bounds are
    applied, so individual weights may be negative or exceed one).

    Parameters: cov_mat -- covariance matrix; exp_vec -- expected returns;
    rf -- risk-free rate in the same units as exp_vec.

    Output: weights; sharp ratio
    """
    def negative_sharpe(x):
        return -1*(exp_vec@x - rf)/np.sqrt(x@cov_mat@x.T)

    def budget_gap(x):
        # Zero exactly when the portfolio is fully invested.
        return np.sum(x) - 1

    asset_count = len(exp_vec)
    equal_weights = np.ones(asset_count)/asset_count
    result = optimize.minimize(
        negative_sharpe,
        equal_weights,
        constraints={'type':'eq', 'fun': budget_gap},
    )
    # The minimised value is -Sharpe, so flip the sign back.
    return result.x,-result.fun

## risk parity portfolio construction
def port_vol_calculate(weights,covariance):
    """Return the portfolio volatility ``sqrt(weights @ covariance @ weights.T)``."""
    return np.sqrt(weights@covariance@weights.T)

def component_std_calculate(weights,covariance):
    """
    Split portfolio volatility into one contribution per asset.

    Each entry is ``weights[i] * (covariance @ weights)[i] / portfolio_vol``,
    so the entries add up to the portfolio volatility itself.
    """
    marginal_cov = covariance@weights.T
    total_vol = port_vol_calculate(weights,covariance)
    return weights*marginal_cov/total_vol

def component_std_sse_calculate(weights,covariance,budget=None):
    """
    Sum of squared deviations of budget-scaled risk contributions from their mean.

    Parameters
    ----------
    weights : array of portfolio weights.
    covariance : covariance matrix matching ``weights``.
    budget : optional per-asset risk budget (list or array). ``None`` or an
        empty sequence means an equal budget of one per asset.

    Returns
    -------
    Scalar that is zero when every asset's volatility contribution, divided
    by its budget, is identical.
    """
    # `if not budget` raised "truth value of an array ... is ambiguous" for
    # ndarray budgets, so test for "not supplied" explicitly instead.
    if budget is None or np.size(budget) == 0:
        budget = np.ones_like(weights)
    else:
        budget = np.asarray(budget)
    csd = component_std_calculate(weights,covariance)/budget
    scale_csd = csd - csd.mean()
    return scale_csd @ scale_csd.T

def risk_parity_portfolio_on_sse(covariance,budget = None):
    """
    Long-only risk-parity weights from a covariance matrix.

    Minimises ``1e3 * component_std_sse_calculate(w, covariance, budget)``
    starting from equal weights, with each weight bounded to [0, 1] and the
    weights constrained to sum to one.

    Parameters
    ----------
    covariance : square covariance matrix; its first dimension sets the
        number of assets.
    budget : optional per-asset risk budget forwarded to the objective.

    Returns
    -------
    Array of optimised weights. A ``RuntimeWarning`` is emitted when the
    optimiser reports failure; the last iterate is still returned.
    """
    import warnings

    n = covariance.shape[0]
    cons = {'type':'eq',"fun":lambda w:np.sum(w)-1}
    # Materialise the bounds: a generator is single-use and has no len(),
    # which some SciPy SLSQP code paths require.
    bounds = [(0, 1)] * n
    opt_result = optimize.minimize(
        lambda w: 1e3*component_std_sse_calculate(w,covariance,budget),
        x0 = np.array([1/n]*n),
        constraints=cons,
        bounds=bounds,
    )
    if not opt_result.success:
        warnings.warn(
            "risk_parity_portfolio_on_sse: optimiser did not converge: {}".format(opt_result.message),
            RuntimeWarning,
        )

    return opt_result.x

## construct the risk portfolio based on the es
def component_es(weights,returns,delta = 1e-6):
    """
    Per-asset expected-shortfall contributions by forward finite differences.

    For each asset the weight is bumped by ``delta`` in place, the portfolio
    ES is re-evaluated with ``Return_ES``, and the weight is restored. The
    contribution is ``weight * (bumped_ES - base_ES) / delta``.

    ``weights`` is modified temporarily but holds its original values again
    when the function returns normally.
    """
    base_es = Return_ES(returns@weights.T)
    contributions = np.zeros(len(weights))
    for idx in range(len(weights)):
        original_weight = weights[idx]
        weights[idx] += delta
        bumped_es = Return_ES(returns@weights.T)
        weights[idx] = original_weight
        contributions[idx] = original_weight * (bumped_es-base_es)/delta

    return contributions

def component_es_sse(weights,returns,budget,delta = 1e-6):
    """
    Sum of squared deviations of budget-scaled ES contributions from their mean.

    Parameters
    ----------
    weights : array of portfolio weights.
    returns : return observations passed through to ``component_es``.
    budget : per-asset risk budget as a list or array; ``None`` or an empty
        sequence means an equal budget of one per asset.
    delta : finite-difference step forwarded to ``component_es``.

    Returns
    -------
    Scalar that is zero when every budget-scaled ES contribution is equal.
    """
    # `if not budget` raised "truth value of an array ... is ambiguous" for
    # ndarray budgets, so test for "not supplied" explicitly instead.
    if budget is None or np.size(budget) == 0:
        budget = np.ones_like(weights)
    else:
        budget = np.asarray(budget)
    ces= component_es(weights,returns,delta)/budget
    scale_com_es = ces - ces.mean()
    return scale_com_es @ scale_com_es.T

def risk_parity_port_es(returns,budget=None):
    """
    Long-only risk-parity weights where risk is measured by expected shortfall.

    Minimises ``1e5 * component_es_sse(w, returns, budget)`` starting from
    equal weights, with each weight bounded to [0, 1] and the weights
    constrained to sum to one.

    Parameters
    ----------
    returns : 2-D return observations; ``returns.shape[1]`` sets the number
        of assets.
    budget : optional per-asset risk budget forwarded to the objective.

    Returns
    -------
    Array of optimised weights. A ``RuntimeWarning`` is emitted when the
    optimiser reports failure; the last iterate is still returned.
    """
    import warnings

    n = returns.shape[1]
    cons = {'type':'eq','fun':lambda w:np.sum(w) - 1}
    # Materialise the bounds: a generator is single-use and has no len(),
    # which some SciPy SLSQP code paths require.
    bounds = [(0, 1)] * n
    opt_result = optimize.minimize(
        lambda w: 1e5*component_es_sse(w,returns,budget),
        x0 = np.ones(n)/n,
        constraints=cons,
        bounds=bounds,
    )
    if not opt_result.success:
        warnings.warn(
            "risk_parity_port_es: optimiser did not converge: {}".format(opt_result.message),
            RuntimeWarning,
        )

    return opt_result.x

def Return_ES(sim_x,alpha=0.05):
    """
    Expected shortfall of a sample of returns, reported as a positive loss.

    The sample is sorted ascending. A VaR threshold is taken as the average
    of the order statistics at positions ``ceil(alpha*n)+1`` and
    ``floor(alpha*n)-1``, and the result is minus the mean of all
    observations at or below that threshold.

    Parameters
    ----------
    sim_x : 1-D collection of returns.
    alpha : tail probability (default 0.05).

    Raises
    ------
    ValueError : if ``sim_x`` is empty.
    """
    order_x = np.sort(sim_x)
    size = len(order_x)
    if size == 0:
        raise ValueError("Return_ES requires at least one observation")
    n = alpha*size
    up_n = int(np.ceil(n))
    dn_n = int(np.floor(n))
    # Clamp both positions into the sample. Without this, dn_n-1 becomes -1
    # whenever alpha*size < 1 and silently wraps to the LARGEST return, and
    # up_n+1 can run past the end of a very small sample.
    last = size - 1
    hi = min(max(up_n+1, 0), last)
    lo = min(max(dn_n-1, 0), last)
    VaR = (order_x[hi]+order_x[lo])/2
    ES = -1*np.mean(order_x[order_x <= VaR])
    return ES

In [None]:
# To extract portfolio NAV from backtest results
backtest1 = api.ReadBacktest(16527795, 'd8440577cc94ba7f8f4dc3a4c6523a8d')
backtest2 = api.ReadBacktest(16527795, '1956e15c56f7188cde0a6faf081cdd5b')
chartpoint1_ls = backtest1.Charts["Strategy Equity"].Series["Equity"].Values
chartpoint2_ls = backtest2.Charts["Strategy Equity"].Series["Equity"].Values

# Keep every equity chart point whose timestamp is not at hour 5.
nav1 = [x.Close for x in chartpoint1_ls if x.Time.hour != 5]
nav2 = [x.Close for x in chartpoint2_ls if x.Time.hour != 5]
date = [x.Time for x in chartpoint1_ls if x.Time.hour != 5]
# Each series is indexed by its OWN timestamps. Reusing backtest1's dates for
# backtest2 fails when the point counts differ and misaligns the data when the
# dates differ; pd.concat(..., axis=1) later aligns the two on the index.
date2 = [x.Time for x in chartpoint2_ls if x.Time.hour != 5]
nav1 = pd.Series(nav1,index=date)
nav2 = pd.Series(nav2,index=date2)
re_strategy1 = nav1.pct_change()
re_strategy2 = nav2.pct_change()
## efficient frontier optimization

In [None]:

# Load the 2023 backtest's equity curve, keeping points not stamped at hour 5.
backtest_2023 = api.ReadBacktest(16527795, 'cff5189b6f03630c98a2e420f5eea720')
cp_2023 = backtest_2023.Charts["Strategy Equity"].Series["Equity"].Values
date_23 = [x.Time for x in cp_2023 if x.Time.hour != 5]
nav_23 = pd.Series([x.Close for x in cp_2023 if x.Time.hour != 5], index=date_23) * 10

# Restrict to 2023-03-10 .. 2023-10-10 (inclusive) and derive simple returns.
nav_23 = nav_23.loc[(nav_23.index >= '2023-3-10') & (nav_23.index <= '2023-10-10')]
rtn_23 = nav_23.pct_change()

# Plot the scaled NAV and save the figure as '2023_pv'.
plt.figure(figsize=(20, 10), dpi=100, edgecolor='grey')
plt.plot(nav_23, linewidth=3, color='orange')
plt.title("")
plt.grid(linestyle='--', linewidth=1, alpha=0.3)
plt.savefig('2023_pv')

In [None]:
# Compound growth of rtn_23 raised to 1/len(nav_23), minus one (a per-observation
# geometric mean return), divided by the standard deviation of rtn_23.
(np.power(np.prod(1+rtn_23),1/len(nav_23))-1)/np.std(rtn_23) 

In [None]:
# Arithmetic mean of rtn_23 divided by its standard deviation (no annualisation applied).
np.mean(rtn_23)/np.std(rtn_23)

In [None]:
# Annualised compound return in excess of a 2.5% risk-free rate, divided by the
# same volatility term as the neighbouring cells.
# The original called the numpy module itself (`np(1+0.025)`, a TypeError) and,
# through operator precedence, divided only the risk-free term by the volatility.
# NOTE(review): `np.log(1+0.025)` is assumed to be the intended risk-free term — confirm.
(
    (np.power(np.prod(1+rtn_23),252/len(nav_23))-1) - np.log(1+0.025)
) / (np.sqrt(252/len(nav_23))*np.std(rtn_23))

In [None]:
# Mean log-return of nav_23 scaled by 252, divided by sqrt(252/len(nav_23)) * std(rtn_23).
np.mean(nav_23.apply(np.log).diff().dropna())*252/(np.sqrt(252/len(nav_23))*np.std(rtn_23))

In [None]:
# Same expression as the earlier geometric-mean cell: per-observation geometric
# mean return of rtn_23 divided by its standard deviation.
(np.power(np.prod(1+rtn_23),1/len(nav_23))-1)/np.std(rtn_23)

In [None]:
# Worst value of NAV relative to its trailing 10-observation rolling maximum, minus one.
# This is a 10-window drawdown, not a drawdown against the all-time running peak.
(nav_23/nav_23.rolling(10).max() -1).min()

In [None]:
# Overlay the period-over-period returns of the two backtests on one axes.
plt.plot(re_strategy1)
plt.plot(re_strategy2)

In [None]:
# Align the two strategies' returns on common timestamps and estimate their covariance.
rtn_df = pd.concat((re_strategy1, re_strategy2),axis=1).dropna()
cov = rtn_df.cov()
# The 2.5% risk-free rate is an annual figure, while rtn_df.mean() is a per-period
# mean (the other cells annualise these returns with 252). Convert the rate to the
# same per-period basis so the excess return in the Sharpe objective is meaningful.
efficient_frointier_on_sharp(exp_vec=rtn_df.mean().values,cov_mat=cov.values, rf=0.025/252)

In [None]:
# ES-based risk-parity weights for the two strategies, using the default (equal) budget.
risk_parity_port_es(rtn_df)

In [None]:
from A2C import A2C_Trading, StockTradingEnv
import torch
import torch.optim as optim
import json

import joblib
def train_epoch(model, env,features, price, optimizer, epoch, gamma=0.99,std_dev =torch.tensor([0.05])):
    """
    Run one training episode of the actor-critic model with per-step updates.

    Parameters
    ----------
    model : callable as ``model(state, hidden_actor, hidden_critic)`` returning
        ``(action_mean, state_value, hidden_actor, hidden_critic)``, where each
        hidden state is a 2-tuple of tensors.
    env : environment exposing ``reset()``, ``step(action, features, price)``
        returning ``(next_state, reward, done)``, ``render()``, and the
        attributes ``lookback_window_size``, ``total_portfolio_value``,
        ``balance`` and ``initial_balance``.
    features : array sliced as ``features[:, :, start:end]`` to feed each step.
    price : array indexed as ``price[step, :]``.
    optimizer : torch optimizer over ``model.parameters()``.
    epoch : epoch number, used only in the printed summary.
    gamma : discount factor.
    std_dev : standard deviation of the Gaussian exploration noise.

    Returns
    -------
    ``(episode_reward, port_vals, balances, ports, actions)``. ``ports`` is
    never populated and is returned empty to keep the tuple shape stable.

    NOTE(review): rewards are divided by ``ema_reward + epsilon``; a negative
    running average flips the sign of the normalised reward — confirm intended.
    """
    state = env.reset()

    episode_reward = 0
    # LSTM hidden/cell states start empty for every episode.
    hidden_critic = None
    hidden_actor = None
    alpha = 0.95
    epsilon = 0.01
    ema_reward = 0
    port_vals = []
    balances = []
    ports = []
    actions = []
    # Read the window size from the environment (as the loop below already
    # does) instead of relying on a notebook-global `lookback_window_size`.
    current_step = env.lookback_window_size
    # Loop-invariant pieces of the Gaussian log-density.
    variances = std_dev ** 2
    log_norm = torch.log(torch.as_tensor(std_dev * (2 * torch.pi) ** 0.5))
    while True:
        # Convert state to appropriate tensor format for the model
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        window_start = current_step - env.lookback_window_size
        window_end = current_step
        current_step += 10
        this_feature = features[:,:,window_start:window_end]
        this_price = price[window_end,:]

        # Get the action mean and state value from the model
        action_mean, state_value, hidden_actor, hidden_critic = model(state_tensor, hidden_actor, hidden_critic)

        # Cut the graph so gradients do not flow back through earlier steps.
        hidden_actor = (hidden_actor[0].detach(), hidden_actor[1].detach())
        hidden_critic = (hidden_critic[0].detach(), hidden_critic[1].detach())

        # Sample an action around the mean and clamp it to the action space
        action = torch.normal(action_mean, std_dev)
        action = action.clamp(-0.2, 0.2)

        # A NaN action is replaced by "do nothing" before it reaches the environment
        if torch.isnan(action).any():
            action_np = np.zeros(action.detach().numpy().shape)
        else:
            action_np = action.detach().numpy()

        next_state, reward, done = env.step(action_np,this_feature, this_price)
        ema_reward = ema_reward * alpha + reward * (1 - alpha)
        if reward < 0:
            # Losses are penalised twice as heavily as gains are rewarded.
            reward *= 2
        normalized_reward = reward / (ema_reward + epsilon)
        episode_reward += reward

        next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)

        # The bootstrap value is only ever used as a constant target.
        with torch.no_grad():
            _, next_state_value, _,_ = model(next_state_tensor, hidden_actor, hidden_critic)

        # One-step TD target and advantage
        td_target = normalized_reward + gamma * next_state_value * (1 - int(done))
        advantage = td_target - state_value.detach()

        # Gaussian log-probability of the sampled action
        log_probs = -((action - action_mean) ** 2) / (2 * variances) - log_norm

        # Actor loss
        actor_loss = -torch.mean(log_probs * advantage)

        # Critic loss
        critic_loss = torch.nn.functional.mse_loss(state_value, td_target)

        # Aggregate losses
        total_loss = actor_loss + critic_loss

        # Perform backpropagation on total loss
        optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        # Transition to the next state
        state = next_state
        port_vals.append(env.total_portfolio_value)
        balances.append(env.balance)
        actions.append(action_np)
        if done:
            break

    env.render()
    print("Epoch{}: rewards: {:.4f}".format(epoch, env.total_portfolio_value-env.initial_balance))
    return episode_reward, port_vals,balances,ports,actions

In [None]:

def train(model, env,features, price, optimizer, num_epoch=50, gamma=0.99,std_dev =torch.tensor([0.03]), model_key="A2Cmodel"):
    """
    Train the actor-critic model for ``num_epoch`` episodes with per-step updates.

    After each episode the profit (``env.total_portfolio_value -
    env.initial_balance``) is compared with the best so far; a new best model
    is saved with ``joblib.dump`` to ``qb.ObjectStore.GetFilePath(model_key)``.

    Parameters
    ----------
    model : callable as ``model(state, hidden_actor, hidden_critic)`` returning
        ``(action_mean, state_value, hidden_actor, hidden_critic)``, where each
        hidden state is a 2-tuple of tensors.
    env : environment exposing ``reset()``, ``step(action, features, price)``
        returning ``(next_state, reward, done)``, ``render()``, and the
        attributes ``lookback_window_size``, ``total_portfolio_value``,
        ``balance`` and ``initial_balance``.
    features : array sliced as ``features[:, :, start:end]`` to feed each step.
    price : array indexed as ``price[step, :]``.
    optimizer : torch optimizer over ``model.parameters()``.
    num_epoch : number of episodes to run.
    gamma : discount factor.
    std_dev : standard deviation of the Gaussian exploration noise.
    model_key : object-store key under which the best model is saved
        (resolved through the notebook-level ``qb``).

    Returns
    -------
    ``(episode_reward, port_vals, balances, ports, actions)`` for the LAST
    episode (zero / empty lists if no episode ran). ``ports`` is never
    populated. Per-episode rewards are collected in a local list but are not
    part of the return value.

    NOTE(review): rewards are divided by ``ema_reward + epsilon``; a negative
    running average flips the sign of the normalised reward — confirm intended.
    """
    rewards = []
    max_reward = 0
    alpha = 0.95
    epsilon = 0.01
    # Defined up front so the return statement is valid even when num_epoch < 1.
    episode_reward = 0
    port_vals, balances, ports, actions = [], [], [], []
    # Loop-invariant pieces of the Gaussian log-density.
    variances = std_dev ** 2
    log_norm = torch.log(torch.as_tensor(std_dev * (2 * torch.pi) ** 0.5))
    # range(1, num_epoch) ran one episode fewer than requested.
    for i in range(1, num_epoch + 1):
        state = env.reset()

        episode_reward = 0
        # Initialize LSTM hidden and cell states for this episode; carrying
        # them over would leak memory across env.reset().
        hidden_critic = None
        hidden_actor = None

        ema_reward = 0
        port_vals = []
        balances = []
        ports = []
        actions = []
        # Read the window size from the environment (as the loop below already
        # does) instead of relying on a notebook-global `lookback_window_size`.
        current_step = env.lookback_window_size
        while True:
            # Convert state to appropriate tensor format for the model
            state_tensor = torch.FloatTensor(state).unsqueeze(0)
            window_start = current_step - env.lookback_window_size
            window_end = current_step
            current_step += 10
            this_feature = features[:,:,window_start:window_end]
            this_price = price[window_end,:]

            # Get the action mean and state value from the model
            action_mean, state_value, hidden_actor, hidden_critic = model(state_tensor, hidden_actor, hidden_critic)

            # Cut the graph so gradients do not flow back through earlier steps.
            hidden_actor = (hidden_actor[0].detach(), hidden_actor[1].detach())
            hidden_critic = (hidden_critic[0].detach(), hidden_critic[1].detach())

            # Sample an action around the mean and clamp it to the action space
            action = torch.normal(action_mean, std_dev)
            action = action.clamp(-0.1, 0.1)

            # A NaN action is replaced by "do nothing" before it reaches the environment
            if torch.isnan(action).any():
                action_np = np.zeros(action.detach().numpy().shape)
            else:
                action_np = action.detach().numpy()

            next_state, reward, done = env.step(action_np,this_feature, this_price)
            if reward<0:
                # Losses are penalised twice as heavily as gains are rewarded.
                reward *=2
            ema_reward = ema_reward * alpha + reward * (1 - alpha)
            normalized_reward = reward / (ema_reward + epsilon)
            episode_reward += reward
            next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)

            # The bootstrap value is only ever used as a constant target.
            with torch.no_grad():
                _, next_state_value, _,_ = model(next_state_tensor, hidden_actor, hidden_critic)

            # One-step TD target and advantage
            td_target = normalized_reward + gamma * next_state_value * (1 - int(done))
            advantage = td_target - state_value.detach()

            # Gaussian log-probability of the sampled action
            log_probs = -((action - action_mean) ** 2) / (2 * variances) - log_norm

            # Actor loss
            actor_loss = -torch.mean(log_probs * advantage)

            # Critic loss
            critic_loss = torch.nn.functional.mse_loss(state_value, td_target)

            # Aggregate losses
            total_loss = actor_loss + critic_loss

            # Perform backpropagation on total loss
            optimizer.zero_grad()
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            # Transition to the next state
            state = next_state
            port_vals.append(env.total_portfolio_value)
            balances.append(env.balance)
            actions.append(action_np)
            if done:
                break

        env.render()
        rewards.append(episode_reward)
        profit = env.total_portfolio_value-env.initial_balance
        if profit > max_reward:
            max_reward = profit
            # Honour `model_key` rather than a notebook-global `file_name`.
            joblib.dump(model, qb.ObjectStore.GetFilePath(model_key))
        print("Epoch{}: rewards: {:.4f}".format(i, profit))


    return episode_reward, port_vals,balances,ports,actions

- AAPL R735QTJ8XC9X
- GOOCV VP83T1ZUHROL
- MSFT R735QTJ8XC9X
- BRKB R735QTJ8XC9X
- XON R735QTJ8XC9X
- AMZN R735QTJ8XC9X
- FB V6OIPNZEM8V9
- JNJ R735QTJ8XC9X
- CMB R735QTJ8XC9X
- GE R735QTJ8XC9X

In [None]:
# New QuantBook session; subscribe to daily equity data and keep each Symbol.
qb = QuantBook()

jpm = qb.AddEquity("JPM", Resolution.Daily).Symbol
brk =  qb.AddEquity("BRK.B", Resolution.Daily).Symbol
aapl = qb.AddEquity("AAPL", Resolution.Daily).Symbol
amzn = qb.AddEquity("AMZN", Resolution.Daily).Symbol
msft = qb.AddEquity("MSFT", Resolution.Daily).Symbol
goog = qb.AddEquity("GOOG", Resolution.Daily).Symbol
# NOTE(review): the "XON", "FB" and "GE" entries pass a "<ticker> <id suffix>" string
# to AddEquity instead of a plain ticker — confirm these resolve to the intended
# securities. None of them is in symbol_list below.
xon = qb.AddEquity("XON R735QTJ8XC9X", Resolution.Daily).Symbol
fb = qb.AddEquity("FB V6OIPNZEM8V9", Resolution.Daily).Symbol
jnj = qb.AddEquity("JNJ", Resolution.Daily).Symbol
ge = qb.AddEquity("GE R735QTJ8XC9X",Resolution.Daily).Symbol
# Only these five symbols feed the downstream feature/price construction.
symbol_list = [jpm, brk,aapl, amzn, msft] #goog, xon, fb, jnj, ge
# Analysis window used by the indicator, volatility and price cells.
start = datetime(2017, 1, 1)
end = datetime(2022, 1,1)

In [None]:
# Indicator instances shared by every symbol in the loop below.
ema = ExponentialMovingAverage(30)
rsi = RelativeStrengthIndex(30)
arma = AutoRegressiveIntegratedMovingAverage(1,2,1,20, True)
rdv = RelativeDailyVolume(10)
bb = BollingerBands(20, 2)
alp = Momentum(20)

# One {symbol: DataFrame} mapping per indicator.
emas, rsis, armas, rdvs, bbs, alps = {}, {}, {}, {}, {}, {}


def _indicator_from_start(indicator, symbol):
    """Evaluate `indicator` from 60 days before `start` to `end`, then keep rows at or after `start`."""
    frame = qb.Indicator(indicator, symbol, start-timedelta(days=60), end, Resolution.Daily)
    return frame.loc[frame.index >= start]


for symbol in symbol_list:
    emas[symbol] = _indicator_from_start(ema, symbol)
    rsis[symbol] = _indicator_from_start(rsi, symbol)
    armas[symbol] = _indicator_from_start(arma, symbol)
    rdvs[symbol] = _indicator_from_start(rdv, symbol)
    bbs[symbol] = _indicator_from_start(bb, symbol)
    alps[symbol] = _indicator_from_start(alp, symbol)
    


In [None]:
# 30-observation rolling standard deviation of the daily close for each symbol,
# computed from 60 days before `start` and then trimmed to rows at or after `start`.
vols = {}
for symbol in symbol_list:
    vol = (
        qb.History(symbol, start-timedelta(days=60), end, Resolution.Daily)['close']
        .unstack(level=1)
        .T
        .rolling(30)
        .std()
        .dropna()
    )
    vol.columns = ['volatility']
    vols[symbol] = vol.loc[vol.index >= start, :]

In [None]:
# Per-symbol frames joined into the feature matrix later on (`emas` is not included).
indicators = [rsis,  armas, vols, rdvs, bbs,alps]
# Daily close prices for all symbols in symbol_list between start and end,
# reshaped with unstack(level=1).T — presumably one row per time and one column
# per symbol; verify against the History index layout.
price = qb.History(symbol_list, start,end, Resolution.Daily)['close'].unstack(level=1).T
price

In [None]:
# Correlation matrix of the log price differences (log returns) across the columns of `price`.
price.apply(np.log).diff().dropna().corr()

In [None]:
# Columns selected, in this order, from each symbol's joined indicator frame.
columns = [
    'relativestrengthindex',
    'autoregressiveintegratedmovingaverage',
    'volatility',
    'relativedailyvolume',
    'bandwidth',
    'percentb',
    'momentum',
]

In [None]:
# Build one (len(columns), time) array per symbol and stack them into `features`.
features = []
for symbol in symbol_list:
    # Start from the first indicator frame and join the remaining ones on the index.
    df = indicators[0][symbol]
    for ind in indicators[1:]:
        df = df.join(ind[symbol])
    features.append(df.loc[:,columns].values.T)

features = np.array(features)

In [None]:
# Correlation between the selected feature columns; `df` is the frame left over from
# the last symbol processed by the feature-building loop.
df.loc[:,columns].corr()

In [None]:
# Problem dimensions derived from the universe and the feature tensor.
num_stocks = len(symbol_list)
num_features = features.shape[1]
lookback_window_size = 30

# State size: the flattened lookback window plus 1 + num_stocks further entries.
state_dim = (lookback_window_size * num_stocks * num_features) + 1 + num_stocks
action_dim = num_stocks

# A2C model
model = A2C_Trading(state_dim, action_dim, hidden_size=64)

In [None]:
# Environment seeded with the first lookback window of features and the price at its end.
env = StockTradingEnv(
    features=features[:,:,:lookback_window_size],
    price=price.values[lookback_window_size,:],
    end_step=features.shape[2],
    lookback_window_size=lookback_window_size,
)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-5)
model_key = "A2Cmodel1"
file_name = qb.ObjectStore.GetFilePath(model_key)
# Pass the same key that `file_name` was derived from; the original passed an
# unrelated literal ("A2Cmodel_c") that disagreed with the checkpoint path above.
reward, port_vals,balances,ports,actions = train(
    model,
    env,
    features,
    price.values[:,:],
    optimizer,
    num_epoch=30,
    std_dev =torch.tensor([0.02]),
    model_key=model_key,
)


In [None]:
# Replace `model` with a fresh network using a larger hidden size (128).
model = A2C_Trading(state_dim, action_dim,hidden_size=128)

In [None]:
# Environment seeded with the first lookback window of features and the price at its end.
env = StockTradingEnv(
    features=features[:,:,:lookback_window_size],
    price=price.values[lookback_window_size,:],
    end_step=features.shape[2],
    lookback_window_size=lookback_window_size,
)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=1e-5)
model_key = "A2Cmodel2"
file_name = qb.ObjectStore.GetFilePath(model_key)

# A model is saved only when an epoch's cumulative reward beats the best so far,
# and the bar starts at 10000.
max_reward = 10000
num_epoch = 30
rewards = []
#torch.manual_seed(2550667350516364191) #3632672893473863851
#torch.random.manual_seed(11249560557726264621) #35777981873920709
for epoch in range(1, num_epoch + 1):
    reward, port_vals,balances,ports,actions = train_epoch(
        model, env, features, price.values[:,:], optimizer, epoch
    )
    rewards.append(reward)
    if reward > max_reward:
        max_reward = reward
        joblib.dump(model, file_name)


In [None]:
# Save the current in-memory `model` under the "A2Cmodel" object-store key,
# regardless of how it scored during training.
model_key = "A2Cmodel"
file_name = qb.ObjectStore.GetFilePath(model_key)
joblib.dump(model, file_name)

In [None]:
# Resolve the object-store path for "A2Cmodel" again so the load cell below can run
# without re-running the save cell.
model_key = "A2Cmodel"
file_name = qb.ObjectStore.GetFilePath(model_key)

In [None]:
# Restore the saved model from `file_name`.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only load files written by this project.
loaded_model = joblib.load(file_name)
loaded_model

In [None]:
# Environment for inspecting the loaded model. end_step is taken from the feature
# tensor, as in the training cells, instead of the hard-coded 2014, which goes
# stale whenever the date range or symbol list changes.
env = StockTradingEnv(
    features=features[:,:,:lookback_window_size],
    price=price.values[lookback_window_size,:],
    end_step=features.shape[2],
    lookback_window_size=lookback_window_size,
)


In [None]:
# One forward pass of the restored model on the environment's initial state,
# starting from empty hidden states.
state = env.reset()
state_tensor = torch.FloatTensor(state).unsqueeze(0)
hidden_actor, hidden_critic = None, None
action_mean, state_value, hidden_actor, hidden_critic = loaded_model(
    state_tensor, hidden_actor, hidden_critic
)