In [20]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [21]:
from ta import add_all_ta_features
import yfinance as yf

In [22]:
import gym 
from stable_baselines3.common.vec_env import DummyVecEnv
from backtesting import Backtest, Strategy
from stable_baselines3 import A2C, DDPG, DQN, PPO

In [23]:
import backtesting
from backtesting import Backtest, Strategy

backtesting.set_bokeh_output(notebook=False)

## Loads Stocks data
1. Apple Inc. (AAPL)
2. Microsoft Corp. (MSFT)
3. Amazon.com, Inc. ( AMZN)
4. Tesla, Inc. (TSLA)
5. Nvidia Corp. (NVDA)

In [24]:
aapl_df = yf.Ticker("AAPL").history(interval= "1d",start="2018-01-01", end="2022-12-31")
msft_df = yf.Ticker("MSFT").history(interval= "1d",start="2018-01-01", end="2022-12-31")
amzn_df = yf.Ticker("AMZN").history(interval= "1d",start="2018-01-01", end="2022-12-31")
tsla_df = yf.Ticker("TSLA").history(interval= "1d",start="2018-01-01", end="2022-12-31")
nvda_df = yf.Ticker("NVDA").history(interval= "1d",start="2018-01-01", end="2022-12-31")


cleaning data

In [25]:
def process_input(dataframe):
    if (dataframe.index.name == 'Date'):
        dataframe = dataframe.reset_index()

    dataframe['Date'] = pd.to_datetime(dataframe['Date'].dt.date)
    dataframe = dataframe.set_index('Date')
    return dataframe


In [26]:
aapl_df = process_input(aapl_df)
msft_df = process_input(msft_df)
amzn_df = process_input(amzn_df)
tsla_df = process_input(tsla_df)
nvda_df = process_input(nvda_df)


In [27]:
print("null of AAPL is", aapl_df.isnull().sum().sum())
print("null of MSFT is", msft_df.isnull().sum().sum())
print("null of AMZN is", amzn_df.isnull().sum().sum())
print("null of TSLA is", tsla_df.isnull().sum().sum())
print("null of NDVA is", nvda_df.isnull().sum().sum())

null of AAPL is 0
null of MSFT is 0
null of AMZN is 0
null of TSLA is 0
null of NDVA is 0


In [28]:
new_aapl_df = aapl_df.copy()
new_aapl_df["tic"] = "AAPL"

msft_df_df = msft_df.copy()
msft_df_df["tic"] = "MSFT"

mixed_df = pd.concat([new_aapl_df, msft_df_df])

In [29]:
mixed_df.loc['2018-01-02']['Open']/mixed_df.loc['2018-01-03']['Open']

Date
2018-01-02   NaN
2018-01-02   NaN
2018-01-03   NaN
2018-01-03   NaN
Name: Open, dtype: float64

## Portfolio performance
The performance of the portfolio can be indicated by many aspects: sharpe, daily rate of return, cumulative return, and varience

### DRR
The Daily rate of return can be computed by

$$ DRR = { Price_{today} - Price_{prevday}} $$

In [30]:
# use DRR column
def portfolio_daily_rate_of_return(portfolio_df, initial_amount):
    if not ('DRR' in portfolio_df):
        portfolio_df['DRR'] = np.nan
    portfolio_df['DRR'] = ( portfolio_df['Sum'] - np.roll(portfolio_df['Sum'], shift=1))/portfolio_df['Sum']
    portfolio_df.loc[portfolio_df.index[0], 'DRR']= (portfolio_df.loc[portfolio_df.index[0], 'Sum'] -initial_amount) /initial_amount
    return portfolio_df 


### CRR
The Cumulative rate or the sum of the DRR can be computed by

$$ CRR = { Price_{today} - Price_{init}} $$

In [31]:
# use CRR column
def portfolio_cumulative_rate_of_return(portfolio_df, initial_amount):
    if not ('DRR' in portfolio_df):
        portfolio_df = portfolio_daily_rate_of_return(portfolio_df, initial_amount)
    if not ('CRR' in portfolio_df):
        portfolio_df['CRR'] = np.nan
    
    portfolio_df['CRR'] = ( portfolio_df['Sum'] - initial_amount)/initial_amount

    return portfolio_df 

### Varience
The Varience can be computed by using the following equation

$$ Var = { ( DRR - E_{DRR} )^2 \over Time} $$

In [32]:
def portfolio_varience(portfolio_df):
    drr_sum = portfolio_df['DRR'].sum()/portfolio_df.__len__()

    var = np.square(portfolio_df['DRR'] - drr_sum).sum()/portfolio_df.__len__()
    return var

### Sharpe ratio
The ratio can be computed by using the following equation

$$ Sharpe = { R_{portfolio} - R_{riskfree} \over \sigma_{portfolio}} $$

In [33]:
# compute the portfolio sharpe ratio
def portfolio_sharpe_ratio(portfolio_df, port_sd, riskfree=0.0151, at_index=-1):
    asset_return = portfolio_df.loc[portfolio_df.index[at_index], 'CRR']
    return (asset_return - riskfree)/port_sd

## The following code are the implementation of two famous portfolio balancing methods
- Constance rebalance
- Buy and Hold

In [34]:
class ConstanceBalancingOnTime:
    def __init__(self, 
                df,
                holding_ratio,
                rebalance_time,
                initial_amount,
                transaction_cost_pct,
                lookback=252,
                day = 0):
        self.df = df
        self.rebalance_time = rebalance_time
        self.holding_ratio = np.array(holding_ratio)
        self.initial_amount = initial_amount
        self.transaction_cost_pct = transaction_cost_pct
        self.lookback = lookback
        self.day = day

        self.ticker_list = df["tic"].unique()
        self.stock_count = len(self.ticker_list)
        # the first one is cash on hand followed by the stock in the tickers
        self.asset_amount = self.holding_ratio * initial_amount

        self.portfolio_memory = self._create_initial_memory()
        self._write_memory_at_index(0)

        #for keeping matric
        self.metric = {}

        

    def step(self):
        # incase 
        self.day += 1
        # if (self.day == 1):
        #     current_value = self.initial_amount / (self.transaction_cost_pct * self.holding_ratio[1:].sum() + self.initial_amount)
        #     self.asset_amount = current_value * self.holding_ratio
        #     return
        
        for index in range(self.stock_count):
            try:
                self.asset_amount[index+1] *= self._update_assetprice_by_ratio(int(self.day), self.ticker_list[index])
            except:
                print("error at day",self.day, index)
        
        # rebalancing
        if (self.day % self.rebalance_time == 0):

            after_asset_amount = np.array([])
            
            current_asset_value = self.asset_amount.sum()
            current_asset_ratio = self.asset_amount/current_asset_value

            #check the sign of asset allowcation
            asset_adapt_sign = self.holding_ratio >= current_asset_ratio
                
            asset_matrix = np.array([np.append(self.holding_ratio.copy(), 0)])
            
            for i in range(self.stock_count+1): 
                # the amount of add in that we will add into the asset line
                add_in_asset_line = 1
                if ((self.holding_ratio == current_asset_ratio)[i] ):
                    pass
                elif (asset_adapt_sign[i] ):
                    add_in_asset_line += self.transaction_cost_pct
                else:
                    add_in_asset_line -= self.transaction_cost_pct
                # print(add_in_asset_line)
                current_asset_line = np.zeros(self.stock_count+2)
                current_asset_line[i] = add_in_asset_line
                current_asset_line[-1] = 1
                # print(asset_matrix.shape, current_asset_line.shape)
                asset_matrix = np.append(asset_matrix.copy(), np.array([current_asset_line.copy()]), axis=0)
            asset_matrix = asset_matrix.T
            
            modify_amount = np.linalg.solve(asset_matrix, np.append(self.asset_amount, 0))[1:]
            self.asset_amount = self.asset_amount - modify_amount
        
        # write the allocation to the memory df
        self._write_memory_at_index(self.day)

    def run(self):
        time_range = self.df.__len__() / self.stock_count
        for i in range(int(time_range)-1):
            self.step()

    def reset(self):
        self.day = 0
        self.asset_amount = self.holding_ratio * self.initial_amount

        self.portfolio_memory = self._create_initial_memory()
        self._write_memory_at_index(0)

    def get_asset_amount(self):
        return self.asset_amount.sum()
    
    def add_return(self, method):
        self.portfolio_memory = method(self.portfolio_memory, self.initial_amount)

    def add_metric(self, new_matric, new_matric_name, **kwargs):
        matric_res = new_matric(self.portfolio_memory, **kwargs)
        self.metric.update({new_matric_name: matric_res}) 

    def _create_initial_memory(self):
        col = np.append(np.array(['Cash']), self.ticker_list)
        col = np.append(col, np.array(['Sum']))
        initial_memory = pd.DataFrame(index=self.df.index.unique(), columns=col)
        
        return initial_memory
    
    def _update_assetprice_by_ratio(self, time_index: int, ticker: str):
        return self.df[self.df['tic'] == ticker].iloc[time_index]['Close']/self.df[self.df['tic'] == ticker].iloc[time_index-1]['Close']
    
    def _write_memory_at_index(self, index: int):
        self.portfolio_memory.iloc[index]['Cash'] = self.asset_amount[0]
        
        for stock_index in range(self.stock_count):
            current_ticker = self.ticker_list[stock_index]
            self.portfolio_memory.iloc[index][current_ticker] = self.asset_amount[stock_index+1]
        self.portfolio_memory.iloc[index]['Sum'] = self.asset_amount.sum()
    


### Constance rebalance

In [35]:
env_kwargs = {
  "holding_ratio": [0,0.5,0.5], 
  "rebalance_time": 60,
  "initial_amount": 1000000, 
  "transaction_cost_pct": 0.001
}

cr = ConstanceBalancingOnTime(df = mixed_df, **env_kwargs)


In [36]:
cr.reset()
cr.run()


In [37]:
print("rebalance time", cr.rebalance_time)
print("initial amount", cr.initial_amount)
print("tickers", cr.ticker_list)
print("stock count", cr.stock_count)


rebalance time 60
initial amount 1000000
tickers ['AAPL' 'MSFT']
stock count 2
asset amount [      0.         1492155.28292451 1649067.39751928]
asset 
            Cash           AAPL           MSFT             Sum
Date                                                         
2018-01-02  0.0       500000.0       500000.0       1000000.0
2018-01-03  0.0  499913.141309  502326.999487  1002240.140797
2018-01-04  0.0  502235.141869  506748.114247  1008983.256117
2018-01-05  0.0  507953.261255  513030.838047  1020984.099301
2018-01-08  0.0  506066.533757  513554.437737  1019620.971494


In [38]:
cr.add_return(portfolio_daily_rate_of_return)
cr.add_return(portfolio_cumulative_rate_of_return)

In [42]:
cr.add_metric(portfolio_varience, "varience")

In [43]:
env_kwargs = {
  "port_sd": np.sqrt(cr.metric['varience']), 
  "riskfree": 0.089,
}

cr.add_metric(portfolio_sharpe_ratio, "sharpe", **env_kwargs)

In [44]:
cr.metric

{'varience': 0.0003689325133336441, 'sharpe': 106.84421626591556}

### Buy and Hold

In [45]:
## let the time to be very high so the portfolio will not rebalance
env_kwargs = {
  "holding_ratio": [0,0.5,0.5], 
  "rebalance_time": 9999999999,
  "initial_amount": 1000000, 
  "transaction_cost_pct": 0.001
}

buyhold = ConstanceBalancingOnTime(df = mixed_df, **env_kwargs)


In [46]:
buyhold.reset()
buyhold.run()

In [47]:
print("rebalance time", buyhold.rebalance_time)
print("initial amount", buyhold.initial_amount)
print("tickers", buyhold.ticker_list)
print("stock count", buyhold.stock_count)


rebalance time 9999999999
initial amount 1000000
tickers ['AAPL' 'MSFT']
stock count 2


In [52]:
buyhold.add_return(portfolio_daily_rate_of_return)
buyhold.add_return(portfolio_cumulative_rate_of_return)
buyhold.add_metric(portfolio_varience, "varience")
env_kwargs = {
  "port_sd": np.sqrt(cr.metric['varience']), 
  "riskfree": 0.089,
}

buyhold.add_metric(portfolio_sharpe_ratio, "sharpe", **env_kwargs)

In [53]:
buyhold.metric

{'varience': 0.00036842913759998504, 'sharpe': 103.02625380634015}