In [1]:
from scipy.stats import norm
import pandas as pd
import matplotlib.pyplot as plt
import time
import os 

#crt_dir = os.path.abspath("")
#data_dir = os.path.abspath("data")

agent_list = [.05, .10, .20, .50, .80, .90]
path = './data/Monthly_Excess_Return_Rates.csv'

def data_load(path): 
    '''
    Input: 
    path of the data (Monthly_Excess_Return_Rates.csv)
    Output: df
    '''
    all_data = pd.read_csv(path)
    return all_data
df = data_load(path)
df.head()
#df.tail()

Unnamed: 0,Trdmnt,600000,600009,600010,600015,600016,600019,600028,600029,600030,...,601398,601600,601601,601628,601766,601857,601899,601939,601988,601998
0,2009-12-01,-0.016842,0.137451,-0.02911,0.096287,0.025417,0.180519,0.155909,0.00983,0.08653,...,0.038297,-0.008037,0.040879,0.034442,-0.030866,0.024128,-0.037856,0.041989,0.049115,0.268206
1,2010-01-01,-0.097292,0.063012,-0.133322,-0.134706,-0.087823,-0.217177,-0.191352,0.003094,-0.111708,...,-0.110312,-0.124869,-0.139249,-0.148274,-0.044035,-0.055402,-0.14086,-0.089093,-0.054974,-0.159815
2,2010-02-01,0.055229,0.02995,0.027921,0.016714,0.011975,0.05883,-0.01324,0.085172,-0.035802,...,0.002268,-0.002644,0.072805,-0.004444,0.018327,-0.007972,0.02465,-0.005396,0.0079,0.00103
3,2010-03-01,0.096505,-0.033726,0.017421,0.166786,0.047257,-0.021756,0.037117,0.138627,0.038408,...,0.020731,0.010762,0.134986,0.053741,-0.001856,-0.014164,-0.039415,0.005249,0.034376,0.055698
4,2010-04-01,-0.097993,-0.136233,-0.075142,-0.009656,-0.088982,-0.127491,-0.142521,0.116025,0.016441,...,-0.090209,-0.105439,-0.063708,-0.068921,-0.045021,-0.059488,-0.025027,-0.07593,-0.025166,-0.177366


In [2]:
def bayesFormula(returnR, var, a, b):
    n = len(returnR)
    coeff = b**2/(var+n*b**2)
    sigma2 = coeff*var
    miu = (1-n*coeff)*a+coeff*sum(returnR)
    return miu, sigma2

x = [12, -4, -6, -12]
print(bayesFormula(x, var=1, a=0, b=1))

(-2.0, 0.2)


In [9]:
def bayesPredict(stocks, agent_type, batch_size=10, relaxCoeff=8):
    '''
    This function is used to predict "next-month" return.
    The prior and likelihood are both NORMAL distribution model.
    Prior model has informative parameter settings (based on the stock history)
    its MEAN = Historical mean rate of return; its VARIANCE =  relaxCoeff * Sample VAR
    This is a meaningful setting, if we believe stock market follows a MEAN-REVERTING process. 

    Input: 
    stocks df: monthly excess return from 2010 to 2022
    agent_type: float in (0,1); near 0 -> more conservative for stock return; near 1 -> more aggressive
    batch_size: we use this number of months as Bayes observations
    relaxCoeff: int; bigger -> prediction emphasizes more on the recent batch; smaller -> more stable around historical mean
    Output: predicted df 
    '''
    
    # Suppose our rolling-horizon prediction starts at:
    starting_date = '2017-01-01'
    # We will use sample variance before this date as "b" in bayes formula.
    date_list = list(stocks['Trdmnt'])
    starting_idx = date_list.index(starting_date)
    stock_list = []

    for name in stocks:
        if name.isdigit():
            stock_list.append(name)
    predict_df = pd.DataFrame(stocks, columns=stock_list)
    timeLine = pd.to_datetime(stocks['Trdmnt'])
    predict_df.index = timeLine

    for name in stock_list:
        returnR = stocks[name]
        hist_R = returnR[:starting_idx]
        B = hist_R.var()
        A = hist_R.mean()
        # For every month after starting date, we use Bayes predict. 
        # We only use "batch_size" monthly data for prediction this step.
        # print('History sample mean:', A)
        for seq_num in range(starting_idx, len(date_list)):
            ref_batch = returnR[seq_num-batch_size:seq_num]
            good = ref_batch[ref_batch==ref_batch]
            var = good.var()
            miu, sigma2 = bayesFormula(returnR=good, var=var, a=A, b=relaxCoeff*B)
            percentile_R = norm.ppf(q=agent_type, loc=miu, scale=(sigma2**0.5))
            if percentile_R < -1 or percentile_R > 1:
                print('Strange Prediction!')
            predict_df.loc[timeLine[seq_num], name] = percentile_R

            #print(date_list[seq_num], 'Bayes predicted \mu', round(miu,5), \
            #    'Actual return is', round(returnR[seq_num],5), \
            #    'Mean return in last batch', round(good.mean(),5))  
        # print(len(predict_dict[name]), (len(date_list)-starting_idx))

    #print(predict_df.tail())
    #print(stocks.tail())
    return predict_df

agent_risk_coeff = 0.2
predict_df = bayesPredict(df, agent_risk_coeff)
predict_df.tail()
predict_df.to_csv('BayesPredict'+str(agent_risk_coeff)+'.csv')

0     2009-12-01
1     2010-01-01
2     2010-02-01
3     2010-03-01
4     2010-04-01
         ...    
151   2022-07-01
152   2022-08-01
153   2022-09-01
154   2022-10-01
155   2022-11-01
Name: Trdmnt, Length: 156, dtype: datetime64[ns]
