In [4]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
import pickle

import matplotlib.pyplot as plt
plt.style.use("ggplot")
%config InlineBackend.figure_format='retina'

import QuantTrading.ImpactFitting as IF



## Data Acquisition
All data initialised in this section cover every date and every stock.

In [5]:
def load_from_pickle(filename, directory='../pkl_dump/'):
    """Load and return the object stored in a pickle file.

    Parameters
    ----------
    filename : str
        Name of the pickle file inside ``directory``.
    directory : str, optional
        Folder that holds the pickle dumps. Defaults to ``'../pkl_dump/'``,
        the location this notebook has always read from.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Raises
    ------
    FileNotFoundError
        If the file does not exist in ``directory``.
    """
    import os  # local import: `os` is not part of the notebook's import cell

    # os.path.join tolerates a directory given with or without a trailing separator.
    full_path = os.path.join(directory, filename)
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # were produced by this project.
    with open(full_path, 'rb') as f:
        return pickle.load(f)

# Load data
# The four pickles are read in the order listed and bound to the names on the left.
traded_volume_df, px_df, daily_stock_info_df, monthly_scaling_factor = map(
    load_from_pickle,
    (
        'traded_volume_df.pkl',
        'px_df.pkl',
        'daily_stock_info_df.pkl',
        'monthly_scaling_factor.pkl',
    ),
)
# Distinct values of "stock" found in the traded-volume frame.
stocks = traded_volume_df.reset_index()["stock"].unique()

## Price Impact

There are several impact models to choose from: 1) naive OW, 2) AFS, 3) Ridge-style OW.

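**DODGY / TODO:** $R^2$ appears to be maximised at the largest half-life in the grid. Is this a bug, or is there a genuine reason? One thing to check: a column-wise `groupby("stock").max()` reports the largest `half_life` for every stock, regardless of which row actually has the best $R^2$.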

### 1) OW - `model_type = "linear"`

In [6]:
# OW (linear) impact model: for every candidate half-life, build the impact
# state, run the regressions, and stack the summaries into `rsq_table`.
in_sample_month = 5
# half_life_list = np.arange(10800, 14401, 1800)
half_life_list = np.array([3600])
# NOTE: `rsq_data` is allocated here but not filled or read anywhere in this cell.
rsq_data = np.zeros((len(stocks), len(half_life_list)))

summary_columns = ["beta_estimate", "alpha_estimate", "is_rsq", "oos_rsq", "half_life"]

reg_summary_list = []
for half_life in half_life_list:
    model_type = "linear"
    impact_px_df = IF.get_impact_state(
        traded_volume_df, monthly_scaling_factor, half_life, model_type
    )
    reg_summary_temp = IF.get_regression_results(
        impact_px_df, px_df, in_sample_month, explanation_horizon_periods=6
    )
    # Tag each summary with the half-life that produced it so runs stay
    # distinguishable after concatenation.
    reg_summary_temp["half_life"] = half_life

    reg_summary_list.append(reg_summary_temp[summary_columns])

rsq_table = pd.concat(reg_summary_list).reset_index()

  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


In [7]:
# For each stock, show the single row (and hence the half-life) with the highest
# in-sample R².
# The previous `groupby("stock").max("is_rsq")` did not do this: the positional
# argument is taken as `numeric_only`, so every column was maximised
# independently and `half_life` always showed the largest value in the grid.
(
    rsq_table
    .sort_values("is_rsq", ascending=False)  # NaN R² values sort last
    .drop_duplicates(subset="stock")         # keep the best row per stock
    .set_index("stock")
    .sort_index()                            # same ordering as the groupby output
)

Unnamed: 0_level_0,beta_estimate,alpha_estimate,is_rsq,oos_rsq,half_life
stock,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A,85.021725,0.000558,0.024063,-0.109038,3600
AAL,100.032251,0.000648,0.051204,0.068017,3600
AAP,125.527825,0.000554,0.061591,0.023734,3600
AAPL,277.467167,0.000482,0.157565,-0.127719,3600
ABBV,135.922632,0.000487,0.045509,0.120975,3600
ABC,99.9196,0.000599,0.025915,-0.009289,3600
ABMD,175.6807,0.000772,0.071054,-0.174724,3600
ABT,147.981265,0.000444,0.057783,-0.066515,3600
ACN,140.837356,0.000391,0.041441,-0.049647,3600
ADBE,135.727728,0.000515,0.029595,0.014924,3600


### 2) AFS - `model_type = "sqrt"`

In [82]:
# AFS (sqrt) impact model: for every candidate half-life, build the impact
# state, run the regressions, and stack the summaries into `rsq_table`.
in_sample_month = 3
# half_life_list = np.arange(10800, 14401, 1800)
half_life_list = [900, 1800, 3600, 7200, 14400]
# NOTE: `rsq_data` is allocated here but not filled or read anywhere in this cell.
rsq_data = np.zeros((len(stocks), len(half_life_list)))

summary_columns = ["beta_estimate", "alpha_estimate", "is_rsq", "oos_rsq", "half_life"]

reg_summary_list = []
for half_life in half_life_list:
    model_type = "sqrt"
    impact_px_df = IF.get_impact_state(
        traded_volume_df, monthly_scaling_factor, half_life, model_type
    )
    reg_summary_temp = IF.get_regression_results(
        impact_px_df, px_df, in_sample_month, explanation_horizon_periods=6
    )
    # Tag each summary with the half-life that produced it so runs stay
    # distinguishable after concatenation.
    reg_summary_temp["half_life"] = half_life

    reg_summary_list.append(reg_summary_temp[summary_columns])

rsq_table = pd.concat(reg_summary_list).reset_index()

  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns
  cum_impact = pre_ewm.ewm(alpha=1-decay_factor, adjust=False, axis="columns").mean()  # Across columns


### 3) Extended: Ridge-style

### Ridge-style OW

## Synthetic Alpha (Done)

In [3]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import
# cell at the top so a fresh "Restart & Run All" surfaces every dependency early.
import QuantTrading.SyntheticAlpha as SA
# Generate a synthetic alpha from `px_df` (loaded in the data-acquisition cell)
# for the ticker "AAPL".
# NOTE(review): 0.6 is presumably a target correlation between the synthetic
# alpha and realised returns — confirm against SA.get_synthetic_alpha.
SA.get_synthetic_alpha(0.6, px_df, "AAPL")

(0.602489275982515, 3.2902960222108816e-07, 1.1906048425404063e-07)


  return synthetic_alphas.ewm(halflife=200, axis="columns").mean()


time,09:30:00,09:30:10,09:30:20,09:30:30,09:30:40,09:30:50,09:31:00,09:31:10,09:31:20,09:31:30,...,15:58:30,15:58:40,15:58:50,15:59:00,15:59:10,15:59:20,15:59:30,15:59:40,15:59:50,16:00:00
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-01-02,0.0,0.0,0.0,0.0,0.0,0.019565,0.033478,0.043736,0.051487,0.057435,...,0.013254,0.013218,0.013182,0.013148,0.013113,0.013079,0.013043,0.013005,0.012964,0.012919
2019-01-03,0.0,0.0,0.0,0.0,0.0,-0.014960,-0.025850,-0.034021,-0.040254,-0.045076,...,-0.021168,-0.021103,-0.021036,-0.020969,-0.020901,-0.020832,-0.020761,-0.020690,-0.020618,-0.020547
2019-01-04,0.0,0.0,0.0,0.0,0.0,-0.001033,-0.001778,-0.002314,-0.002649,-0.002784,...,0.008271,0.008261,0.008249,0.008233,0.008214,0.008192,0.008169,0.008144,0.008117,0.008089
2019-01-07,0.0,0.0,0.0,0.0,0.0,0.002612,0.004572,0.006128,0.007385,0.008480,...,0.002756,0.002742,0.002727,0.002711,0.002697,0.002684,0.002672,0.002661,0.002651,0.002641
2019-01-08,0.0,0.0,0.0,0.0,0.0,-0.003819,-0.006666,-0.008957,-0.010858,-0.012467,...,-0.000491,-0.000490,-0.000488,-0.000489,-0.000491,-0.000494,-0.000498,-0.000501,-0.000502,-0.000500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-20,0.0,0.0,0.0,0.0,0.0,-0.000753,-0.001272,-0.001705,-0.002046,-0.002327,...,-0.002929,-0.002928,-0.002927,-0.002926,-0.002924,-0.002921,-0.002917,-0.002911,-0.002902,-0.002892
2019-12-23,0.0,0.0,0.0,0.0,0.0,0.001148,0.001805,0.002207,0.002464,0.002610,...,-0.004444,-0.004430,-0.004416,-0.004401,-0.004387,-0.004372,-0.004358,-0.004343,-0.004328,-0.004313
2019-12-24,0.0,0.0,0.0,0.0,0.0,-0.008748,-0.014998,-0.019684,-0.023354,-0.026336,...,-0.009143,-0.009114,-0.009085,-0.009057,-0.009028,-0.009000,-0.008971,-0.008942,-0.008911,-0.008880
2019-12-26,0.0,0.0,0.0,0.0,0.0,0.003550,0.005965,0.007713,0.009044,0.010116,...,0.006582,0.006563,0.006544,0.006524,0.006504,0.006483,0.006462,0.006440,0.006418,0.006396


## Optimal Trading Strategy (Not started, should be very short)

## Backtesting (Barely Started)

In future versions, pass `cum_impacts` in as an input.

In [168]:
# Disabled draft: subtracts the fitted impact (cumulative impact state times the
# per-stock beta estimate) from cumulative returns and rebuilds prices from them.
# NOTE(review): the draft calls a bare `impact_state(...)`, which is not defined
# or imported in the visible cells — confirm the intended helper before re-enabling.
# def impact_adjusted_prices(pre_ewm, px_df, scaling_df, half_life, impact_coef_df, model_type):
#     cum_impacts = impact_state(pre_ewm, scaling_df, half_life, model_type).T
#     cum_returns = px_df.T / px_df.T.iloc[0, :] - 1
#     stock_date_df = cum_returns.T.iloc[:, 0].reset_index()
#     # stock_date_df["month"] = pd.to_datetime(stock_date_df["date"]).dt.month
#     stock_date_df.drop(["date"], axis="columns", inplace=True)
#     impact_coefficients = pd.merge(stock_date_df, impact_coef_df, on=["stock"], how="left")["beta_estimate"].values
#     cum_returns -= cum_impacts * impact_coefficients
#     adjusted_px_df = (px_df.T.iloc[0, :] * (cum_returns + 1)).T.reset_index()
#     return adjusted_px_df


## Performance Analysis (Need plots!)