In [1]:
import pandas as pd
import os
import numpy as np
import random
from arch.unitroot import PhillipsPerron
from statsmodels.tsa.vector_ar.vecm import VAR
from statsmodels.tsa.stattools import adfuller, coint
from utils import Utils as Ut
from config import Config, logger
from main import INITIAL_INVESTMENT
from trade import Trade

In [None]:
# Build the project configuration object and capture the data-path accessor
# that is later handed to `Ut.get_data`.
# NOTE(review): `get_path_to_data` is read from the `Config` class (not the
# `config` instance) and is not called here — confirm it is a plain class
# attribute rather than a method/property that needs `config.` or `()`.
config = Config()
path = Config.get_path_to_data

In [None]:
# Run parameters: the two tickers forming the pair, the training window
# bounds as YYYY-MM-DD strings, and the amount to invest.
ticker1, ticker2 = "AZN", "PFE"
start_time, end_time = "2004-06-16", "2010-06-16"
invest = 1000

In [None]:
'''1. Load data'''

# Ask the utilities for the pair's data paths, then open both raw
# (not yet cleaned) price series from them.
pair_paths = Ut.get_data(ticker1, ticker2, path)
(unclean_price_series1,
 unclean_price_series2) = Ut.open_pair_data(ticker1, ticker2, pair_paths)

In [None]:
'''2. Clean data'''

# Each raw series is cleaned twice: once with `clean_df` and once with
# `clean_df_nodiff`. Calls happen in the same order as before
# (clean_df on 1, clean_df on 2, then the nodiff variants).
price_series1_whole, price_series2_whole = (
    Ut.clean_df(raw) for raw in (unclean_price_series1, unclean_price_series2)
)
price_series1_whole_nodiff, price_series2_whole_nodiff = (
    Ut.clean_df_nodiff(raw) for raw in (unclean_price_series1, unclean_price_series2)
)

# Training sets restricted to [start_time, end_time].
# The window needs to be adjusted after every run: simply add the forecast
# period on to it.
price_series1, price_series2 = (
    Ut.create_training_set(whole, start_time, end_time)
    for whole in (price_series1_whole, price_series2_whole)
)


In [None]:
'''3. Cointegration test'''

# Run the project's Johansen cointegration helper on the two training series.
coint_ratio = Ut.johansen_cointegration(price_series1, price_series2)

# Report which training window was tested and what the helper returned.
# (The previous message was an f-string with no placeholders and logged
# neither the window nor the result.)
logger.info(
    f"Cointegration test with window {start_time} to {end_time}: {coint_ratio}"
)

In [None]:

'''4. Unit root tests'''

# Augmented Dickey-Fuller on each training series. Element [1] of each
# result is the p-value; an earlier run recorded
#   result[1]  = 1.6410552858534234e-21
#   result2[1] = 2.0682275928364335e-10
result, result2 = (
    adfuller(series) for series in (price_series1, price_series2)
)

# Phillips-Perron on the same two series, using trend='n'.
# To inspect the outcome, print e.g. `str(pp1.summary())` / `str(pp2.summary())`.
pp1, pp2 = (
    PhillipsPerron(series, trend='n') for series in (price_series1, price_series2)
)

In [None]:
'''5. Picking appropriate Lag order'''

# Candidate lag orders are pooled from two sources (Pearson correlation at
# different lags, and the VAR lag-order tests) into one de-duplicated set.
lag_candidates = set()

merged_series = Ut.create_merged_series(price_series1, price_series2, ticker1, ticker2)

# Pearson correlation
lags_to_check = 40
pearson_results = Ut.pearson_corr_lags(merged_series, lags_to_check)
# set.update() inserts every key. The earlier `set.add(<generator>)` inserted
# the generator object itself, so no actual lag ever reached the set.
lag_candidates.update(pearson_results.keys())

# VAR tests
# NOTE: `result2` is rebound here and no longer refers to the ADF result
# stored under the same name in step 4.
result1, result2 = Ut.test_lag_orders_var(merged_series)
result_intersect = np.intersect1d(result1, result2)
# .tolist() yields plain Python scalars instead of numpy scalars.
lag_candidates.update(result_intersect.tolist())

logger.info(f"Lag candidates: {lag_candidates}")

if not lag_candidates:
    # Fail with a clear message instead of random.choice's bare IndexError.
    raise ValueError("No lag candidates were produced by the Pearson or VAR tests")

# Placeholder selection, for testing purposes only — needs to be replaced
# with a principled choice.
lag_order = random.choice(list(lag_candidates))

logger.info(f"Chose lag order: {lag_order}")


In [None]:

'''6. Forecast'''
# The forecast horizon is currently tied to the chosen lag order.
forecast_days = lag_order
forecasted_period = Ut.get_forecast_dates(forecast_days, end_time)

# Actual values over the forecast period, taken from the `_nodiff` series.
actual1 = Ut.reveal_actual(price_series1_whole_nodiff, forecasted_period['start'], forecasted_period['end'])
actual2 = Ut.reveal_actual(price_series2_whole_nodiff, forecasted_period['start'], forecasted_period['end'])

# Merge the *whole* series of both tickers. Previously the second argument
# was the training-window `price_series2`, which mixed a whole series with a
# truncated one. Labels are derived from the ticker variables so they follow
# the configured pair (identical to the old literals for AZN/PFE).
price_series_whole = Ut.create_merged_series(
    price_series1_whole,
    price_series2_whole,
    ticker1=f"{ticker1} (whole)",
    ticker2=f"{ticker2} (whole)",
)
forecasted = Ut.forecasted(lag_order, merged_series, forecast_days)

actual_final = Ut.create_merged_series(actual1, actual2, ticker1, ticker2)
forecasted_final = Ut.reverse_differencing(forecasted, price_series_whole, ticker1, ticker2)

In [None]:
'''Find historical difference between the two stocks...'''

# Training-window slices of the `_nodiff` series, one per ticker.
price_series1_1 = Ut.create_training_set(price_series1_whole_nodiff, start_time, end_time)
# Fixed: this was built from `price_series1_whole_nodiff`, so both operands
# were the same series and the "difference" compared stock 1 with itself.
price_series2_2 = Ut.create_training_set(price_series2_whole_nodiff, start_time, end_time)

historical_differences = Ut.find_historical_differences(price_series1_1, price_series2_2)

# Standard deviation of the historical differences for the chosen lag order.
std = Ut.calc_lag_std(historical_differences, lag_order)

In [None]:
'''7. Plan trades'''

# Trading idea: act when the two prices are forecast to diverge in opposite
# directions.
#   Buy signal  - asset 1 is predicted to rise while the other is predicted
#                 to fall: go long.
#   Sell signal - asset 2 is predicted to fall while the other is predicted
#                 to rise: short it.
#
# The standard deviation below represents the average distance between each
# value and the mean, computed per training series for the chosen lag order.
std_volatility1, std_volatility2 = (
    Ut.calc_lag_std(series, lag_order) for series in (price_series1, price_series2)
)

# TODO: trade execution is currently disabled:
# trade = Trade(ticker1, ticker2)

'''
3 indicators to consider...
 1. The historical differences between the two stocks and the standard deviation of these differences for a specific lag order
 2. Stock 1's standard deviation for a specific lag order
 3. Stock 2's standard deviation for a specific lag order

A - Look at the forecasted period for both stocks and check whether the difference between them is above or below indicator 1.
B - Look at either stock and see if it matches its own historical deviation.

For either stock... (all relative to historicals)
Buy signal - If the difference for that period is very low, it suggests that stock 2 is outperforming stock 1. We bet on this eventually reverting to
historical norms. So long stock 1 and short stock 2.
Hold - If the difference for that period is close to 0, it suggests that it is a relatively stable period. So hold.
Sell signal - If the difference for that period is very high, it suggests that stock 1 is outperforming stock 2. We bet on this eventually coming down to
historical norms. So short stock 1 and long stock 2.
'''

# Forecast spread: ticker1's "(forecasted)" column minus ticker2's.
# NOTE(review): this reads `forecasted`, the output of `Ut.forecasted` from
# step 6, not `forecasted_final` (the result of `Ut.reverse_differencing`).
# Confirm this is the frame intended for comparison against
# `historical_differences`, which is built from the `_nodiff` series.
forecasted_differences = forecasted[f'{ticker1} (forecasted)'] - forecasted[f'{ticker2} (forecasted)']

# Mean of the forecast spread values.
forecasted_mean = np.mean(forecasted_differences)





# for date, diff_val in enumerate(forecasted[ticker1]):
#     trade.update_stock_price(actual_final[ticker1][date], actual_final[ticker2][date])
#     # If the difference gets too high between the two stocks, we bet it on going back down by shorting 