# Backtest Model

With the data sorted out, we move on to creating the backtest model for the digital twin. We want to be able to accurately replicate the system.

1. Where would types best go?
    - GitHub submodules?
2. How might we want to organize things like loading the configs?
3. Do we want to cut the psubs into something like meta block, backtest block, extrap block, similar to how Danilo has done it?
    - Or a switch that says whether something is backtest only/extrap only/both
4. What is the overall structure we want to respect when it comes to this?


Define the types as a dimension, compose other models with shared dimensions

!! Send message to cadCAD channel to see if you can catch up!!

1. Build the baseline model abstract
2. Get a baseline explanation of things -> i.e. define types and use as submodule or something similar.

## Prior Work

The following prior work will be used for developing this part.

In [1]:
import sqlite3
import pandas as pd
import numpy as np
from dataclasses import dataclass
import digital_twin

def pull_pure_returns(con):
    """Read every row of the ``pure_returns`` table into a DataFrame.

    Args:
        con: Open database connection accepted by ``pandas.read_sql``.

    Returns:
        DataFrame with the table's columns and rows, unprocessed.
    """
    query = "SELECT * FROM pure_returns"
    frame = pd.read_sql(query, con)
    return frame

def pull_prices(con):
    """Read every row of the ``prices`` table into a DataFrame.

    Args:
        con: Open database connection accepted by ``pandas.read_sql``.

    Returns:
        DataFrame with the table's columns and rows, unprocessed.
    """
    query = "SELECT * FROM prices"
    frame = pd.read_sql(query, con)
    return frame

def pull_trades(con):
    """Read every row of the ``trades`` table into a DataFrame.

    Args:
        con: Open database connection accepted by ``pandas.read_sql``.

    Returns:
        DataFrame with the table's columns and rows, unprocessed.
    """
    query = "SELECT * FROM trades"
    frame = pd.read_sql(query, con)
    return frame

def process_pure_returns(pure_returns_data):
    """Index the returns by their ``t`` column and order them by it.

    Args:
        pure_returns_data: DataFrame containing a ``t`` column.

    Returns:
        A new DataFrame indexed by ``t`` in ascending order; the input
        frame is left untouched.
    """
    return pure_returns_data.set_index('t').sort_index()

def process_prices(prices_data):
    """Index the prices by their ``t`` column and order them by it.

    Args:
        prices_data: DataFrame containing a ``t`` column.

    Returns:
        A new DataFrame indexed by ``t`` in ascending order; the input
        frame is left untouched.
    """
    return prices_data.set_index('t').sort_index()

def process_trades(trades_data):
    """Turn the trade log into one boolean column per trade type.

    Args:
        trades_data: DataFrame with a ``time`` column and a ``trade``
            column naming the kind of trade made at that time.

    Returns:
        A new DataFrame indexed by ``t`` (the renamed ``time`` column) with
        one bool column per distinct ``trade`` value: ``True`` where that
        trade happened at that time, ``False`` otherwise. The input frame
        is not modified.

    Raises:
        ValueError: Raised by ``DataFrame.pivot`` when the same
            (time, trade) pair appears more than once.
    """
    # rename() returns a new frame, so adding the marker column below does
    # not touch the caller's data.
    trades_data = trades_data.rename(columns={"time": "t"})
    trades_data["had_trade"] = True
    # pivot() only accepts keyword arguments from pandas 2.0 onwards.
    trades_data = trades_data.pivot(index="t", columns="trade", values="had_trade")
    # Missing (t, trade) combinations come out of the pivot as NaN; make the
    # result explicitly bool instead of relying on fillna's implicit downcast.
    trades_data = trades_data.fillna(False).astype(bool)
    return trades_data

def aggregate_pull(con):
    """Pull the three source tables and run each through its processor.

    Args:
        con: Open database connection passed on to the ``pull_*`` helpers.

    Returns:
        dict with the processed frames under the keys ``"pure_returns"``,
        ``"prices_data"`` and ``"trades_data"``.
    """
    # All reads happen first, then the processing, as separate passes.
    raw_returns = pull_pure_returns(con)
    raw_prices = pull_prices(con)
    raw_trades = pull_trades(con)

    return {
        "pure_returns": process_pure_returns(raw_returns),
        "prices_data": process_prices(raw_prices),
        "trades_data": process_trades(raw_trades),
    }

def compute_input_data(data):
    """Split the pulled history into a starting state, inputs and expected outputs.

    The first price row becomes the starting state. The remaining price rows
    are re-indexed one step earlier, so the row at index ``t`` holds the
    prices that line up with the returns at index ``t``.

    Args:
        data: Mapping with ``"pure_returns"``, ``"prices_data"`` and
            ``"trades_data"`` DataFrames. The frames are copied, so the
            caller's data is not modified. The prices index must support
            subtracting 1.

    Returns:
        dict with the keys:
            ``"starting_state"``: first row of the prices frame (a Series).
            ``"historical_data"``: returns, shifted prices and trade flags
                joined column-wise on their index.
            ``"input_data"``: the ``index_return`` / ``basket_return`` columns.
            ``"output_data"``: the price columns plus the three trade flags.
    """
    trade_columns = ["Arbitrage", "Momentum Buy", "Momentum Sell"]

    pure_returns_data = data["pure_returns"].copy()
    prices_data = data["prices_data"].copy()
    trades_data = data["trades_data"].copy()

    # Grab the starting state, then shift the remaining prices back one step.
    starting_state = prices_data.iloc[0]
    prices_data = prices_data.iloc[1:]
    prices_data.index = prices_data.index - 1

    # Combine data
    historical_data = pd.concat([pure_returns_data, prices_data, trades_data], axis=1)

    # A trade type that never occurred has no column in the pivoted trades
    # frame; add it as "never traded" rather than failing with a KeyError.
    for column in trade_columns:
        if column not in historical_data.columns:
            historical_data[column] = False
    # Timesteps without any trade are NaN after the join; treat them as False
    # and make the flags explicitly bool.
    historical_data[trade_columns] = historical_data[trade_columns].fillna(False).astype(bool)

    input_data = historical_data[["index_return", "basket_return"]]
    output_data = historical_data[["index_price", "basket_price"] + trade_columns]

    out = {"starting_state": starting_state,
           "historical_data": historical_data,
           "input_data": input_data,
           "output_data": output_data}

    return out

# Aliases of built-in types, used as the field annotations of the
# Prices / Returns / Trades dataclasses.
share_price = float  # annotates the price fields
percentage_return = float  # annotates the return fields
trade_action = bool  # annotates the trade flags

@dataclass
class Prices:
    """Pair of prices: one for the index, one for the basket."""

    # Price of the index.
    index_price: share_price
    # Price of the basket.
    basket_price: share_price
        
@dataclass
class Returns:
    """Pair of returns: one for the index, one for the basket."""

    # Return of the index.
    index_return: percentage_return
    # Return of the basket.
    basket_return: percentage_return
        
@dataclass
class Trades:
    """Flags recording which kinds of trade were made."""

    # True when an arbitrage trade was made.
    arbitrage: trade_action
    # True when a momentum buy was made.
    momentum_buy: trade_action
    # True when a momentum sell was made.
    momentum_sell: trade_action
        
        
def map_price(data):
    """Build a ``Prices`` record from a mapping-like row.

    Args:
        data: Object indexable by ``"index_price"`` and ``"basket_price"``
            (for example a DataFrame row).

    Returns:
        ``Prices`` holding the two values.
    """
    index_price = data["index_price"]
    basket_price = data["basket_price"]
    return Prices(index_price=index_price, basket_price=basket_price)

def map_returns(data):
    """Build a ``Returns`` record from a mapping-like row.

    Args:
        data: Object indexable by ``"index_return"`` and ``"basket_return"``
            (for example a DataFrame row).

    Returns:
        ``Returns`` holding the two values.
    """
    index_return = data["index_return"]
    basket_return = data["basket_return"]
    return Returns(index_return=index_return, basket_return=basket_return)

def map_trades(data):
    """Build a ``Trades`` record from a mapping-like row.

    Args:
        data: Object indexable by ``"Arbitrage"``, ``"Momentum Buy"`` and
            ``"Momentum Sell"`` (for example a DataFrame row).

    Returns:
        ``Trades`` holding the three flags.
    """
    arbitrage = data["Arbitrage"]
    momentum_buy = data["Momentum Buy"]
    momentum_sell = data["Momentum Sell"]
    return Trades(arbitrage=arbitrage,
                  momentum_buy=momentum_buy,
                  momentum_sell=momentum_sell)

def format_inputs(inputs):
    """Replace the raw numeric columns with typed record objects.

    Args:
        inputs: dict with ``"starting_state"`` (a price row) and the
            DataFrames ``"historical_data"``, ``"input_data"`` and
            ``"output_data"``. Every entry is copied before being changed.

    Returns:
        dict with the same four keys, where:
            ``"starting_state"`` is a ``Prices`` record,
            ``"historical_data"`` has the columns ``returns``, ``prices``, ``trades``,
            ``"input_data"`` has the single column ``returns``,
            ``"output_data"`` has the columns ``prices`` and ``trades``.
    """
    def with_records(frame, column_builders):
        # Work on a copy, add one record column per (name, builder) pair in
        # the given order, then keep only those new columns.
        typed = frame.copy()
        for column, builder in column_builders:
            typed[column] = typed.apply(builder, axis=1)
        return typed[[column for column, _ in column_builders]]

    formatted = {}

    formatted["starting_state"] = map_price(inputs["starting_state"].copy())

    formatted["historical_data"] = with_records(
        inputs["historical_data"],
        [("returns", map_returns), ("prices", map_price), ("trades", map_trades)],
    )

    formatted["input_data"] = with_records(
        inputs['input_data'],
        [("returns", map_returns)],
    )

    formatted["output_data"] = with_records(
        inputs['output_data'],
        [("prices", map_price), ("trades", map_trades)],
    )

    return formatted

class ArbitrageDataPipeline(digital_twin.DataPipeline):
    """Data pipeline that reads the arbitrage history from the ``arb.db`` SQLite file."""

    def pull_historical_data(self):
        """Pull and pre-process the returns, prices and trades tables.

        Returns:
            The dict produced by ``aggregate_pull`` (keys ``"pure_returns"``,
            ``"prices_data"``, ``"trades_data"``).
        """
        con = sqlite3.connect('arb.db')
        try:
            return aggregate_pull(con)
        finally:
            # Using a sqlite3 connection as a context manager only handles
            # commit/rollback, so release the handle explicitly.
            con.close()

    def compute_input_data(self, data):
        """Delegate to the module-level ``compute_input_data``."""
        return compute_input_data(data)

    def format_input_data(self, data):
        """Delegate to the module-level ``format_inputs``."""
        return format_inputs(data)

class ArbitrageDigitalTwin(digital_twin.DigitalTwin):
    """Digital twin whose historical data can be pulled fresh or reloaded from CSV."""

    def load_data_initial(self):
        """Pull the history through the data pipeline and save each table to CSV."""
        self.historical_data = self.data_pipeline.pull_historical_data()

        csv_targets = (
            ("pure_returns", "pure_returns.csv"),
            ("prices_data", "prices_data.csv"),
            ("trades_data", "trades_data.csv"),
        )
        for key, path in csv_targets:
            self.historical_data[key].to_csv(path)

    def load_data_prior(self):
        """Reload the history from the CSV files written by ``load_data_initial``."""
        self.historical_data = {}

        csv_sources = (
            ("pure_returns", "pure_returns.csv"),
            ("prices_data", "prices_data.csv"),
            ("trades_data", "trades_data.csv"),
        )
        for key, path in csv_sources:
            # The first CSV column is the saved index.
            self.historical_data[key] = pd.read_csv(path, index_col = 0)


In [2]:
# Build the pipeline and the digital twin, then pull fresh data from the
# database (load_data_initial also writes the pulled tables to CSV).
TestDataPipeline = ArbitrageDataPipeline()
arb_dt = ArbitrageDigitalTwin(name = "Test",
                    data_pipeline = TestDataPipeline)
arb_dt.load_data_initial()
# compute_input_data is inherited from digital_twin.DigitalTwin (not shown
# here); presumably it fills arb_dt.input_data via the pipeline -- TODO confirm.
arb_dt.compute_input_data()
starting_state = arb_dt.input_data["starting_state"]
# Initial model state: the starting prices plus an empty "trades" slot.
starting_state = {"prices": starting_state,
                 "trades": None}
input_data = arb_dt.input_data["input_data"]


In [3]:
from model.run import load_config

In [4]:
# Configure one run of 100 timesteps. The return series is handed to the model
# as the "input_data" parameter and starting_state as the initial state.
# NOTE(review): the parameter value is wrapped in a list, presumably because
# load_config expects a list of values per parameter -- confirm in model.run.
exp = load_config(monte_carlo_runs = 1,
            timesteps = 100,
            params = {"input_data": [input_data]},
            initial_state = starting_state)

In [5]:
from model.run import run, postprocessing
# Execute the configured experiment; `raw` is the unprocessed result.
raw = run(exp)

# Post-process the raw result with the model's own helper.
processed = postprocessing(raw)


                  ___________    ____
  ________ __ ___/ / ____/   |  / __ \
 / ___/ __` / __  / /   / /| | / / / /
/ /__/ /_/ / /_/ / /___/ ___ |/ /_/ /
\___/\__,_/\__,_/\____/_/  |_/_____/
by cadCAD

cadCAD Version: 0.4.28
Execution Mode: local_proc
Simulation Dimensions:
Entire Simulation: (Models, Unique Timesteps, Params, Total Runs, Sub-States) = (1, 100, 1, 1, 2)
     Simulation 0: (Timesteps, Params, Runs, Sub-States) = (100, 1, 1, 2)
Execution Method: local_simulations
Execution Mode: single_threaded
Total execution time: 0.04s


In [6]:
# Display the raw simulation result.
raw

Unnamed: 0,prices,trades,simulation,subset,run,substep,timestep
0,"Prices(index_price=100.0, basket_price=100.0)",,0,0,1,0,0
1,"Prices(index_price=110.87138625391076, basket_...",,0,0,1,1,1
2,"Prices(index_price=112.53772595362811, basket_...",,0,0,1,1,2
3,"Prices(index_price=117.2461197668698, basket_p...",,0,0,1,1,3
4,"Prices(index_price=131.4369055087102, basket_p...",,0,0,1,1,4
...,...,...,...,...,...,...,...
96,"Prices(index_price=297.2265210849316, basket_p...",,0,0,1,1,96
97,"Prices(index_price=302.78180557011564, basket_...",,0,0,1,1,97
98,"Prices(index_price=332.78724444458226, basket_...",,0,0,1,1,98
99,"Prices(index_price=345.38104647048516, basket_...",,0,0,1,1,99


## For Itamar:

1. Start by mapping out from the "1 Set Up" notebook what the policies and state updates will end up being.
2. Begin iterating on the different policy and state updates outside of a cadCAD model first (unless you prefer working with them in a cadCAD model). I find it easier to debug this way.
3. Use only the input data within the model, and then use the output data to check that you have correctly created the mechanisms.
4. For now you can just hard code the values for theta for trades, but eventually we will have it as a parameter

Starting state of the system:

In [7]:
# Starting state as stored on the digital twin (a Prices record, see the
# printed output below).
starting_state = arb_dt.input_data["starting_state"]
print(starting_state)

Prices(index_price=100.0, basket_price=100.0)


Input data, which you can reference within the cadCAD model by grabbing the timestep and indexing into the array. In this case it is just return data.

In [8]:
# Model inputs: one Returns record per timestep in the "returns" column.
input_data = arb_dt.input_data["input_data"]
print(input_data)

                                              returns
t                                                    
0   Returns(index_return=0.10871386253910743, bask...
1   Returns(index_return=0.015029483765100592, bas...
2   Returns(index_return=0.04183835929990093, bask...
3   Returns(index_return=0.12103416104564571, bask...
4   Returns(index_return=0.08280887550560694, bask...
..                                                ...
95  Returns(index_return=0.040580329256415186, bas...
96  Returns(index_return=0.018690406444573593, bas...
97  Returns(index_return=0.0990992137653999, baske...
98  Returns(index_return=0.037843403664469796, bas...
99  Returns(index_return=0.041954800849375494, bas...

[100 rows x 1 columns]


Output data: at each timestep, the model's results should match these values.

In [9]:
# Expected outputs; note that they are stored under arb_dt.input_data,
# alongside the inputs.
output_data = arb_dt.input_data["output_data"]
print(output_data)

                                               prices  \
t                                                       
0   Prices(index_price=110.87138625391076, basket_...   
1   Prices(index_price=112.53772595362811, basket_...   
2   Prices(index_price=120.17727276104154, basket_...   
3   Prices(index_price=138.09089885008862, basket_...   
4   Prices(index_price=153.26420217395852, basket_...   
..                                                ...   
95  Prices(index_price=282.98793555193873, basket_...   
96  Prices(index_price=295.48402246347314, basket_...   
97  Prices(index_price=314.3609940653123, basket_p...   
98  Prices(index_price=319.4994145870264, basket_p...   
99  Prices(index_price=329.1161770474364, basket_p...   

                                               trades  
t                                                      
0   Trades(arbitrage=False, momentum_buy=False, mo...  
1   Trades(arbitrage=False, momentum_buy=False, mo...  
2   Trades(arbitrage=False, moment