# Prior Work

In [1]:
import sqlite3
import pandas as pd
import numpy as np
from dataclasses import dataclass
import digital_twin

def pull_pure_returns(con):
    """Load the complete ``pure_returns`` table.

    Args:
        con: Open database connection that ``pandas.read_sql`` can query.

    Returns:
        DataFrame with every row and column of ``pure_returns``.
    """
    query = "SELECT * FROM pure_returns"
    return pd.read_sql(query, con)

def pull_prices(con):
    """Load the complete ``prices`` table.

    Args:
        con: Open database connection that ``pandas.read_sql`` can query.

    Returns:
        DataFrame with every row and column of ``prices``.
    """
    query = "SELECT * FROM prices"
    return pd.read_sql(query, con)

def pull_trades(con):
    """Load the complete ``trades`` table.

    Args:
        con: Open database connection that ``pandas.read_sql`` can query.

    Returns:
        DataFrame with every row and column of ``trades``.
    """
    query = "SELECT * FROM trades"
    return pd.read_sql(query, con)

def process_pure_returns(pure_returns_data):
    """Index the pure-returns table by ``t`` and order it by that index.

    Args:
        pure_returns_data: DataFrame containing a ``t`` column.

    Returns:
        New DataFrame indexed by ``t`` and sorted ascending on the index;
        the input frame is left untouched.
    """
    return pure_returns_data.set_index('t').sort_index()

def process_prices(prices_data):
    """Index the prices table by ``t`` and order it by that index.

    Args:
        prices_data: DataFrame containing a ``t`` column.

    Returns:
        New DataFrame indexed by ``t`` and sorted ascending on the index;
        the input frame is left untouched.
    """
    return prices_data.set_index('t').sort_index()

def process_trades(trades_data):
    """Pivot the long trade log into one boolean column per trade type.

    Args:
        trades_data: DataFrame with a ``time`` column and a ``trade`` column
            naming the trade recorded at that time.

    Returns:
        DataFrame indexed by ``t`` with one column per distinct ``trade``
        value. A cell is True when that trade appears at that time and
        False otherwise. The input frame is not modified.

    Raises:
        ValueError: Raised by ``DataFrame.pivot`` when the same
            (time, trade) pair occurs more than once.
    """
    # rename() returns a new frame, so the caller's data is not mutated below.
    trades_data = trades_data.rename(columns={"time": "t"})
    trades_data["had_trade"] = True

    # pandas 2.0 made pivot's arguments keyword-only; the positional form
    # raises TypeError there, so name them explicitly.
    trades_data = trades_data.pivot(index="t", columns="trade", values="had_trade")

    # Every present cell is True, so "not missing" is exactly "had a trade".
    # notna() yields a real bool frame without relying on fillna's deprecated
    # implicit object -> bool downcast.
    return trades_data.notna()

def aggregate_pull(con):
    """Pull the three source tables and pre-process each of them.

    Args:
        con: Open database connection handed to the ``pull_*`` helpers.

    Returns:
        dict with keys ``"pure_returns"``, ``"prices_data"`` and
        ``"trades_data"``, each holding the processed DataFrame.
    """
    # All reads happen first, then the processing step for each table.
    raw_returns = pull_pure_returns(con)
    raw_prices = pull_prices(con)
    raw_trades = pull_trades(con)

    processed_returns = process_pure_returns(raw_returns)
    processed_prices = process_prices(raw_prices)
    processed_trades = process_trades(raw_trades)

    return {
        "pure_returns": processed_returns,
        "prices_data": processed_prices,
        "trades_data": processed_trades,
    }

def compute_input_data(data):
    """Assemble the starting state and the combined historical frame.

    Args:
        data: dict with DataFrames under ``"pure_returns"``,
            ``"prices_data"`` and ``"trades_data"``.

    Returns:
        dict with:
            ``"starting_state"``: first row of the prices frame.
            ``"historical_data"``: returns, shifted prices and trades
                joined column-wise on their indexes.
            ``"input_data"``: the two return columns of the history.
            ``"output_data"``: the two price columns plus the three trade
                columns of the history.
    """
    trade_columns = ["Arbitrage", "Momentum Buy", "Momentum Sell"]

    returns_frame = data["pure_returns"].copy()
    price_frame = data["prices_data"].copy()
    trade_frame = data["trades_data"].copy()

    # The first price row is the starting state; the remaining rows are
    # re-labelled one step earlier before being joined with the returns.
    starting_state = price_frame.iloc[0]
    later_prices = price_frame.iloc[1:]
    later_prices.index = later_prices.index - 1

    historical_data = pd.concat(
        [returns_frame, later_prices, trade_frame], axis=1
    )
    # Index labels with no trade row come out of the concat as missing.
    historical_data[trade_columns] = historical_data[trade_columns].fillna(False)

    return {
        "starting_state": starting_state,
        "historical_data": historical_data,
        "input_data": historical_data[["index_return", "basket_return"]],
        "output_data": historical_data[["index_price", "basket_price"] + trade_columns],
    }

# Semantic aliases for built-in types; they are used as the field
# annotations of the Prices, Returns and Trades dataclasses.
share_price = float  # annotates the price fields
percentage_return = float  # annotates the return fields
trade_action = bool  # annotates the per-trade flags

@dataclass
class Prices:
    """Pair of prices: one for the index and one for the basket."""

    index_price: share_price  # filled from the "index_price" column
    basket_price: share_price  # filled from the "basket_price" column
        
@dataclass
class Returns:
    """Pair of returns: one for the index and one for the basket."""

    index_return: percentage_return  # filled from the "index_return" column
    basket_return: percentage_return  # filled from the "basket_return" column
        
@dataclass
class Trades:
    """Flags for the three trade types tracked in the data."""

    arbitrage: trade_action  # filled from the "Arbitrage" column
    momentum_buy: trade_action  # filled from the "Momentum Buy" column
    momentum_sell: trade_action  # filled from the "Momentum Sell" column
        
        
def map_price(data):
    """Build a ``Prices`` record from a mapping-like row.

    Args:
        data: Object supporting ``data["index_price"]`` and
            ``data["basket_price"]`` (e.g. a DataFrame row).

    Returns:
        ``Prices`` holding the two looked-up values.
    """
    index_value = data["index_price"]
    basket_value = data["basket_price"]
    return Prices(index_price=index_value, basket_price=basket_value)

def map_returns(data):
    """Build a ``Returns`` record from a mapping-like row.

    Args:
        data: Object supporting ``data["index_return"]`` and
            ``data["basket_return"]`` (e.g. a DataFrame row).

    Returns:
        ``Returns`` holding the two looked-up values.
    """
    index_value = data["index_return"]
    basket_value = data["basket_return"]
    return Returns(index_return=index_value, basket_return=basket_value)

def map_trades(data):
    """Build a ``Trades`` record from a mapping-like row.

    Args:
        data: Object supporting lookups of ``"Arbitrage"``,
            ``"Momentum Buy"`` and ``"Momentum Sell"`` (e.g. a DataFrame row).

    Returns:
        ``Trades`` holding the three looked-up flags.
    """
    arbitrage_flag = data["Arbitrage"]
    buy_flag = data["Momentum Buy"]
    sell_flag = data["Momentum Sell"]
    return Trades(
        arbitrage=arbitrage_flag,
        momentum_buy=buy_flag,
        momentum_sell=sell_flag,
    )

def format_inputs(inputs):
    """Convert the computed inputs into dataclass-valued structures.

    Args:
        inputs: dict with ``"starting_state"`` (a price row) and the
            DataFrames ``"historical_data"``, ``"input_data"`` and
            ``"output_data"``.

    Returns:
        dict with the same four keys: ``"starting_state"`` becomes a
        ``Prices`` record, and each frame is reduced to object columns
        (``returns`` / ``prices`` / ``trades``) holding one record per row.
        The frames in ``inputs`` are copied, not modified.
    """
    formatted = {}

    formatted["starting_state"] = map_price(inputs["starting_state"].copy())

    # Columns are attached one after another, in this order, so each later
    # row-wise apply sees the columns added before it.
    history = inputs["historical_data"].copy()
    for column, mapper in (
        ("returns", map_returns),
        ("prices", map_price),
        ("trades", map_trades),
    ):
        history[column] = history.apply(mapper, axis=1)
    formatted["historical_data"] = history[["returns", "prices", "trades"]]

    model_inputs = inputs['input_data'].copy()
    model_inputs["returns"] = model_inputs.apply(map_returns, axis=1)
    formatted["input_data"] = model_inputs[["returns"]]

    model_outputs = inputs['output_data'].copy()
    for column, mapper in (("prices", map_price), ("trades", map_trades)):
        model_outputs[column] = model_outputs.apply(mapper, axis=1)
    formatted["output_data"] = model_outputs[["prices", "trades"]]

    return formatted

class ArbitrageDataPipeline(digital_twin.DataPipeline):
    """Data pipeline backed by the ``arb.db`` SQLite file.

    Each method delegates to the module-level function that does the work.
    """

    def pull_historical_data(self):
        """Pull and pre-process the source tables from ``arb.db``.

        Returns:
            The dict produced by ``aggregate_pull``.
        """
        con = sqlite3.connect('arb.db')
        try:
            return aggregate_pull(con)
        finally:
            # Close explicitly: the connection was previously leaked, and
            # sqlite3's own context manager only commits/rolls back.
            con.close()

    def compute_input_data(self, data):
        """Return ``compute_input_data(data)`` for the pulled data dict."""
        return compute_input_data(data)

    def format_input_data(self, data):
        """Return ``format_inputs(data)`` for the computed input dict."""
        return format_inputs(data)

class ArbitrageDigitalTwin(digital_twin.DigitalTwin):
    """Digital twin that can cache its historical data as CSV files."""

    def load_data_initial(self):
        """Pull data through the pipeline and write each frame to CSV.

        Stores the pulled dict on ``self.historical_data`` and writes
        ``pure_returns.csv``, ``prices_data.csv`` and ``trades_data.csv``
        to the current working directory.
        """
        self.historical_data = self.data_pipeline.pull_historical_data()

        for key, path in (
            ("pure_returns", "pure_returns.csv"),
            ("prices_data", "prices_data.csv"),
            ("trades_data", "trades_data.csv"),
        ):
            self.historical_data[key].to_csv(path)

    def load_data_prior(self):
        """Rebuild ``self.historical_data`` from the three CSV files.

        Each file is read with its first column as the index.
        """
        self.historical_data = {}

        for key, path in (
            ("pure_returns", "pure_returns.csv"),
            ("prices_data", "prices_data.csv"),
            ("trades_data", "trades_data.csv"),
        ):
            self.historical_data[key] = pd.read_csv(path, index_col=0)

In [3]:
from model.run import load_config
from model.run import run, postprocessing



# Driver cell: build the twin, load its data, and run one simulation.

# Wire the arb.db-backed pipeline into the digital twin.
TestDataPipeline = ArbitrageDataPipeline()
arb_dt = ArbitrageDigitalTwin(name = "Test",
                    data_pipeline = TestDataPipeline)
# Pulls through the pipeline and also writes the three CSV cache files.
arb_dt.load_data_initial()
# NOTE(review): compute_input_data() is defined on the digital_twin base
# class (not shown here); presumably it fills arb_dt.input_data via the
# pipeline -- confirm against digital_twin.
arb_dt.compute_input_data()
starting_state = arb_dt.input_data["starting_state"]
# Initial state has two keys, "prices" and "trades"; trades starts as None.
starting_state = {"prices": starting_state,
                 "trades": None}
input_data = arb_dt.input_data["input_data"]
# A single run of 100 timesteps; the input data is passed as one parameter
# value wrapped in a list.
exp = load_config(monte_carlo_runs = 1,
            timesteps = 100,
            params = {"input_data": [input_data]},
            initial_state = starting_state)
raw = run(exp)
processed = postprocessing(raw)


                  ___________    ____
  ________ __ ___/ / ____/   |  / __ \
 / ___/ __` / __  / /   / /| | / / / /
/ /__/ /_/ / /_/ / /___/ ___ |/ /_/ /
\___/\__,_/\__,_/\____/_/  |_/_____/
by cadCAD

cadCAD Version: 0.4.28
Execution Mode: local_proc
Simulation Dimensions:
Entire Simulation: (Models, Unique Timesteps, Params, Total Runs, Sub-States) = (1, 100, 1, 1, 2)
     Simulation 0: (Timesteps, Params, Runs, Sub-States) = (100, 1, 1, 2)
Execution Method: local_simulations
Execution Mode: single_threaded
Total execution time: 0.01s
