# Homework 4 - Trade Flow
By: Alex Huang

Date: February 8, 2024

UChicago ID: 12408153

## Imports and Config

In [2]:
# Operating
import os
import shutil
import platform
import datetime

# Plotting
import matplotlib.pyplot as plt
import seaborn as sns

# Data
import statsmodels.api as sm
import pandas as pd
import numpy as np
import scipy as sp

# Typing
from typing import Union

Unnamed: 0,Ask1PriceMillionths,Bid1PriceMillionths,Ask1SizeBillionths,Bid1SizeBillionths,Ask2PriceMillionths,Bid2PriceMillionths,Ask2SizeBillionths,Bid2SizeBillionths,received_utc_nanoseconds,timestamp_utc_nanoseconds,Mid
0,22972550000,22970150000,210000000,25797600,22972560000,22970120000,210000000,87069610,1674521267750919800,1674521267806932000,2.297135e+10
1,22972550000,22970150000,410000000,25797600,22972560000,22970120000,210000000,87069610,1674521267751154000,1674521267807073000,2.297135e+10
2,22972550000,22970150000,410000000,25797600,22972560000,22970120000,210000000,87069610,1674521267752363000,1674521267813516000,2.297135e+10
3,22972540000,22970150000,210000000,25797600,22972550000,22970120000,410000000,87069610,1674521267763308000,1674521267819272000,2.297134e+10
4,22972540000,22970150000,210000000,25797600,22972550000,22970120000,410000000,87069610,1674521267764161000,1674521267825538000,2.297134e+10
...,...,...,...,...,...,...,...,...,...,...,...
10700407,22571340000,22569880000,85809220,100000000,22571350000,22569440000,155580,200000000,1674661801846230000,1674661801834922000,2.257061e+10
10700408,22571340000,22569880000,85809220,100000000,22571350000,22569440000,12801710,200000000,1674661801847329100,1674661801846675000,2.257061e+10
10700409,22571340000,22569880000,85809220,100000000,22571350000,22569440000,12801710,200000000,1674661801857751100,1674661801855921000,2.257061e+10
10700410,22571330000,22569880000,88607990,100000000,22571340000,22569440000,85809220,200000000,1674661801881766800,1674661801889084000,2.257060e+10


## Functions

In [39]:
class Compute:
    def lagged_tradeflow(timeseries: pd.DataFrame, volume_column_name: str, side_column_name: str, lag: int):
        past = (timeseries[volume_column_name] * timeseries[side_column_name])
        org_cumsum = past.cumsum()
        org_cumsum.name = 'cumsum'
        lagged_cumsum = org_cumsum.reindex(org_cumsum.index - lag).fillna(method='ffill')
        lagged_cumsum.index = org_cumsum.index
        lagged_cumsum.name = 'new_cumsum'
        volume_side = past.copy().cumsum()
        display("Vol * Side")
        volume_side.index = past.index
        display(pd.concat([past, volume_side], axis=1))
        past = past.reindex(timeseries.index - lag)
        past = past.fillna(method='ffill')
        display("Reindexed")
        volume_side.index = past.index
        display(pd.concat([past, volume_side], axis=1))
        past = past.cumsum()
        display("Cumsum")
        volume_side.index = past.index
        display(pd.concat([past, volume_side], axis=1))
        
        past.index = timeseries.index
        past.name = 'lagged_cumsum'

        new_timeseries = pd.concat([timeseries, org_cumsum, past, lagged_cumsum], axis=1)
        new_timeseries['diff'] = org_cumsum - past 
        return new_timeseries


# Table of Contents

## I. Introduction: Overview and Objective

## II. Gathering Data: Fetching, Filtering, Cleaning, and Configuration

In [3]:
book_btc_usd_df = pd.read_csv(r'./data/book_narrow_BTC-USD_2023.delim', sep='\t')
book_eth_btc_df = pd.read_csv(r'./data/book_narrow_ETH-BTC_2023.delim', sep='\t')
book_eth_usd_df = pd.read_csv(r'./data/book_narrow_ETH-USD_2023.delim', sep='\t')
trades_btc_usd_df = pd.read_csv(r'./data/trades_narrow_BTC-USD_2023.delim', sep='\t')
trades_eth_btc_df = pd.read_csv(r'./data/trades_narrow_ETH-BTC_2023.delim', sep='\t')
trades_eth_usd_df = pd.read_csv(r'./data/trades_narrow_ETH-USD_2023.delim', sep='\t')

In [4]:
train_split_portion = 0.4

# Splitting book_btc_usd_df
train_book_btc_usd_df = book_btc_usd_df.iloc[:int(book_btc_usd_df.shape[0] * train_split_portion)]
test_book_btc_usd_df = book_btc_usd_df.iloc[int(book_btc_usd_df.shape[0] * train_split_portion):]

# Splitting book_eth_btc_df
train_book_eth_btc_df = book_eth_btc_df.iloc[:int(book_eth_btc_df.shape[0] * train_split_portion)] 
test_book_eth_btc_df = book_eth_btc_df.iloc[int(book_eth_btc_df.shape[0] * train_split_portion):]

# Splitting book_eth_usd_df
train_book_eth_usd_df = book_eth_usd_df.iloc[:int(book_eth_usd_df.shape[0] * train_split_portion)] 
test_book_eth_usd_df = book_eth_usd_df.iloc[int(book_eth_usd_df.shape[0] * train_split_portion):]

# Splitting trades_btc_usd_df
train_trades_btc_usd_df = trades_btc_usd_df.iloc[:int(trades_btc_usd_df.shape[0] * train_split_portion)] 
test_trades_btc_usd_df = trades_btc_usd_df.iloc[int(trades_btc_usd_df.shape[0] * train_split_portion):]

# Splitting trades_eth_btc_df
train_trades_eth_btc_df = trades_eth_btc_df.iloc[:int(trades_eth_btc_df.shape[0] * train_split_portion)] 
test_trades_eth_btc_df = trades_eth_btc_df.iloc[int(trades_eth_btc_df.shape[0] * train_split_portion):]

# Splitting trades_eth_usd_df
train_trades_eth_usd_df = trades_eth_usd_df.iloc[:int(trades_eth_usd_df.shape[0] * train_split_portion)] 
test_trades_eth_usd_df = trades_eth_usd_df.iloc[int(trades_eth_usd_df.shape[0] * train_split_portion):]


In [7]:
train_book_eth_usd_df.head()

Unnamed: 0,Ask1PriceMillionths,Bid1PriceMillionths,Ask1SizeBillionths,Bid1SizeBillionths,Ask2PriceMillionths,Bid2PriceMillionths,Ask2SizeBillionths,Bid2SizeBillionths,received_utc_nanoseconds,timestamp_utc_nanoseconds,Mid
0,1629780000,1629670000,613579740,46580250,1629790000,1629660000,920369620,40425010,1674521275215127000,1674521271006006000,1629725000.0
1,1629780000,1629670000,613579740,46580250,1629790000,1629660000,920369620,125425010,1674521275215372000,1674521271008923000,1629725000.0
2,1629780000,1629670000,613579740,346580250,1629790000,1629660000,920369620,125425010,1674521275216940000,1674521271013030000,1629725000.0
3,1629780000,1629670000,613579740,334765040,1629790000,1629660000,920369620,125425010,1674521275236165000,1674521271071902000,1629725000.0
4,1629770000,1629670000,92158690,334765040,1629780000,1629660000,613579740,125425010,1674521275257733000,1674521271167792000,1629720000.0


In [6]:
train_trades_btc_usd_df.head()

Unnamed: 0,received_utc_nanoseconds,timestamp_utc_nanoseconds,PriceMillionths,SizeBillionths,Side
0,1674521267814309000,1674521267874527000,22970120000,87069600,-1
1,1674521267814046000,1674521267874527000,22970150000,25797600,-1
2,1674521267817981000,1674521267878712000,22970120000,10,-1
3,1674521267822734000,1674521267886114000,22969160000,217683140,-1
4,1674521274845338000,1674521268676444000,22969840000,10000000,1


In [None]:
train_trades_btc_usd_df 
train_trades_eth_btc_df
train_trades_eth_usd_df

In [13]:
data = {
    'time': [1,3,5,6,7,9,12],
    'vol':  [5,2,3,4,1,7,10],
    'side': [1,1,-1,1,1,-1,-1]
}
data = pd.DataFrame(data)
data = data.set_index('time')
data

Unnamed: 0_level_0,vol,side
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1,5,1
3,2,1
5,3,-1
6,4,1
7,1,1
9,7,-1
12,10,-1


In [40]:
Compute.lagged_tradeflow(data, 'vol', 'side', 2)

'Vol * Side'

Unnamed: 0_level_0,0,1
time,Unnamed: 1_level_1,Unnamed: 2_level_1
1,5,5
3,2,7
5,-3,4
6,4,8
7,1,9
9,-7,2
12,-10,-8


'Reindexed'

Unnamed: 0_level_0,0,1
time,Unnamed: 1_level_1,Unnamed: 2_level_1
-1,,5
1,5.0,7
3,2.0,4
4,2.0,8
5,-3.0,9
7,1.0,2
10,1.0,-8


'Cumsum'

Unnamed: 0_level_0,0,1
time,Unnamed: 1_level_1,Unnamed: 2_level_1
-1,,5
1,5.0,7
3,7.0,4
4,9.0,8
5,6.0,9
7,7.0,2
10,8.0,-8


Unnamed: 0_level_0,vol,side,cumsum,lagged_cumsum,new_cumsum,diff
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,5,1,5,,,
3,2,1,7,5.0,5.0,2.0
5,3,-1,4,7.0,7.0,-3.0
6,4,1,8,9.0,7.0,-1.0
7,1,1,9,6.0,4.0,3.0
9,7,-1,2,7.0,9.0,-5.0
12,10,-1,-8,8.0,9.0,-16.0


## III.

## IV.

## V.