In [1]:
import polars as pl
import pandas as pd
import polars.datatypes as T
import numpy as np

from datetime import datetime, time
import sys
sys.path.insert(0, 'C:\\Users\\rockh\\Repositories\\Tsuro\\')

from tsuro.datasets import load_cba_trades
from tsuro.preprocessing import cast_strings_to_datetime

In [32]:
import polars.datatypes as T
from polars.testing import assert_frame_equal
from tsuro.data_engineering import ewmstd

volume_test = pl.Series(
    "volume_test", [150.1,16.1,2.2,5.2,25.4,40.000002]
)

test_df = pl.DataFrame({
    "class": [0,1,0,1,0,1],
    "hour": [12,10,13,10,12,10],
    "minute": [20,5,15,15,12,10],
    "volume": [150,16,2,5,25,40]
    }
).with_columns(
    pl.time(pl.col("hour"), pl.col("minute")).alias("time")
)

test_df = ewmstd(
        test_df,
        ewmstd_cols=["volume"],
        spans=[2, 3],
        partition_by="class",
        order_by="time",
    )

#test_df.sort(by = ["class", "time"])
test_df

class,hour,minute,volume,time,volume_ewmstd_span2,volume_ewmstd_span3
i64,i64,i64,i64,time,f64,f64
0,12,20,150,12:20:00,88.388348,87.372356
1,10,5,16,10:05:00,11.313708,7.406561
0,13,15,2,13:15:00,104.651804,86.312969
1,10,15,5,10:15:00,24.748737,20.209616
0,12,12,25,12:12:00,17.67767,11.572751
1,10,10,40,10:10:00,16.970563,23.126979


In [14]:
from tsuro.data_engineering import ewma
test_df = pl.DataFrame({
    "class": [1,1,1],
    "hour": [10,10,10],
    "minute": [5,10,15],
    "volume": [16,40,5]
    }
).with_columns(
    pl.time(pl.col("hour"), pl.col("minute")).alias("time")
)

test_df = ewma(
        test_df,
        ewma_cols=["volume"],
        spans=[3]
    )
test_df

class,hour,minute,volume,time,volume_ewma_span3
i64,i64,i64,i64,time,f64
1,10,5,16,10:05:00,2.285714
1,10,10,40,10:10:00,13.714286
1,10,15,5,10:15:00,16.571429


In [29]:
from tsuro.data_engineering import moving_average
test_df = pl.DataFrame({
    "class": [1,1,1],
    "hour": [10,10,10],
    "minute": [5,10,15],
    "volume": [16,40,5]
    }
).with_columns(
    pl.time(pl.col("hour"), pl.col("minute")).alias("time")
)

test_df = moving_average(
        test_df,
        moving_avg_cols =["volume"],
        weights = [1/7,2/7,4/7]
    )
test_df

class,hour,minute,volume,time,volume_ma
i64,i64,i64,i64,time,f64
1,10,5,16,10:05:00,2.285714
1,10,10,40,10:10:00,13.714286
1,10,15,5,10:15:00,16.571429


In [8]:
from tsuro.data_engineering import moving_variance

test_df = moving_variance(
    test_df,
    moving_var_cols = "volume",
    weights = [1/7,2/7,4/7],
    partition_by = "class",
    order_by = "time",
    unbiased = True,
    ddof = 1
)
test_df

class,hour,minute,volume,time,volume_ewmstd_span2,volume_ewmstd_span3,volume_mvar
i64,i64,i64,i64,time,f64,f64,f64
0,12,20,150,12:20:00,88.388348,87.372356,7633.928571
1,10,5,16,10:05:00,11.313708,7.406561,54.857143
0,13,15,2,13:15:00,104.651804,86.312969,7449.928571
1,10,15,5,10:15:00,24.748737,20.209616,408.428571
0,12,12,25,12:12:00,17.67767,11.572751,133.928571
1,10,10,40,10:10:00,16.970563,23.126979,534.857143


In [3]:
trades_df = load_cba_trades(lazy=False)
trades_df

Unnamed: 0_level_0,time,price,volume,dollar_value,market,condition
i64,str,f64,i64,f64,str,str
0,"""6/10/2021 17:00""",100.08,12904,1.2914e6,"""ASX""","""ET XT"""
1,"""6/10/2021 16:54""",100.08,19280,1929542.4,"""ASX""","""ET XT"""
2,"""6/10/2021 16:47""",100.3127,18517,1.8575e6,"""CXA""","""SX XT"""
3,"""6/10/2021 16:46""",100.08,5364,536829.12,"""CXA""",
4,"""6/10/2021 16:45""",100.08,24624,2.4644e6,"""ASX""","""ET XT"""
…,…,…,…,…,…,…
66572,"""6/10/2021 7:05""",95.01,6000,570060.0,"""ASX""","""EC XT"""
66573,"""6/10/2021 7:05""",94.01,1200,112812.0,"""ASX""","""EC"""
66574,"""6/10/2021 7:05""",94.01,18700,1.757987e6,"""ASX""","""EC XT"""
66575,"""6/10/2021 7:05""",94.0,1200,112800.0,"""ASX""","""EC"""


In [4]:
# Convert "Time" Column into Datetime Object
trades_df = cast_strings_to_datetime(trades_df, columns = ["time"])
trades_df = trades_df.rename(
   {"time": "date_time"}
)


# Grab Time Fields
trades_df = trades_df.with_columns(
    pl.col("date_time").dt.time().alias("time"),
    pl.col("date_time").dt.hour().alias("hour"),
    pl.col("date_time").dt.minute().alias("minute"),
    pl.col("date_time").dt.second().alias("second"),
    pl.col("date_time").dt.millisecond().alias("millisecond")
)

# Market Hours
trades_df = trades_df.filter(
    pl.col("time").is_between(
        time(9,0), time(17,0)
    )
)
trades_df

Unnamed: 0_level_0,date_time,price,volume,dollar_value,market,condition,time,hour,minute,second,millisecond
i64,datetime[μs],f64,i64,f64,str,str,time,i8,i8,i8,i32
0,2021-06-10 17:00:00,100.08,12904,1.2914e6,"""ASX""","""ET XT""",17:00:00,17,0,0,0
1,2021-06-10 16:54:00,100.08,19280,1929542.4,"""ASX""","""ET XT""",16:54:00,16,54,0,0
2,2021-06-10 16:47:00,100.3127,18517,1.8575e6,"""CXA""","""SX XT""",16:47:00,16,47,0,0
3,2021-06-10 16:46:00,100.08,5364,536829.12,"""CXA""",,16:46:00,16,46,0,0
4,2021-06-10 16:45:00,100.08,24624,2.4644e6,"""ASX""","""ET XT""",16:45:00,16,45,0,0
…,…,…,…,…,…,…,…,…,…,…,…
66537,2021-06-10 10:02:00,102.0,4,408.0,"""ASX""",,10:02:00,10,2,0,0
66538,2021-06-10 10:02:00,102.0,30,3060.0,"""ASX""",,10:02:00,10,2,0,0
66539,2021-06-10 10:02:00,102.0,215,21930.0,"""ASX""",,10:02:00,10,2,0,0
66540,2021-06-10 10:02:00,102.0,233,23766.0,"""ASX""",,10:02:00,10,2,0,0


In [5]:
# Sort by Time
trades_df = trades_df.sort(by = "time")
trades_df

Unnamed: 0_level_0,date_time,price,volume,dollar_value,market,condition,time,hour,minute,second,millisecond
i64,datetime[μs],f64,i64,f64,str,str,time,i8,i8,i8,i32
66070,2021-06-10 10:02:00,102.93,29,2984.97,"""ASX""",,10:02:00,10,2,0,0
66071,2021-06-10 10:02:00,102.93,7,720.51,"""ASX""",,10:02:00,10,2,0,0
66072,2021-06-10 10:02:00,102.93,7,720.51,"""ASX""",,10:02:00,10,2,0,0
66073,2021-06-10 10:02:00,102.94,7,720.58,"""ASX""",,10:02:00,10,2,0,0
66074,2021-06-10 10:02:00,102.94,7,720.58,"""ASX""",,10:02:00,10,2,0,0
…,…,…,…,…,…,…,…,…,…,…,…
4,2021-06-10 16:45:00,100.08,24624,2.4644e6,"""ASX""","""ET XT""",16:45:00,16,45,0,0
3,2021-06-10 16:46:00,100.08,5364,536829.12,"""CXA""",,16:46:00,16,46,0,0
2,2021-06-10 16:47:00,100.3127,18517,1.8575e6,"""CXA""","""SX XT""",16:47:00,16,47,0,0
1,2021-06-10 16:54:00,100.08,19280,1929542.4,"""ASX""","""ET XT""",16:54:00,16,54,0,0


In [6]:
trades_df = trades_df.with_columns(
    pl.col("date_time").rank(method = "ordinal").alias("rank")
)
trades_df = trades_df.with_columns(
    pl.when(
        pl.col("rank") % 2 == 0
    ).then(0).otherwise(1).alias("class")
)
trades_df

Unnamed: 0_level_0,date_time,price,volume,dollar_value,market,condition,time,hour,minute,second,millisecond,rank,class
i64,datetime[μs],f64,i64,f64,str,str,time,i8,i8,i8,i32,u32,i32
66070,2021-06-10 10:02:00,102.93,29,2984.97,"""ASX""",,10:02:00,10,2,0,0,1,1
66071,2021-06-10 10:02:00,102.93,7,720.51,"""ASX""",,10:02:00,10,2,0,0,2,0
66072,2021-06-10 10:02:00,102.93,7,720.51,"""ASX""",,10:02:00,10,2,0,0,3,1
66073,2021-06-10 10:02:00,102.94,7,720.58,"""ASX""",,10:02:00,10,2,0,0,4,0
66074,2021-06-10 10:02:00,102.94,7,720.58,"""ASX""",,10:02:00,10,2,0,0,5,1
…,…,…,…,…,…,…,…,…,…,…,…,…,…
4,2021-06-10 16:45:00,100.08,24624,2.4644e6,"""ASX""","""ET XT""",16:45:00,16,45,0,0,66538,0
3,2021-06-10 16:46:00,100.08,5364,536829.12,"""CXA""",,16:46:00,16,46,0,0,66539,1
2,2021-06-10 16:47:00,100.3127,18517,1.8575e6,"""CXA""","""SX XT""",16:47:00,16,47,0,0,66540,0
1,2021-06-10 16:54:00,100.08,19280,1929542.4,"""ASX""","""ET XT""",16:54:00,16,54,0,0,66541,1


In [7]:
trades_df.columns

['',
 'date_time',
 'price',
 'volume',
 'dollar_value',
 'market',
 'condition',
 'time',
 'hour',
 'minute',
 'second',
 'millisecond',
 'rank',
 'class']

In [8]:
df = pl.DataFrame({"time": ["10:00", "10:05", "10:10"], "price": [50.50, 60.10, 55.20], "volume": [120, 300, 200]})
df

time,price,volume
str,f64,i64
"""10:00""",50.5,120
"""10:05""",60.1,300
"""10:10""",55.2,200


In [9]:
# trades_df = trades_df.with_columns(
#     pl.lit(np.random.rand(trades_df.shape[0])).alias("random")
# )

# trades_df = trades_df.with_columns(
#     pl.col("random").round().alias("class")
# ).cast(
#     {"class": T.Int32}
# )

# trades_df

In [10]:
from tsuro.data_structures import StandardBars

bars = StandardBars(datetime_col = "date_time", price_col = "price", volume_col = "volume")

In [11]:
pdf = bars.create_volume_bars(
    trades_df
)
pdf

bar_index,open_price,high_price,low_price,close_price,datetime_start,datetime_end
i32,f64,f64,f64,f64,datetime[μs],datetime[μs]
15,100.3,100.33,99.97,100.0,2021-06-10 10:58:00,2021-06-10 11:03:00
30,99.93,99.95,99.64,99.65,2021-06-10 13:24:00,2021-06-10 13:36:00
27,99.92,99.98,99.77,99.91,2021-06-10 12:44:00,2021-06-10 12:55:00
3,102.5,102.59,102.01,102.07,2021-06-10 10:05:00,2021-06-10 10:10:00
24,100.205,100.25,99.84,99.88,2021-06-10 12:03:00,2021-06-10 12:12:00
…,…,…,…,…,…,…
47,100.08,100.08,100.08,100.08,2021-06-10 16:10:00,2021-06-10 16:10:00
38,99.98,100.01,99.84,100.01,2021-06-10 15:02:00,2021-06-10 15:14:00
62,100.3127,104.33,99.9673,99.9673,2021-06-10 16:22:00,2021-06-10 16:26:00
50,100.08,100.08,100.08,100.08,2021-06-10 16:10:00,2021-06-10 16:10:00


In [12]:
# trades_df = trades_df.with_columns(
#         pl.col("volume")
#         .ewm_mean(span=3, adjust = True, min_periods = 1)
#         .over(partition_by = None, order_by = "rank")
#         .alias(
#             f"volume_ewma_span3"
#         )
# )
# trades_df

In [13]:
from tsuro.data_engineering import ewma

trades_df = ewma(
    trades_df,
    ewma_cols = ["price", "volume"],
    spans = [3,4],
    order_by = "rank",
)
trades_df

Unnamed: 0_level_0,date_time,price,volume,dollar_value,market,condition,time,hour,minute,second,millisecond,rank,class,price_ewma_span3,price_ewma_span4,volume_ewma_span3,volume_ewma_span4
i64,datetime[μs],f64,i64,f64,str,str,time,i8,i8,i8,i32,u32,i32,f64,f64,f64,f64
66070,2021-06-10 10:02:00,102.93,29,2984.97,"""ASX""",,10:02:00,10,2,0,0,1,1,14.704286,10.217316,4.142857,2.878676
66071,2021-06-10 10:02:00,102.93,7,720.51,"""ASX""",,10:02:00,10,2,0,0,2,0,44.112857,27.246176,6.142857,4.036765
66072,2021-06-10 10:02:00,102.93,7,720.51,"""ASX""",,10:02:00,10,2,0,0,3,1,102.93,55.62761,10.142857,5.966912
66073,2021-06-10 10:02:00,102.94,7,720.58,"""ASX""",,10:02:00,10,2,0,0,4,0,102.935714,102.934596,7.0,9.183824
66074,2021-06-10 10:02:00,102.94,7,720.58,"""ASX""",,10:02:00,10,2,0,0,5,1,102.938571,102.937353,7.0,7.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
4,2021-06-10 16:45:00,100.08,24624,2.4644e6,"""ASX""","""ET XT""",16:45:00,16,45,0,0,66538,0,100.08,100.08,17972.142857,18009.264706
3,2021-06-10 16:46:00,100.08,5364,536829.12,"""CXA""",,16:46:00,16,46,0,0,66539,1,100.08,100.08,10402.285714,11895.716912
2,2021-06-10 16:47:00,100.3127,18517,1.8575e6,"""CXA""","""SX XT""",16:47:00,16,47,0,0,66540,0,100.212971,100.186939,15631.428571,14272.165441
1,2021-06-10 16:54:00,100.08,19280,1929542.4,"""ASX""","""ET XT""",16:54:00,16,54,0,0,66541,1,100.146486,100.144164,17074.0,17297.805147


In [14]:
trades_masked_df = trades_df.filter(
    pl.col("time").is_between(
        time(11,0), time(16,0)
    )
)
trades_filtered_df = trades_masked_df.select(
    "rank",
    "date_time",
    "price",
    "volume",
    "volume_ewma_span3"
)
trades_filtered_df

rank,date_time,price,volume,volume_ewma_span3
u32,datetime[μs],f64,i64,f64
18772,2021-06-10 11:00:00,100.15,15,31.142857
18773,2021-06-10 11:00:00,100.15,2,8.428571
18774,2021-06-10 11:00:00,100.15,16,11.857143
18775,2021-06-10 11:00:00,100.16,50,33.428571
18776,2021-06-10 11:00:00,100.16,4,18.857143
…,…,…,…,…
66182,2021-06-10 15:59:00,99.95,1,3.857143
66183,2021-06-10 15:59:00,99.95,4,3.428571
66184,2021-06-10 15:59:00,99.935,60,35.571429
66185,2021-06-10 15:59:00,99.935,44,42.857143


In [15]:
from tsuro.data_engineering import moving_variance

trades_var_df = moving_variance(
    trades_filtered_df,
    "volume",
    weights = {"var_unnormed": [1,2,3], "var_normed": [1/6,2/6,3/6]},
    unbiased = False
)

trades_var_df = moving_variance(
    trades_var_df,
    "volume",
    weights = {"var_unnormed_unbias": [1,2,3], "var_normed_unbias": [1/6,2/6,3/6]},
    unbiased = True
)
trades_var_df

rank,date_time,price,volume,volume_ewma_span3,volume_var_unnormed,volume_var_normed,volume_var_unnormed_unbias,volume_var_normed_unbias
u32,datetime[μs],f64,i64,f64,f64,f64,f64,f64
18772,2021-06-10 11:00:00,100.15,15,31.142857,31.25,31.25,51.136364,51.136364
18773,2021-06-10 11:00:00,100.15,2,8.428571,28.805556,28.805556,47.136364,47.136364
18774,2021-06-10 11:00:00,100.15,16,11.857143,42.138889,42.138889,68.954545,68.954545
18775,2021-06-10 11:00:00,100.16,50,33.428571,395.555556,395.555556,647.272727,647.272727
18776,2021-06-10 11:00:00,100.16,4,18.857143,428.888889,428.888889,701.818182,701.818182
…,…,…,…,…,…,…,…,…
66182,2021-06-10 15:59:00,99.95,1,3.857143,13.888889,13.888889,22.727273,22.727273
66183,2021-06-10 15:59:00,99.95,4,3.428571,3.222222,3.222222,5.272727,5.272727
66184,2021-06-10 15:59:00,99.935,60,35.571429,813.25,813.25,1330.772727,1330.772727
66185,2021-06-10 15:59:00,99.935,44,42.857143,350.222222,350.222222,573.090909,573.090909


In [16]:
from tsuro.data_engineering import ewmstd

trades_ewm_df = ewmstd(
    trades_filtered_df,
    ewmstd_cols = ["price", "volume"],
    spans = [3,4],
    order_by = "rank",
)
trades_ewm_df

rank,date_time,price,volume,volume_ewma_span3,price_ewmstd_span3,price_ewmstd_span4,volume_ewmstd_span3,volume_ewmstd_span4
u32,datetime[μs],f64,i64,f64,f64,f64,f64,f64
18772,2021-06-10 11:00:00,100.15,15,31.142857,0.0,0.0,6.943651,5.457052
18773,2021-06-10 11:00:00,100.15,2,8.428571,0.0,0.0,6.734771,5.397995
18774,2021-06-10 11:00:00,100.15,16,11.857143,0.0,0.0,8.259194,9.002617
18775,2021-06-10 11:00:00,100.16,50,33.428571,0.006547,0.006063,25.950502,24.005108
18776,2021-06-10 11:00:00,100.16,4,18.857143,0.004629,0.005368,26.6029,24.28087
…,…,…,…,…,…,…,…,…
66182,2021-06-10 15:59:00,99.95,1,3.857143,,0.0,4.818121,4.50743
66183,2021-06-10 15:59:00,99.95,4,3.428571,,0.0,2.220039,3.434344
66184,2021-06-10 15:59:00,99.935,60,35.571429,0.00982,0.009095,37.335353,34.326241
66185,2021-06-10 15:59:00,99.935,44,42.857143,0.006944,0.008051,22.92846,26.570079


In [18]:
from tsuro.data_engineering import moving_stddev

trades_ewm_df = moving_stddev(
    trades_ewm_df,
    "volume",
    weights = {"ewmstd_test": [1/7,2/7,4/7]},
    unbiased = True
)

In [19]:
trades_ewm_df

rank,date_time,price,volume,volume_ewma_span3,price_ewmstd_span3,price_ewmstd_span4,volume_ewmstd_span3,volume_ewmstd_span4,volume_ewmstd_test
u32,datetime[μs],f64,i64,f64,f64,f64,f64,f64,f64
18772,2021-06-10 11:00:00,100.15,15,31.142857,0.0,0.0,6.943651,5.457052,6.943651
18773,2021-06-10 11:00:00,100.15,2,8.428571,0.0,0.0,6.734771,5.397995,6.734771
18774,2021-06-10 11:00:00,100.15,16,11.857143,0.0,0.0,8.259194,9.002617,8.259194
18775,2021-06-10 11:00:00,100.16,50,33.428571,0.006547,0.006063,25.950502,24.005108,25.950502
18776,2021-06-10 11:00:00,100.16,4,18.857143,0.004629,0.005368,26.6029,24.28087,26.6029
…,…,…,…,…,…,…,…,…,…
66182,2021-06-10 15:59:00,99.95,1,3.857143,,0.0,4.818121,4.50743,4.818121
66183,2021-06-10 15:59:00,99.95,4,3.428571,,0.0,2.220039,3.434344,2.220039
66184,2021-06-10 15:59:00,99.935,60,35.571429,0.00982,0.009095,37.335353,34.326241,37.335353
66185,2021-06-10 15:59:00,99.935,44,42.857143,0.006944,0.008051,22.92846,26.570079,22.92846


In [32]:
import numpy as np
def weighted_avg(
    values: list[float],
    weights: list[float]
):
    dot = np.dot(values,weights)
    total_weight = np.sum(weights)
    
    return dot/total_weight

weighted_avg([16,50,4], [1,2,3])

21.333333333333332

In [35]:
def weighted_var(
    values: list[float],
    weights: list[float],
    mean: float
):
    values_diff = [value - mean for value in values]
    sq_err = np.power(values_diff, 2)
    print(sq_err)
    dot = np.dot(sq_err, weights)
    total_weight = np.sum(weights)
    
    return dot/total_weight

weighted_var([16,50,4], [1,2,3], mean = 21.333)

[ 28.440889 821.796889 300.432889]


428.888889

In [24]:
def get_linear_coeffs(x_0,y_0,z_0,x_1,y_1,z_1):
    """
    Fit z = alpha*x + beta*y
    """
    
    det = x_0*y_1 - y_0*x_1
    alpha = (y_1*z_0 - y_0*z_1)/det
    beta = (-x_1*z_0 + x_0*z_1)/det
    
    return alpha, beta

triples = [
    [50, 16,15.19753],
    [16, 2,37.59259],
    [44, 60,25.02695],
    [60,4,43.080861]
]

for idx1 in range(len(triples)):
    triple1 = triples[idx1]
    
    for idx2 in range(idx1+1,len(triples)):
        triple2 = triples[idx2]
        
        total_list = triple1 + triple2
        print(total_list)
        alpha, beta = get_linear_coeffs(*total_list)
        
        print(f"Triple 1: {triple1}| Triple 2: {triple2}")
        print(f"alpha = {alpha}\nbeta = {beta}")
        print(f"Triple1[z] = {alpha*triple1[0] + beta*triple1[1]}")
        print(f"Triple2[z] = {alpha*triple2[0] + beta*triple2[1]}\n")

[50, 16, 15.19753, 16, 2, 37.59259]
Triple 1: [50, 16, 15.19753]| Triple 2: [16, 2, 37.59259]
alpha = 3.660810128205128
beta = -10.490186025641025
Triple1[z] = 15.19753
Triple2[z] = 37.59259

[50, 16, 15.19753, 44, 60, 25.02695]
Triple 1: [50, 16, 15.19753]| Triple 2: [44, 60, 25.02695]
alpha = 0.22274416376306622
beta = 0.25377011324041804
Triple1[z] = 15.19753
Triple2[z] = 25.026949999999996

[50, 16, 15.19753, 60, 4, 43.080861]
Triple 1: [50, 16, 15.19753]| Triple 2: [60, 4, 43.080861]
alpha = 0.826978494736842
beta = -1.6344621710526315
Triple1[z] = 15.197529999999997
Triple2[z] = 43.080861

[16, 2, 37.59259, 44, 60, 25.02695]
Triple 1: [16, 2, 37.59259]| Triple 2: [44, 60, 25.02695]
alpha = 2.5292448394495417
beta = -1.4376637155963305
Triple1[z] = 37.59259000000001
Triple2[z] = 25.02695

[16, 2, 37.59259, 60, 4, 43.080861]
Triple 1: [16, 2, 37.59259]| Triple 2: [60, 4, 43.080861]
alpha = -1.1465828214285716
beta = 27.968957571428575
Triple1[z] = 37.59259
Triple2[z] = 43.080861

[

In [85]:
data = {
        "volume": [2,16,50,4],
        "rank": [1,2,3,4]

}
df = pl.DataFrame(
    data
)

df = df.with_columns(
    pl.col("volume").ewm_mean(span = 3).alias("volume_ewma_span3")
)
df

volume,rank,volume_ewma_span3
i64,i64,f64
2,1,2.0
16,2,11.333333
50,3,33.428571
4,4,17.733333


In [89]:
span = 3
alpha = 2/(span+1)

bias_correction = alpha/(1-(1-alpha)**span)
weights = [bias_correction*(1-alpha)**(span-1-i) for i in range(span)]

df = df.with_columns(
    pl.col("volume").rolling_mean(window_size = span, weights = weights).alias("custom_volume_ewma_span3")
)
df

volume,rank,volume_ewma_span3,custom_volume_ewma_span3
i64,i64,f64,f64
2,1,2.0,
16,2,11.333333,
50,3,33.428571,33.428571
4,4,17.733333,18.857143


In [46]:
trades_1 = trades_df.filter(
    pl.col("class") == 1
)
trades_1

rank,date_time,price,volume,dollar_value,time,class,price_ewma_span2,price_ewma_span4,volume_ewma_span2,volume_ewma_span4,volume_lag1,volume_lag2,volume_lag3,volume_lag4
u32,datetime[μs],f64,i64,f64,time,i32,f64,f64,f64,f64,i64,i64,i64,i64
1,2021-06-10 11:00:00,100.15,15,1502.25,11:00:00,1,100.15,100.15,15.0,15.0,,,,
3,2021-06-10 11:00:00,100.15,16,1602.4,11:00:00,1,100.15,100.15,15.75,15.625,15,,,
5,2021-06-10 11:00:00,100.16,4,400.64,11:00:00,1,100.156923,100.155102,7.615385,9.693878,16,15,,
7,2021-06-10 11:00:00,100.16,393,39362.88,11:00:00,1,100.159,100.157353,267.75,185.845588,4,16,15,
9,2021-06-10 11:00:00,100.16,12,1201.92,11:00:00,1,100.159669,100.158501,96.545455,110.444136,393,4,16,15
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
47407,2021-06-10 15:59:00,99.95,30,2998.5,15:59:00,1,99.947805,99.942304,30.453214,25.20014,43,11,1,1
47409,2021-06-10 15:59:00,99.95,11,1099.45,15:59:00,1,99.949268,99.945382,17.484405,19.520084,30,43,11,1
47411,2021-06-10 15:59:00,99.95,1,99.95,15:59:00,1,99.949756,99.947229,6.494802,12.11205,11,30,43,11
47413,2021-06-10 15:59:00,99.935,60,5996.1,15:59:00,1,99.939919,99.942338,42.164934,31.26723,1,11,30,43


In [None]:
import numpy as np

def bars(x,y):
    
    return np.int64(x/y)*y

print(np.arange(100))
bars(np.arange(100),25)