In [98]:
import polars as pl

In [99]:
df = pl.read_parquet("i/amzn.parquet").select(pl.all().name.to_lowercase())
df = df.filter(pl.all_horizontal(pl.col('pred').arr.first().is_not_nan()))
df

symbol,date,open,close,high,low,volume,pred
str,date,f64,f64,f64,f64,i64,"array[f32, 8]"
"""AMZN""",2024-03-15,176.639999,174.419998,177.929993,173.899994,72115500,"[173.260468, 173.433975, … 173.815308]"
"""AMZN""",2024-03-18,175.800003,174.479996,176.690002,174.279999,31250700,"[174.102325, 174.274918, … 175.117676]"
"""AMZN""",2024-03-19,174.220001,175.899994,176.089996,173.520004,26880900,"[176.449158, 176.600403, … 177.051682]"
"""AMZN""",2024-03-20,176.139999,178.149994,178.529999,174.639999,29947200,"[178.290375, 178.366974, … 179.159897]"
"""AMZN""",2024-03-21,179.990005,178.149994,181.419998,178.149994,32824300,"[179.562714, 179.60614, … 179.870117]"
…,…,…,…,…,…,…,…
"""AMZN""",2024-10-31,190.509995,186.399994,190.600006,185.229996,75146800,"[184.40036, 183.942627, … 184.04361]"
"""AMZN""",2024-11-01,199.0,197.929993,200.5,197.020004,99687800,"[200.619995, 199.987335, … 200.000198]"
"""AMZN""",2024-11-04,196.449997,195.779999,197.330002,194.309998,38492100,"[196.872269, 196.485413, … 196.674103]"
"""AMZN""",2024-11-05,196.039993,199.5,199.820007,195.990005,30564800,"[201.594681, 201.018646, … 201.350952]"


In [100]:
from typing import Iterable


def eucledian_length(x: pl.Expr, y: pl.Expr):
    return (x**2 + y**2).sqrt()

def vangle(i: pl.Expr, base: pl.Expr, a: pl.Expr, b: pl.Expr):
    a = a - base
    b = b - base
    angle_cos = (i * i + a * b) / (eucledian_length(i, a) * eucledian_length(i, b))
    return angle_cos.clip(-1, 1).arccos()

def meanstd(cols: Iterable[str]):
    return (
        pl.struct(mean=pl.mean(col), std=pl.std(col)).alias(col)
        for col in cols
    )

In [101]:
preds = df.with_columns(pl.col('pred').arr.to_struct(lambda i: f'pred_{i + 1}')).unnest('pred')
preds

symbol,date,open,close,high,low,volume,pred_1,pred_2,pred_3,pred_4,pred_5,pred_6,pred_7,pred_8
str,date,f64,f64,f64,f64,i64,f32,f32,f32,f32,f32,f32,f32,f32
"""AMZN""",2024-03-15,176.639999,174.419998,177.929993,173.899994,72115500,173.260468,173.433975,174.134033,173.89415,174.549118,174.525635,174.175385,173.815308
"""AMZN""",2024-03-18,175.800003,174.479996,176.690002,174.279999,31250700,174.102325,174.274918,174.496017,174.429047,175.118759,174.910553,175.387848,175.117676
"""AMZN""",2024-03-19,174.220001,175.899994,176.089996,173.520004,26880900,176.449158,176.600403,176.623657,176.723907,176.99942,176.733429,177.150391,177.051682
"""AMZN""",2024-03-20,176.139999,178.149994,178.529999,174.639999,29947200,178.290375,178.366974,178.324554,178.564926,178.793854,178.812958,179.076111,179.159897
"""AMZN""",2024-03-21,179.990005,178.149994,181.419998,178.149994,32824300,179.562714,179.60614,179.393784,179.721878,179.279785,179.557236,179.522644,179.870117
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""AMZN""",2024-10-31,190.509995,186.399994,190.600006,185.229996,75146800,184.40036,183.942627,183.985168,183.692749,184.428925,184.378113,184.264816,184.04361
"""AMZN""",2024-11-01,199.0,197.929993,200.5,197.020004,99687800,200.619995,199.987335,199.953857,199.71373,200.343536,199.859772,199.640808,200.000198
"""AMZN""",2024-11-04,196.449997,195.779999,197.330002,194.309998,38492100,196.872269,196.485413,196.704773,196.762833,196.953674,196.603027,196.345016,196.674103
"""AMZN""",2024-11-05,196.039993,199.5,199.820007,195.990005,30564800,201.594681,201.018646,201.293457,200.920227,200.989212,200.7444,200.930756,201.350952


In [102]:
preds.select(
    ((pl.col('close').shift(-i) - pl.col(f'pred_{i}')).abs() / pl.col('close').shift(-i).abs()).alias(f'mape_{i}')
    for i in range(1, 9)
).select(
    meanstd(f'mape_{i}' for i in range(1, 9))
)

mape_1,mape_2,mape_3,mape_4,mape_5,mape_6,mape_7,mape_8
struct[2],struct[2],struct[2],struct[2],struct[2],struct[2],struct[2],struct[2]
"{0.014846,0.013808}","{0.020078,0.019414}","{0.025081,0.023143}","{0.029835,0.026608}","{0.03403,0.028208}","{0.03746,0.029541}","{0.04084,0.030245}","{0.044337,0.029919}"


In [103]:
def log_profit(i: int):
    return (pl.col('close').shift(-i) / pl.col('close')).log()

profits = preds.select(
    (
        pl.when(pl.col(f'pred_{i}') > pl.col('close'))
        .then(log_profit(i))
        .otherwise(-log_profit(i))
    ).alias(f'profit_{i}')
    for i in range(1, 9)
).select(pl.all().mean().exp() - 1)
profits.select(pl.all().round(4))

profit_1,profit_2,profit_3,profit_4,profit_5,profit_6,profit_7,profit_8
f64,f64,f64,f64,f64,f64,f64,f64
-0.0013,-0.0021,-0.0063,-0.0056,-0.0055,-0.0055,-0.0066,-0.0092


In [104]:
profits.select(
    ((1 + pl.col(f'profit_{i}')) ** (251 / i) - 1).name.keep()
    for i in range(1, 9)
).select(pl.all().round(3))

profit_1,profit_2,profit_3,profit_4,profit_5,profit_6,profit_7,profit_8
f64,f64,f64,f64,f64,f64,f64,f64
-0.27,-0.227,-0.412,-0.298,-0.243,-0.207,-0.212,-0.252


In [105]:
total_days = (pl.last('date') - pl.first('date')).dt.total_days()
price_diff = pl.last('close') / pl.first('close')
df.select(
    price_diff ** (251 / pl.len()) - 1
).select(pl.all().round(4))

close
f64
0.2786
