In [1]:
# %autosave 15

Autosaving every 15 seconds


In [2]:
"""
Simple test for formula loading and evaluation.
"""
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join("/home/ubuntu/projects/hindsight/examples/data/ast", '../../..')))

import matplotlib.pyplot as plt
import jax
import pandas as pd
import pandas_ta as ta
import jax.numpy as jnp

from src import DataManager
from src.data.ast import parse_formula
from src.data.ast.manager import FormulaManager
from src.data.ast.functions import register_built_in_functions, get_function_context
from src.data.core import prepare_for_jit

In [3]:
# Register the built-in functions up front; later cells add the registered
# functions to each evaluation context via get_function_context().
register_built_in_functions()

In [4]:
# Initialize the formula manager.
# The FormulaManager initializer auto-loads all formula definitions, so no explicit
# load call is needed. The old explicit call is deprecated and kept only for reference:
#   manager.load_directory("../../.././src/data/ast/definitions")
manager = FormulaManager()

In [None]:
# Load CRSP data through the DataManager.
dm = DataManager()

# The same path identifies the source in the request and keys the returned mapping.
crsp_path = "wrds/equity/crsp"
# Used both as the range start and as the lower bound of the date filter.
crsp_start = "2020-01-01"

# Processor settings, passed through to the loader unchanged.
# Key order matches the original literal.
crsp_processors = {
    "replace_values": {
        "source": "delistings",
        "rename": [["dlstdt", "time"]],
        "identifier": "permno",
        "from_var": "dlret",
        "to_var": "ret",
    },
    # One merge entry per msenames column; everything else is shared.
    "merge_table": [
        {
            "source": "msenames",
            "identifier": "permno",
            "column": column,
            "axis": "asset",
        }
        for column in ("comnam", "exchcd")
    ],
    "set_permco_coord": True,
    "fix_market_equity": True,
}

crsp_config = {
    "start_date": crsp_start,
    "end_date": "2024-01-01",
    "freq": "D",
    "filters": {"date__gte": crsp_start},
    "processors": crsp_processors,
}

# get_data returns a mapping keyed by data path; keep only the CRSP entry.
ds = dm.get_data(
    {"data_sources": [{"data_path": crsp_path, "config": crsp_config}]}
)[crsp_path]

wrds/equity/crsp: Attemping to load found cache(/home/suchismit/data/cache/wrds/equity/crsp/edcb130770ec1dbca15938371cc00f10_2020-01-01_2023-12-31.nc).


In [None]:
# Create the closing prices used by the indicator cells below: prc divided by cfacpr.
# NOTE(review): presumably prc is the raw CRSP price and cfacpr the cumulative price
# adjustment factor, which would make "close" an adjusted price -- confirm against the schema.

ds["close"] = ds["prc"] / ds["cfacpr"]

#### Computation & Check

In [None]:
# Prepare the data for JIT, since some vars (for example, strings) are not JIT compatible.
# prepare_for_jit returns the JIT-ready dataset together with a second object, bound to `recover`.
ds_jit, recover = prepare_for_jit(ds)

# Display the JIT-ready dataset.
ds_jit

In [None]:
## Uncomment the below line to view the recovery dict (this primarily contains metadata and
## vars that were removed to make the dataset JIT compatible) 

# recover

### RSI

In [None]:
# Actual computation

# Evaluation context: the formula inputs and the dataset first, then every
# registered function layered on top.
context = dict(price="close", window=14, _dataset=ds_jit)
context.update(get_function_context())


# NOTE: ideally this would be decorated with @jax.jit, but the ipy kernel doesn't
# seem to be happy with it, so the evaluation runs un-jitted. Ignore for now.
def _eval(context):
    """Evaluate the "rsi" formula against ``context`` via the formula manager."""
    return manager.evaluate("rsi", context)

In [None]:
# Evaluate the formula with the context; the result is attached to the dataset in the next cell.
result = _eval(context)

In [None]:
asset_permno = 14593 # Apple's permno
ds = ds.assign(rsi=result) # Assign the computed RSI to the original dataset (ds, not ds_jit)

In [None]:
# All computations and assignments are done, so we can move back to a more manageable format for the ds.

# Select Apple, keeping the RSI we just computed plus the closing price.
subset = ds[['rsi', 'close']].sel(asset=asset_permno) 
ts = subset.dt.to_time_indexed() # refer to the docs (essentially flattens the time dimension).

In [None]:
# Time is now a single flattened dimension, so xarray's plot functions work directly.
# xarray does not support multidimensional time plotting, hence ALWAYS flatten before plotting (for now).
ax = plt.gca()  # the current axes, i.e. the ones the pyplot helpers would target

ts['rsi'].plot.line(x="time", ax=ax, label='RSI', color='blue', alpha=0.5)
ts['close'].plot.line(x="time", ax=ax, label='Adjusted closing prices', color='cyan', alpha=0.5)

# Set the title and labels after plotting so they replace xarray's automatic ones.
ax.set_title(f"Adjusted Price vs. RSI (Asset: {asset_permno})")
ax.set_xlabel("Time")
ax.set_ylabel("Price")
ax.legend()
ax.grid(False)

In [None]:
# We will do a manual eye-check.

# Get the RSIs and the closing prices from the flattened ds and convert to pandas.
df = ts[['rsi', 'close']].to_dataframe()

df[-20:] # view the last 20 records.

In [None]:
# Pandas TA version: RSI with the same length (14) on the non-null closing prices,
# for comparison with the values computed by the formula manager.

rsi_pd = ta.rsi(df['close'].dropna(), length=14)

rsi_pd[-20:] # view the last 20 values.

### ALMA

In [None]:
# Evaluation context: ALMA inputs and the dataset, then every registered function.
context = dict(
    price="close",
    window=10,     # Default for ALMA
    offset=0.85,   # Default offset
    sigma=6.0,     # Default sigma
    _dataset=ds_jit,
)
context.update(get_function_context())


def _eval(context):
    """Evaluate the "alma" formula against ``context`` (a plain call, not JIT compiled)."""
    return manager.evaluate("alma", context)


result = _eval(context)  # eval w/ the context.

asset_permno = 14593  # Apple's permno
ds = ds.assign(alma=result)  # Assign ALMA to the original dataset

# Select Apple, keeping the ALMA we just computed plus the closing price,
# then flatten the time dimension.
subset = ds[['alma', 'close']].sel(asset=asset_permno)
ts = subset.dt.to_time_indexed()

In [None]:
# Since time is now a single flattened dimension, xarray plot functions work happily.
# xarray does not support multidimensional time plotting, hence ALWAYS flatten before plotting (for now).
ts['alma'].plot.line(x="time", label='ALMA', color='blue', alpha=0.5)
ts['close'].plot.line(x="time", label='Adjusted closing prices', color='cyan', alpha=0.5)
# The title names the indicator actually drawn in this cell; it previously said "RSI",
# carried over from the RSI plot.
plt.title(f"Adjusted Price vs. ALMA (Asset: {asset_permno})")
plt.xlabel("Time")
plt.ylabel("Price")
plt.legend()
plt.grid(False)

In [None]:
# We will do a manual eye-check.

# Convert the flattened ALMA and closing-price series to pandas.
df = ts[['alma', 'close']].to_dataframe()

df[-20:]['alma'] # view the last 20 ALMA values.

In [None]:
# Pandas TA version: ALMA on the non-null closing prices, using the same values as the
# evaluation context (length=10, sigma=6, distribution_offset=0.85).
# NOTE(review): presumably distribution_offset is pandas-ta's counterpart of the formula's
# "offset" input -- confirm against the pandas-ta docs.

alma_pd = ta.alma(df['close'].dropna(), length=10, sigma=6, distribution_offset=0.85, offset=None)

alma_pd[-20:] # view the last 20 values.

### FWMA

In [None]:
# Show what the manager reports from list_formulas() (presumably the loaded formula names);
# the next cell evaluates "fwma".
manager.list_formulas()

In [None]:
# Evaluation context: FWMA inputs and the dataset, then every registered function.
context = dict(price="close", window=10, _dataset=ds_jit)
context.update(get_function_context())


def _eval(context):
    """Evaluate the "fwma" formula against ``context`` (a plain call, not JIT compiled)."""
    return manager.evaluate("fwma", context)


result = _eval(context)  # eval w/ the context.

asset_permno = 14593  # Apple's permno
ds = ds.assign(fwma=result)  # Assign FWMA to the original dataset

# Select Apple, keeping the FWMA we just computed plus the closing price,
# then flatten the time dimension.
subset = ds[['fwma', 'close']].sel(asset=asset_permno)
ts = subset.dt.to_time_indexed()

In [None]:
# Plot FWMA against the adjusted closing price on the flattened time axis.
ts['fwma'].plot.line(x="time", label='fwma', color='blue', alpha=0.5)
ts['close'].plot.line(x="time", label='Adjusted closing prices', color='cyan', alpha=0.5)
# The title names the indicator actually drawn in this cell; it previously said "RSI",
# carried over from the RSI plot.
plt.title(f"Adjusted Price vs. FWMA (Asset: {asset_permno})")
plt.xlabel("Time")
plt.ylabel("Price")
plt.legend()
plt.grid(False)

In [None]:
# We will do a manual eye-check.

# Convert the flattened FWMA and closing-price series to pandas.
df = ts[['fwma', 'close']].to_dataframe()

df[-20:]['fwma'] # view the last 20 FWMA values.

In [None]:
# Pandas TA version: FWMA with the same length (10) on the non-null closing prices,
# for comparison with the values computed by the formula manager.

fwma_pd = ta.fwma(df['close'].dropna(), length=10)

fwma_pd[-20:] # view the last 20 values.