In [7]:
import sys
import os

# Get the absolute path to the project directory
project_dir = os.path.abspath("..")

# Append the project directory to sys.path
if project_dir not in sys.path:
    sys.path.append(project_dir)

In [8]:
import sys
import os

# Get the absolute path to the project directory
project_dir = os.path.abspath("..")

# Append the project directory to sys.path
if project_dir not in sys.path:
    sys.path.append(project_dir)

import yfinance as yf
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import os
import numpy as np
from typing import Dict
import polars as pl

from src.common.AssetData import AssetData
from src.common.AssetDataPolars import AssetDataPolars
from src.common.AssetDataService import AssetDataService
from src.common.AssetFileInOut import AssetFileInOut 

from src.featureAlchemy.FeatureMain import FeatureMain
from src.featureAlchemy.FeatureTA import FeatureTA
from src.featureAlchemy.FeatureGroupDynamic import FeatureGroupDynamic
from src.common.DataFrameTimeOperations import DataFrameTimeOperationsPolars as DPl


In [9]:
assets=AssetFileInOut("../src/stockGroups/bin").loadDictFromFile("group_debug")

# Convert to Polars for speedup
assetspl: Dict[str, AssetDataPolars] = {}
for ticker, asset in assets.items():
    assetspl[ticker] = AssetDataService.to_polars(asset)

In [10]:
ticker = "IRM"
asset = assetspl[ticker]
startDate = pd.Timestamp(2020, 1, 1, tz = "UTC")
endDate = pd.Timestamp(2020, 12, 31, tz = "UTC")
params = {
        'idxLengthOneMonth': 21,
        'fouriercutoff': 5,
        'multFactor': 8,
        'timesteps': 20,
    }
ffM = FeatureMain(
    assetspl,
    startDate, 
    endDate, 
    lagList = [1, 10, 100],
    monthHorizonList = [1,2,6,12],
    params = params
)

KeyError: 'IRM'

In [None]:
meta_tree, treeFeatures_np, treeFeaturesNames = ffM.getTreeFeatures()
print(treeFeatures_np.shape[1] == len(treeFeaturesNames))
print(meta_tree.shape[1] == 3)
print(meta_tree.shape[0] == treeFeatures_np.shape[0])

  Processing ticker AAPL (1/299)
  Processing ticker ABT (2/299)
  Processing ticker ACN (3/299)


KeyboardInterrupt: 

In [None]:
# Make business days
asset_start_idx = DPl(asset.shareprice).getNextLowerOrEqualIndex(startDate)+1
asset_end_idx = DPl(asset.shareprice).getNextLowerOrEqualIndex(endDate)
business_days = asset.shareprice["Date"].slice(asset_start_idx, asset_end_idx - asset_start_idx + 1).to_numpy()  
business_days = np.array([pd.Timestamp(x, tz ="UTC") for x in business_days])

## Checks on the dataframe

In [None]:
fTA = FeatureTA(asset, startDate, endDate, [1, 10, 100], params=params)
featureTANames = fTA.getFeatureNames()
featuresTA = np.zeros((len(business_days), len(featureTANames)), dtype = np.float32)
for i, date in enumerate(business_days):
    featuresTA[i] = fTA.apply(date, 1.0, None)

ticker_df = featuresTA

common_cols = [a in featureTANames for a in treeFeaturesNames] 
ticker_filtermask = meta_tree[:,1] == ticker
filtered_featurenp = treeFeatures_np[:, common_cols]
filtered_featurenp = filtered_featurenp[ticker_filtermask, :]

A = filtered_featurenp
B = ticker_df
print(np.max(np.abs(A-B))/np.prod(A.shape))

0.0001388998592601103


In [None]:
fGD = FeatureGroupDynamic(assetspl, startDate, endDate, lagList=[1, 10, 100], monthHorizonList=[1,2,6,12], params=params)
featureGDNames = fGD.getFeatureNames()
featuresGD = np.zeros((len(business_days), len(featureGDNames)), dtype = np.float32)
FGD_tick_date = {date: fGD.apply(date, None) for date in (business_days)}
for i, date in enumerate(business_days):
    featuresGD[i] = FGD_tick_date[date][ticker]
    
ticker_df = featuresGD
    
common_cols = [a in featureGDNames for a in treeFeaturesNames] 
ticker_filtermask = meta_tree[:,1] == ticker
filtered_featuredf = treeFeatures_np[:, common_cols]
filtered_featuredf = filtered_featuredf[ticker_filtermask, :]

A = filtered_featuredf
B = ticker_df
print(np.max(np.abs(A-B))/np.prod(A.shape))

8.803775528307594e-07


## Checks in the time dataframe

In [None]:
meta_time, timeFeatures_np, timeFeaturesNames = ffM.getTimeFeatures()

  Processing ticker AAPL (1/34)
  Processing ticker ADBE (2/34)
  Processing ticker ADI (3/34)
  Processing ticker ADP (4/34)
  Processing ticker ALGN (5/34)
  Processing ticker AMD (6/34)
  Processing ticker AMGN (7/34)
  Processing ticker AMZN (8/34)
  Processing ticker ANSS (9/34)
  Processing ticker BKNG (10/34)
  Processing ticker CDNS (11/34)
  Processing ticker CSCO (12/34)
  Processing ticker CSX (13/34)
  Processing ticker CTSH (14/34)
  Processing ticker EA (15/34)
  Processing ticker EXC (16/34)
  Processing ticker GILD (17/34)
  Processing ticker IDXX (18/34)
  Processing ticker INTC (19/34)
  Processing ticker INTU (20/34)
  Processing ticker ISRG (21/34)
  Processing ticker KLAC (22/34)
  Processing ticker MAR (23/34)
  Processing ticker MDLZ (24/34)
  Processing ticker MSFT (25/34)
  Processing ticker MU (26/34)
  Processing ticker NFLX (27/34)
  Processing ticker NVDA (28/34)
  Processing ticker QCOM (29/34)
  Processing ticker REGN (30/34)
  Processing ticker ROST (31/

In [None]:
print(timeFeatures_np.shape[2] == len(timeFeaturesNames))
print(timeFeatures_np.shape[1] == params["timesteps"])
print(meta_time.shape[1] == 3)
print(meta_time.shape[0] == timeFeatures_np.shape[0])

True
True
True
True


In [None]:
fTA = FeatureTA(asset, startDate, endDate, [1, 10, 100], params=params)
featureTANames = fTA.getTimeFeatureNames()
featuresTA = np.zeros((len(business_days), params['timesteps'], len(featureTANames)), dtype = np.float32)
for i, date in enumerate(business_days):
    featuresTA[i,:,:] = fTA.apply_timeseries(date, None)

ticker_np = featuresTA

common_cols = [a in featureTANames for a in timeFeaturesNames] 
ticker_filtermask = meta_time[:,1] == ticker
filtered_featuredf = timeFeatures_np[:, :, common_cols]
filtered_featuredf = filtered_featuredf[ticker_filtermask, :, :]

A = filtered_featuredf
B = ticker_np
print(np.max(np.abs(A-B))/np.prod(A.shape))

4.6575295899301315e-08


In [None]:
fGD = FeatureGroupDynamic(assetspl, startDate, endDate, lagList=[1, 10, 100], monthHorizonList=[1,2,6,12], params=params)
featureGDNames = fGD.getTimeFeatureNames()
featuresGD = np.zeros((len(business_days), params['timesteps'], len(featureGDNames)), dtype = np.float32)
FGD_tick_date = {date: fGD.apply_timeseries(date, None) for date in (business_days)}
for i, date in enumerate(business_days):
    featuresGD[i] = FGD_tick_date[date][ticker]
    
ticker_np = featuresGD

common_cols = [a in featureGDNames for a in timeFeaturesNames] 
ticker_filtermask = meta_time[:,1] == ticker
filtered_featuredf = timeFeatures_np[:, :, common_cols]
filtered_featuredf = filtered_featuredf[ticker_filtermask, :, :]

A = filtered_featuredf
B = ticker_np
print(np.max(np.abs(A-B))/np.prod(A.shape))

0.0
