In [1]:
import sys
import os

# Resolve the parent directory of the working directory to an absolute path
project_dir = os.path.abspath(os.pardir)

# Register it on the import path exactly once so re-running this cell is harmless
if sys.path.count(project_dir) == 0:
    sys.path.append(project_dir)

In [2]:
import yfinance as yf
import pandas as pd
import polars as pl
import datetime as dt
import matplotlib.pyplot as plt
import warnings
from typing import Dict
from dataclasses import asdict, is_dataclass, dataclass
import os
import json
import pickle
from pandas.api.types import is_datetime64_any_dtype
import scipy
import numpy as np
from scipy.stats import linregress
from ta import add_all_ta_features
import holidays
import pycountry

from src.common.AssetData import AssetData
from src.common.AssetDataService import AssetDataService
from src.common.AssetFileInOut import AssetFileInOut 
from src.databaseService.OutsourceLoader import OutsourceLoader

from src.simulation.SimulatePortfolio import SimulatePortfolio
from src.strategy.StratBuyAndHold import StratBuyAndHold
from src.simulation.ResultAnalyzer import ResultAnalyzer
from src.common.AssetFileInOut import AssetFileInOut
from src.common.AssetDataPolars import AssetDataPolars
from src.mathTools.CurveAnalysis import CurveAnalysis
from src.predictionModule.FourierML import FourierML
from src.common.YamlTickerInOut import YamlTickerInOut
from src.common.DataFrameTimeOperations import DataFrameTimeOperationsPandas as DFTO


In [3]:
# Load the stored asset dictionary "group_snp500_over20years" from the
# stockGroups/bin folder (presumably keyed by ticker symbol -- the loop below
# treats the keys as tickers).
assets=AssetFileInOut("../src/stockGroups/bin").loadDictFromFile("group_snp500_over20years")

# Convert to Polars for speedup
# NOTE: this is a plain `for` loop at cell level, so `ticker` and `asset` stay
# bound to the last entry of `assets` after the cell has run and are visible
# to later cells. Keep that in mind before turning this into a comprehension.
assetspl: Dict[str, AssetDataPolars] = {}
for ticker, asset in assets.items():
    assetspl[ticker]= AssetDataService.to_polars(asset)

In [4]:
# Window size factor; the window below spans m * 21 + 1 samples
# (presumably m months of ~21 trading days each -- TODO confirm).
m = 24

# Closing prices of the IRM asset
aIRM = assetspl["IRM"]
pricesArray = aIRM.shareprice["Close"]

# Anchor index: ten samples before the end of the series
aidx = pricesArray.len() - 10

# The m * 21 + 1 samples that end just before the anchor index
pastPrices = pricesArray.slice(aidx - (m * 21 + 1), m * 21 + 1).to_numpy()

# The five samples that follow the anchor index
futurePrices = pricesArray.slice(aidx + 1, 5).to_numpy()

In [21]:
# Reference date; holidays are collected for the year before, the year of,
# and the year after this date.
date = pd.Timestamp(2024, 6, 10, tz="UTC")

# Ask for the years explicitly instead of relying on the side effect of
# looking up a year string (which only populates the calendar because the
# string happens to be parsed as a date inside that year).
holiday_years = list(range(date.year - 1, date.year + 2))
holiday_calendar = holidays.CountryHoliday('US', years=holiday_years)

# Plain `datetime.date` keys, in chronological order
holiday_dates = sorted(holiday_calendar.keys())
print(holiday_dates)

# tz-aware Timestamps, so that membership tests against tz-aware Timestamps
# (as done in the following cell) compare like with like.
country_holidays = [pd.Timestamp(str(day), tz='UTC') for day in holiday_dates]
country_holidays

[datetime.date(2023, 1, 1), datetime.date(2023, 1, 2), datetime.date(2023, 1, 16), datetime.date(2023, 2, 20), datetime.date(2023, 5, 29), datetime.date(2023, 6, 19), datetime.date(2023, 7, 4), datetime.date(2023, 9, 4), datetime.date(2023, 11, 10), datetime.date(2023, 11, 11), datetime.date(2023, 11, 23), datetime.date(2023, 12, 25), datetime.date(2024, 1, 1), datetime.date(2024, 1, 15), datetime.date(2024, 2, 19), datetime.date(2024, 5, 27), datetime.date(2024, 6, 19), datetime.date(2024, 7, 4), datetime.date(2024, 9, 2), datetime.date(2024, 11, 11), datetime.date(2024, 11, 28), datetime.date(2024, 12, 25), datetime.date(2025, 1, 1), datetime.date(2025, 1, 20), datetime.date(2025, 2, 17), datetime.date(2025, 5, 26), datetime.date(2025, 6, 19), datetime.date(2025, 7, 4), datetime.date(2025, 9, 1), datetime.date(2025, 11, 11), datetime.date(2025, 11, 27), datetime.date(2025, 12, 25)]


[Timestamp('2023-01-01 00:00:00+0000', tz='UTC'),
 Timestamp('2023-01-02 00:00:00+0000', tz='UTC'),
 Timestamp('2023-01-16 00:00:00+0000', tz='UTC'),
 Timestamp('2023-02-20 00:00:00+0000', tz='UTC'),
 Timestamp('2023-05-29 00:00:00+0000', tz='UTC'),
 Timestamp('2023-06-19 00:00:00+0000', tz='UTC'),
 Timestamp('2023-07-04 00:00:00+0000', tz='UTC'),
 Timestamp('2023-09-04 00:00:00+0000', tz='UTC'),
 Timestamp('2023-11-10 00:00:00+0000', tz='UTC'),
 Timestamp('2023-11-11 00:00:00+0000', tz='UTC'),
 Timestamp('2023-11-23 00:00:00+0000', tz='UTC'),
 Timestamp('2023-12-25 00:00:00+0000', tz='UTC'),
 Timestamp('2024-01-01 00:00:00+0000', tz='UTC'),
 Timestamp('2024-01-15 00:00:00+0000', tz='UTC'),
 Timestamp('2024-02-19 00:00:00+0000', tz='UTC'),
 Timestamp('2024-05-27 00:00:00+0000', tz='UTC'),
 Timestamp('2024-06-19 00:00:00+0000', tz='UTC'),
 Timestamp('2024-07-04 00:00:00+0000', tz='UTC'),
 Timestamp('2024-09-02 00:00:00+0000', tz='UTC'),
 Timestamp('2024-11-11 00:00:00+0000', tz='UTC'),


In [32]:
# Sanity check: a tz-aware Timestamp is found in the list of tz-aware holiday
# Timestamps built in the previous cell.
a = pd.Timestamp(2025, 7, 4, tz="UTC")
print(a in country_holidays)

# Sanity check: `is_month_start` is True on the first day of a month.
pd.Timestamp(2023, 1, 1, tz="UTC").is_month_start

True


True

In [5]:
def getSeasonalFeatures(timestamp: pd.Timestamp, country: str = 'US') -> dict:
    """
    Extracts calendar and holiday related features for a given pd.Timestamp.

    Holiday features are computed on the calendar date of `timestamp` (in its
    own timezone), so the time of day does not influence them.

    Parameters:
        timestamp (pd.Timestamp): The date to extract features from. A naive
            timestamp is localized to UTC; an aware one is used as given.
        country (str): The country code for holiday determination (default: 'US').
    Returns:
        dict: A dictionary containing the extracted date features.
            `days_to_next_holiday` / `days_since_last_holiday` are 0 on a
            holiday and None if no holiday is known in that direction.
    Raises:
        ValueError: If `timestamp` is not a pandas Timestamp.
    """
    if not isinstance(timestamp, pd.Timestamp):
        raise ValueError("The input must be a pandas Timestamp object.")
    # Ensure timestamp is timezone-aware (if not already)
    timestamp = timestamp.tz_localize('UTC') if timestamp.tz is None else timestamp
    # Holidays are whole days: do all holiday arithmetic on the calendar date,
    # otherwise 15:30 on a holiday would count as "after" that holiday.
    current_date = timestamp.date()

    # Load the neighbouring years as well. With only the current year loaded,
    # a date after the last holiday of the year (e.g. Dec 30) has no "next"
    # holiday and a date before the first one has no "last" holiday.
    country_holidays = holidays.CountryHoliday(
        country,
        years=[timestamp.year - 1, timestamp.year, timestamp.year + 1],
    )
    holiday_dates = sorted(country_holidays.keys())

    is_holiday = current_date in country_holidays
    day_of_week = timestamp.dayofweek  # Monday=0, Sunday=6

    # `default=None` keeps min() from raising when no holiday qualifies
    days_to_next_holiday = min(
        ((h - current_date).days for h in holiday_dates if h >= current_date),
        default=None,
    )
    days_since_last_holiday = min(
        ((current_date - h).days for h in holiday_dates if h <= current_date),
        default=None,
    )

    features = {
        # General date-related features
        "year": timestamp.year,
        "month": timestamp.month,
        "day": timestamp.day,
        "day_of_week": day_of_week,
        "day_name": timestamp.day_name(),
        "is_weekend": day_of_week >= 5,  # True if Saturday or Sunday
        "is_holiday": is_holiday,
        "holiday_name": country_holidays.get(current_date, None),  # Name of the holiday if it's a holiday
        "quarter": timestamp.quarter,
        "week_of_year": timestamp.isocalendar()[1],  # ISO week number of the year
        "is_month_start": timestamp.is_month_start,
        "is_month_end": timestamp.is_month_end,
        "is_year_start": timestamp.is_year_start,
        "is_year_end": timestamp.is_year_end,
        # Additional features
        "days_to_next_holiday": days_to_next_holiday,
        "days_since_last_holiday": days_since_last_holiday,
        "season": timestamp.month % 12 // 3 + 1,  # 1: Dec-Feb, 2: Mar-May, 3: Jun-Aug, 4: Sep-Nov
        "is_trading_day": day_of_week < 5 and not is_holiday,  # Example; adjust for real calendars
        # 0: Monday/Tuesday, 1: Wednesday/Thursday, 2: Friday and weekend
        "week_part": 0 if day_of_week < 2 else 1 if day_of_week < 4 else 2,
    }
    return features

In [6]:
date = pd.Timestamp(2024, 6, 10)

# Select the asset explicitly. Reading a loop variable left over from the
# loading cell would silently use whichever asset happened to be iterated
# last and would break as soon as that loop is refactored.
country_name = assets["IRM"].about.get('country', 'US')

# Normalise whatever the metadata holds (name or code) to an ISO alpha-2 code
country = pycountry.countries.lookup(country_name).alpha_2
print(country)
getSeasonalFeatures(date, country)

US


{'year': 2024,
 'month': 6,
 'day': 10,
 'day_of_week': 0,
 'day_name': 'Monday',
 'is_weekend': False,
 'is_holiday': False,
 'holiday_name': None,
 'quarter': 2,
 'week_of_year': 24,
 'is_month_start': False,
 'is_month_end': False,
 'is_year_start': False,
 'is_year_end': False,
 'days_to_next_holiday': 9,
 'days_since_last_holiday': 14,
 'season': 3,
 'is_trading_day': True,
 'week_part': 0}