In [13]:
import sys
import os

# Get the absolute path to the project directory
project_dir = os.path.abspath("..")

# Append the project directory to sys.path
if project_dir not in sys.path:
    sys.path.append(project_dir)

In [14]:
import sys
import os

# Get the absolute path to the project directory
project_dir = os.path.abspath("..")

# Append the project directory to sys.path
if project_dir not in sys.path:
    sys.path.append(project_dir)

import yfinance as yf
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import warnings
from dataclasses import asdict, is_dataclass, dataclass
import os
import json
import pickle
from pandas.api.types import is_datetime64_any_dtype
import numpy as np
from typing import Dict
import polars as pl

from src.common.AssetData import AssetData
from src.common.AssetDataPolars import AssetDataPolars
from src.common.AssetDataService import AssetDataService
from src.common.AssetFileInOut import AssetFileInOut 
from src.databaseService.OutsourceLoader import OutsourceLoader

from src.simulation.SimulatePortfolio import SimulatePortfolio
from src.strategy.StratBuyAndHold import StratBuyAndHold
from src.simulation.ResultAnalyzer import ResultAnalyzer
from src.common.AssetFileInOut import AssetFileInOut
from src.mathTools.CurveAnalysis import CurveAnalysis
from src.predictionModule.FourierML import FourierML
from src.common.YamlTickerInOut import YamlTickerInOut
from src.common.DataFrameTimeOperations import DataFrameTimeOperationsPandas as DFTO


In [15]:
assets=AssetFileInOut("../src/stockGroups/bin").loadDictFromFile("group_snp500_over20years")

# Convert to Polars for speedup
assetspl: Dict[str, AssetDataPolars] = {}
for ticker, asset in assets.items():
    assetspl[ticker] = AssetDataService.to_polars(asset)

In [16]:
asset = assetspl["NVDA"]

fin_quar = asset.financials_quarterly
fin_ann = asset.financials_annually

finPerDay = asset.shareprice.select(['Date', 'Close']) 
#add utc to every entry in the Date column
print(type(finPerDay))
finPerDay = finPerDay.with_columns(
    pl.col("Date").dt.replace_time_zone("UTC")
)

<class 'polars.dataframe.frame.DataFrame'>


In [17]:
fin_quar = fin_quar.with_columns(
    [
        (pl.col(col) / pl.col("totalRevenue")).alias(col)
        for col in fin_quar.columns
        if fin_quar[col].dtype.is_numeric() and col != "totalRevenue"
    ]
)

for col in fin_quar.columns:
    print(f"col: {col}   item: {fin_quar[col].tail(1).item()}")

col: fiscalDateEnding   item: 2024-10-27 00:00:00+00:00
col: reportedDate   item: 2024-11-20 00:00:00+00:00
col: reportedEPS   item: 2.308876346844536e-11
col: estimatedEPS   item: 2.109343823043156e-11
col: surprise   item: 1.9953252380137965e-12
col: surprisePercentage   item: 2.6963970127130724e-10
col: reportTime   item: post-market
col: reportedCurrency_x   item: USD
col: grossProfit   item: 0.7455675275069836
col: totalRevenue   item: 35082000000.0
col: costOfRevenue   item: 0.25443247249301637
col: costofGoodsAndServicesSold   item: 0.009178496094863462
col: operatingIncome   item: 0.6233681090017673
col: sellingGeneralAndAdministrative   item: 0.025568667692833932
col: researchAndDevelopment   item: 0.09663075081238241
col: operatingExpenses   item: 0.12219941850521635
col: investmentIncomeNet   item: 0.013454193033464455
col: netInterestIncome   item: 0.011715409611766718
col: interestIncome   item: 0.013454193033464455
col: interestExpense   item: 0.0017387834216977367
col: n

In [24]:
fin_quar2 = fin_quar.rename({"fiscalDateEnding": "Date"})
fin_ann2 = fin_ann.rename({"fiscalDateEnding": "Date"})
# Perform asof joins to attach the closest (earlier) quarterly & annual rows
finPerDay = (
    finPerDay
    .join_asof(fin_quar2, on="Date", strategy="backward", suffix="_quar")
    .join_asof(fin_ann2, on="Date", strategy="backward", suffix="_ann")
)

In [None]:
print(type(fin_quar["nonInterestIncome"].item(56)))
for col in fin_quar.columns:
        dtype = fin_quar[col].dtype
        # Check if the column is numeric
        if  dtype == fin_quar['nonInterestIncome'].dtype:
            min_val = fin_quar[col].min()
            max_val = fin_quar[col].max()
            print(f"{col} \n    Type: {dtype}, Min: {min_val}, Max: {max_val}")
        else:
            print(f"{col} \n    Type: {dtype}")

In [5]:

# Convert all string columns to float, with errors resulting in null (NaN)
fin_ann = fin_ann.with_columns(
    [
        pl.col(col).cast(pl.Float64, strict=False)
        for col in fin_ann.columns
        if fin_ann[col].dtype == pl.Utf8
    ]
)

fin_quart = fin_quart.with_columns(
    [
        pl.col(col).cast(pl.Float64, strict=False)
        for col in fin_quart.columns
        if fin_quart[col].dtype == pl.Utf8
    ]
)

In [6]:
# Divide all numeric columns by "totalRevenue", handling missing values safely
fin_ann = fin_ann.with_columns(
    [
        (pl.col(col) / pl.col("totalRevenue")).alias(col)
        for col in fin_ann.columns
        if fin_ann[col].dtype.is_numeric() and col != "totalRevenue"
    ]
)

fin_quart = fin_quart.with_columns(
    [
        (pl.col(col) / pl.col("totalRevenue")).alias(col)
        for col in fin_quart.columns
        if fin_quart[col].dtype.is_numeric() and col != "totalRevenue"
    ]
)

In [7]:
for col, dtype in fin_ann.schema.items():
    print(f"{col}: {dtype}")

fiscalDateEnding: Datetime(time_unit='ns', time_zone='UTC')
reportedEPS: Float64
reportedCurrency_x: Float64
grossProfit: Float64
totalRevenue: Float64
costOfRevenue: Float64
costofGoodsAndServicesSold: Float64
operatingIncome: Float64
sellingGeneralAndAdministrative: Float64
researchAndDevelopment: Float64
operatingExpenses: Float64
investmentIncomeNet: Float64
netInterestIncome: Float64
interestIncome: Float64
interestExpense: Float64
nonInterestIncome: Float64
otherNonOperatingIncome: Float64
depreciation: Float64
depreciationAndAmortization: Float64
incomeBeforeTax: Float64
incomeTaxExpense: Float64
interestAndDebtExpense: Float64
netIncomeFromContinuingOperations: Float64
comprehensiveIncomeNetOfTax: Float64
ebit: Float64
ebitda: Float64
netIncome_x: Float64
reportedCurrency_y: Float64
totalAssets: Float64
totalCurrentAssets: Float64
cashAndCashEquivalentsAtCarryingValue: Float64
cashAndShortTermInvestments: Float64
inventory: Float64
currentNetReceivables: Float64
totalNonCurrent

In [8]:
a = fin_ann["fiscalDateEnding"].item(-1)
print(type(a))

fin_ann.with_columns(fin_ann["fiscalDateEnding"].map_elements(lambda x: pd.Timestamp(x), return_dtype=pd.Timestamp))

a = fin_ann["fiscalDateEnding"].item(-1)
print(type(a))

a = fin_quart["reportedDate"].item(-1)
print(type(a))

<class 'datetime.datetime'>
<class 'datetime.datetime'>
<class 'datetime.datetime'>


In [9]:
fin_quart = fin_quart.with_columns(
        (pl.col("surprisePercentage") / 1000.0).alias("surprisePercentage")
)