In [1]:
import sys
import os

# Resolve the parent of the current working directory to an absolute path;
# this is the directory the `src.*` imports in this notebook are resolved from.
project_dir = os.path.abspath(os.pardir)

# Register it on the import path only once, so re-running the cell
# does not pile up duplicate entries.
if sys.path.count(project_dir) == 0:
    sys.path.append(project_dir)

In [8]:
import polars as pl
from typing import Dict

from src.common.AssetData import AssetData
from src.common.AssetDataService import AssetDataService
from src.common.AssetFileInOut import AssetFileInOut 
from src.common.AssetDataPolars import AssetDataPolars 
from src.databaseService.EstablishStocks import EstablishStocks 
from src.databaseService.OutsourceLoader import OutsourceLoader
from src.databaseService.CleanData import CleanData 


In [None]:
import os
# Show the kernel's working directory; the relative paths used in this
# notebook ("..", "../src/stockGroups/bin") are resolved against it.
working_dir = os.getcwd()
print(working_dir)

c:\Users\kimer\Desktop\RandomOdyssey\notebooks


In [4]:
# Load the stored asset group as a ticker -> asset mapping.
asset_store = AssetFileInOut("../src/stockGroups/bin")
assets = asset_store.loadDictFromFile("group_snp500_over20years")

# Convert every asset to its Polars counterpart for speedup.
assetspl: Dict[str, AssetDataPolars] = {
    ticker: AssetDataService.to_polars(asset)
    for ticker, asset in assets.items()
}

In [16]:
# Pick ticker "A" and keep handles on its quarterly financials and its
# share-price frame (`fin` is reused by a later cell).
aTick = assetspl["A"]
fin = aTick.financials_quarterly
aShareprice = aTick.shareprice

# Row-wise predicate: true as soon as any single column of the row is null.
has_null_in_row = pl.any_horizontal(pl.all().is_null())

# Share-price rows that contain at least one null value.
rows_with_nulls = aShareprice.filter(has_null_in_row)

print(rows_with_nulls)

shape: (0, 9)
┌──────┬──────┬──────┬─────┬───┬──────────┬────────┬───────────┬────────┐
│ Date ┆ Open ┆ High ┆ Low ┆ … ┆ AdjClose ┆ Volume ┆ Dividends ┆ Splits │
│ ---  ┆ ---  ┆ ---  ┆ --- ┆   ┆ ---      ┆ ---    ┆ ---       ┆ ---    │
│ date ┆ f64  ┆ f64  ┆ f64 ┆   ┆ f64      ┆ f64    ┆ f64       ┆ f64    │
╞══════╪══════╪══════╪═════╪═══╪══════════╪════════╪═══════════╪════════╡
└──────┴──────┴──────┴─────┴───┴──────────┴────────┴───────────┴────────┘


In [19]:
# Quarterly financials of the selected ticker.
aQuarterly = aTick.financials_quarterly

# Keep only the rows in which at least one column is null,
# then display the last few of them.
has_null_in_row = pl.any_horizontal(pl.all().is_null())
rows_with_nulls = aQuarterly.filter(has_null_in_row)
rows_with_nulls.tail()

fiscalDateEnding,reportedDate,reportedEPS,estimatedEPS,surprise,surprisePercentage,reportTime,grossProfit,totalRevenue,ebit,ebitda,totalAssets,totalCurrentLiabilities,totalShareholderEquity,commonStockSharesOutstanding,operatingCashflow
date,date,f64,f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64
2004-01-31,2004-02-17,0.21,0.22,-0.01,-4.5455,"""pre-market""",,,,,,,,,
2004-04-30,2004-05-17,0.24,0.24,0.0,0.0,"""pre-market""",,,,,,,,,
2004-07-31,2004-08-12,0.3,0.28,0.02,7.1429,"""pre-market""",,,,,,,,,
2004-10-31,2004-11-11,0.3,0.31,-0.01,-3.2258,"""pre-market""",,,,,,,,,
2024-10-31,2024-11-25,1.46,1.41,0.05,3.5461,"""post-market""",916000000.0,1701000000.0,,408000000.0,11846000000.0,1895000000.0,5898000000.0,291000000.0,481000000.0


In [20]:
# Annual financials of the selected ticker.
aAnnually = aTick.financials_annually

# Keep only the rows in which at least one column is null,
# then display the last few of them.
has_null_in_row = pl.any_horizontal(pl.all().is_null())
rows_with_nulls = aAnnually.filter(has_null_in_row)
rows_with_nulls.tail()

fiscalDateEnding,reportedEPS,grossProfit,totalRevenue,ebit,ebitda,totalAssets,totalCurrentLiabilities,totalShareholderEquity,operatingCashflow
date,f64,f64,f64,f64,f64,f64,f64,f64,f64
2000-10-31,1.77,,,,,,,,
2001-10-31,-0.22,,,,,,,,
2002-10-31,-1.4175,,,,,,,,
2003-10-31,-0.25,,,,,,,,
2004-10-31,1.05,,,,,,,,


In [21]:
# Pass the quarterly financials of ticker "A" through
# CleanData.financial_fiscalDateIncongruence. `fin` is not defined in this
# cell: it is assigned in an earlier cell from `aTick.financials_quarterly`,
# so that cell must have been run first.
# NOTE(review): what the helper changes is not visible here; judging by its
# name it presumably reconciles inconsistent fiscalDateEnding values —
# confirm against CleanData.
financials_quarterly = CleanData.financial_fiscalDateIncongruence(fin)

# Display the last 7 rows of the result.
# NOTE(review): the saved output under this cell has columns (`Unnamed: 0`,
# `reportedCurrency`, ...) and fiscal dates (e.g. 2024-10-27) that differ from
# the quarterly frame shown for "A" in the earlier null-check cell; it may be
# stale output from a different run — re-run to confirm.
financials_quarterly.tail(7)

Unnamed: 0,fiscalDateEnding,reportedDate,reportedEPS,estimatedEPS,surprise,surprisePercentage,reportTime,reportedCurrency,grossProfit,totalRevenue,...,depreciation,depreciationAndAmortization,incomeBeforeTax,incomeTaxExpense,interestAndDebtExpense,netIncomeFromContinuingOperations,comprehensiveIncomeNetOfTax,ebit,ebitda,netIncome
96,2023-04-30,2023-05-24,0.109,0.092,0.017,18.4783,post-market,USD,4648000000,7192000000,...,,181000000,2209000000,166000000,66000000,2043000000,2036000000,2275000000,2456000000,2043000000
97,2023-07-30,2023-08-23,0.27,0.207,0.063,30.4348,post-market,USD,9462000000,13507000000,...,,146000000,6981000000,793000000,65000000,6188000000,6187000000,7046000000,7192000000,6188000000
98,2023-10-29,2023-11-21,0.402,0.336,0.066,19.6429,post-market,USD,13400000000,18120000000,...,,144000000,10523000000,1279000000,63000000,9242000000,9206000000,10586000000,10730000000,9243000000
99,2024-01-28,2024-02-21,0.516,0.461,0.055,11.9306,post-market,USD,16791000000,22103000000,...,,143000000,14106000000,1821000000,63000000,12285000000,12400000000,14169000000,14312000000,12285000000
100,2024-04-28,2024-05-22,0.612,0.559,0.053,9.4812,post-market,USD,20406000000,26044000000,...,,143000000,17279000000,2398000000,64000000,14881000000,14745000000,17343000000,17486000000,14881000000
101,2024-07-28,2024-08-28,0.68,0.63,0.05,7.9365,post-market,USD,22574000000,30040000000,...,,146000000,19214000000,2615000000,61000000,16599000000,16764000000,18642000000,18788000000,16599000000
102,2024-10-27,2024-11-20,0.81,0.74,0.07,9.4595,post-market,USD,26156000000,35082000000,...,,149000000,22316000000,3007000000,61000000,19309000000,19356000000,21869000000,22018000000,19309000000


In [14]:
# Collect the distinct sector labels of all loaded assets. Each sector is
# stored as both key and value, so the dict's keys act as an
# insertion-ordered set of sectors.
sectorDict = {asset.sector: asset.sector for asset in assetspl.values()}

print(sectorDict.keys())

dict_keys(['LIFE SCIENCES', 'TECHNOLOGY', 'MANUFACTURING', 'TRADE & SERVICES', 'REAL ESTATE & CONSTRUCTION', 'FINANCE', 'ENERGY & TRANSPORTATION'])


In [None]:
# Category labels, one per line.
# NOTE(review): these differ from the sector names printed by the previous
# cell (e.g. 'TECHNOLOGY', 'FINANCE'); how the two sets relate is not shown
# here — confirm the intended mapping.
categories = [
    "other",
    "industrials",
    "healthcare",
    "technology",
    "utilities",
    "financial-services",
    "basic-materials",
    "real-estate",
    "consumer-defensive",
    "energy",
    "communication-services",
    "consumer-cyclical",
]