In [44]:
import requests
import numpy as np
import json
import pandas as pd
import time
import yahoo_fin.stock_info as si
from datetime import datetime

def getcik(ticker, mapping=None):
    """Return the SEC CIK for a ticker as a zero-padded 10-character string.

    Args:
        ticker: Ticker symbol to look up. Compared case-insensitively.
        mapping: Optional dict whose values are dicts holding at least the
            keys "ticker" and "cik_str". Defaults to the module-level
            `cikdata`.

    Returns:
        The matching entry's "cik_str", converted to str and left-padded
        with zeros to 10 characters.

    Raises:
        ValueError: If no entry in the mapping has the requested ticker.
    """
    if mapping is None:
        mapping = cikdata
    wanted = str(ticker).upper()
    for entry in mapping.values():
        if str(entry["ticker"]).upper() == wanted:
            return str(entry["cik_str"]).zfill(10)
    # Previously this fell through to an UnboundLocalError on `cik`.
    raise ValueError(f"Ticker {ticker!r} not found in the CIK mapping")

#Headers for EDGAR call (the User-Agent value is a contact e-mail address)
headers = {
    "User-Agent":"ficakc@seznam.cz",
    "Accept-Encoding":"gzip, deflate",
}

# Arguments handed to get(): request timeout, number of attempts, and the
# pause between attempts (the latter is passed to time.sleep, i.e. seconds).
TIMEOUT = 6
RETRIES = 2
RETRY_DELAY = 1
# cik_url =  "https://www.sec.gov/files/company_tickers.json"
# cikdata = requests.get(cik_url, headers=headers).json()

# Directory holding the locally cached JSON files read below.
DATA_DIR = r"C:\Programming\Python\Finance\EDGAR"

# The `with` statement closes the file on exit, so no explicit close() is needed.
with open(DATA_DIR + r"\cik.json","r") as file:
    cikdata = json.load(file)
# NOTE(review): `Apple` is bound to this raw JSON here but is rebound to a
# Stock instance in a later cell; cells that index Apple["facts"] need this
# dict version — confirm the intended execution order.
with open(DATA_DIR + r"\apple.json","r") as file:
    Apple = json.load(file)

def get(url, headers, timeout, max_retries, retry_delay):
    """GET a URL with a fixed number of attempts and a pause between them.

    Args:
        url: Address to request.
        headers: Header dict forwarded to requests.get.
        timeout: Timeout forwarded to requests.get.
        max_retries: Total number of attempts to make.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        The requests.Response of the first attempt that succeeds without an
        HTTP error status, or the integer 0 when every attempt failed
        (including when max_retries is zero or negative, so callers can rely
        on a single failure sentinel).
    """
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=timeout, headers=headers)
            response.raise_for_status()  # Raise an HTTPError for bad requests (4xx or 5xx)
            return response  # Successful request, exit the loop
        except requests.RequestException as e:
            print(f"Request failed: {e}")
            # Only wait when another attempt is still to come.
            if attempt < max_retries - 1:
                print(f"Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
    # Reached when all attempts failed or none were allowed.
    print("Max retries reached. Exiting.")
    return 0
        

def companyfacts(ticker:str):
    """Request the EDGAR companyfacts JSON endpoint for a ticker.

    The ticker is translated to its padded CIK with getcik(), and the request
    is issued through get() using the module-level headers and retry
    settings. Whatever get() returns is handed back unchanged.
    """
    padded_cik = getcik(ticker)
    return get(
        f"https://data.sec.gov/api/xbrl/companyfacts/CIK{padded_cik}.json",
        headers,
        TIMEOUT,
        RETRIES,
        RETRY_DELAY,
    )

def endtodatetime(dataframe):
    """Convert the "end" column of a frame to pandas datetimes.

    The column is replaced on the frame that was passed in, and that same
    frame is returned.

    Args:
        dataframe: DataFrame with an "end" column of date-like values.

    Returns:
        The input DataFrame, with "end" converted by pd.to_datetime.
    """
    # pd.datetime was only an alias of datetime.datetime (removed in pandas
    # 2.0) and cannot convert a Series; pd.to_datetime is the converter.
    dataframe["end"] = pd.to_datetime(dataframe["end"])
    return dataframe

class Stock:
    """Company fundamentals from EDGAR joined onto a daily calendar, plus prices.

    Attributes set by __init__:
        ticker: Upper-cased ticker symbol.
        cik: Value returned by getcik() for the ticker.
        data: Parsed companyfacts JSON, or the integer 0 if the download failed.
        start_year: Latest of the first reported "end" dates across the share
            count and every requested measure (a datetime), or None on failure.
        end_year: Today's date.
        fullprice: Result of si.get_data() with the index reset, or 0 on failure.
        date_range: One-column ("end") DataFrame of date strings from
            start_year to end_year, or 0 on failure.
        price: close/adjclose merged onto date_range and back-filled, or 0 on
            failure.
    """

    def __init__(self, ticker:str, measures:list):
        """Download fundamentals and prices for `ticker`.

        Args:
            ticker: Ticker symbol (any case).
            measures: Names under facts["us-gaap"] whose first "end" date
                takes part in choosing the start of the date range.
        """
        self.ticker = ticker.upper()
        self.cik = getcik(self.ticker)
        self.end_year = datetime.now().date()
        response = companyfacts(self.ticker)
        if not response:
            # The download failed: keep the 0 sentinel that fact()/shares()
            # propagate instead of indexing into it and crashing.
            self.data = 0
            self.start_year = None
            self.fullprice = 0
            self.date_range = 0
            self.price = 0
            return
        self.data = response.json()

        facts = self.data["facts"]
        first_ends = [facts["dei"]["EntityCommonStockSharesOutstanding"]["units"]["shares"][0]["end"]]
        first_ends += [facts["us-gaap"][measure]["units"]["USD"][0]["end"] for measure in measures]
        # Start where every requested series has at least begun reporting.
        self.start_year = max(datetime.strptime(end, r"%Y-%m-%d") for end in first_ends)

        self.fullprice = si.get_data(self.ticker,self.start_year, self.end_year).reset_index()
        # reset_index() puts the former index first; use that name throughout
        # rather than assuming it is called "index".
        date_col = self.fullprice.columns[0]
        price = self.fullprice[[date_col, "close", "adjclose"]].rename(columns={date_col: "end"})
        price["end"] = price["end"].astype(str)

        days = pd.date_range(start=self.start_year, end=self.end_year).astype(str)
        self.date_range = pd.DataFrame(days, columns=['end'])
        # Days without a quote take the next available quote (back-fill).
        self.price = pd.merge(self.date_range, price, on=["end"], how="left").bfill()

    def fact(self,measure,simple=True):
        """Return one us-gaap USD measure aligned to the daily date range.

        Args:
            measure: Key under facts["us-gaap"].
            simple: When True keep only "end" and the measure column;
                otherwise keep every reported column as well.

        Returns:
            A forward-filled DataFrame, 0 if the download failed, or None
            (after printing a message) if a KeyError occurs while reading
            the measure.
        """
        #Propagate the 0
        if self.data == 0:
            return 0
        try:
            point_list = self.data["facts"]["us-gaap"][measure]["units"]["USD"]
            # Several filings can report the same period end; keep the last one.
            frame = pd.DataFrame(point_list).drop_duplicates(subset='end', keep='last')
            frame = frame.assign(**{measure: frame["val"]})
            if simple:
                frame = frame[["end", measure]]
            return pd.merge(self.date_range,frame,on="end",how="left").ffill()
        except KeyError:
            print(f"Measure {measure} not available for company.")
            return None

    def shares(self,simple=True):
        """Return the reported share count aligned to the daily date range.

        Reads dei/EntityCommonStockSharesOutstanding and falls back to
        dei/EntityPublicFloat when that key path is missing.

        Args:
            simple: When True keep only "end" and "shares"; otherwise keep
                every reported column (with "val" renamed to "shares").

        Returns:
            A forward-filled DataFrame, or 0 if the download failed.
        """
        #Propagate the 0
        if self.data == 0:
            return 0
        dei = self.data["facts"]["dei"]
        try:
            records = dei["EntityCommonStockSharesOutstanding"]["units"]["shares"]
        except KeyError:
            records = dei["EntityPublicFloat"]["units"]["shares"]
        # Build the frame for both values of `simple`; previously
        # simple=False left the frame undefined.
        frame = pd.DataFrame(records)
        if simple:
            frame = frame[["end", "val"]]
        frame = frame.rename(columns={"val": "shares"}).drop_duplicates(subset="end", keep="last")
        return pd.merge(self.date_range, frame, on=["end"], how="left").ffill()


In [2]:
#write out measures based on importance in descending order
measures = ["Assets", "Liabilities", "AssetsCurrent", "LiabilitiesCurrent"]

# Build, for every company in the CIK mapping, one frame joining share count,
# price and each measure on the "end" date column.
for company, values in cikdata.items():
    stock = Stock(values["ticker"], measures)
    shares = stock.shares()
    stock_num = stock.price
    # A failed download is signalled with the integer 0; skip that company.
    if isinstance(shares, int) or isinstance(stock_num, int):
        continue
    df = pd.merge(shares.copy(), stock_num.copy(), left_on=["end"], right_on=["end"], how = "left")
    frames_list = [stock.fact(measure) for measure in measures]
    for frame in frames_list:
        df  = pd.merge(df,frame, on=["end"], how="left")
        # Comparing a whole Series to 0 has no single truth value and raises;
        # reduce it with .any() to test for missing values in the new column.
        assert not df[df.columns[-1]].isnull().any(), "Missing something buddy"
    

TypeError: string indices must be integers, not 'str'

In [35]:
# Show the raw EntityCommonStockSharesOutstanding records (unit "shares").
# NOTE(review): uses `Apple.data`, so this assumes `Apple` is currently the
# Stock instance rather than the raw JSON dict loaded from apple.json — confirm.
Apple.data["facts"]["dei"]["EntityCommonStockSharesOutstanding"]["units"]["shares"]

[{'end': '2009-06-27',
  'val': 895816758,
  'accn': '0001193125-09-153165',
  'fy': 2009,
  'fp': 'Q3',
  'form': '10-Q',
  'filed': '2009-07-22',
  'frame': 'CY2009Q2I'},
 {'end': '2009-10-16',
  'val': 900678473,
  'accn': '0001193125-09-214859',
  'fy': 2009,
  'fp': 'FY',
  'form': '10-K',
  'filed': '2009-10-27'},
 {'end': '2009-10-16',
  'val': 900678473,
  'accn': '0001193125-10-012091',
  'fy': 2009,
  'fp': 'FY',
  'form': '10-K/A',
  'filed': '2010-01-25',
  'frame': 'CY2009Q3I'},
 {'end': '2010-01-15',
  'val': 906794589,
  'accn': '0001193125-10-012085',
  'fy': 2010,
  'fp': 'Q1',
  'form': '10-Q',
  'filed': '2010-01-25',
  'frame': 'CY2009Q4I'},
 {'end': '2010-04-09',
  'val': 909938383,
  'accn': '0001193125-10-088957',
  'fy': 2010,
  'fp': 'Q2',
  'form': '10-Q',
  'filed': '2010-04-21',
  'frame': 'CY2010Q1I'},
 {'end': '2010-07-09',
  'val': 913562880,
  'accn': '0001193125-10-162840',
  'fy': 2010,
  'fp': 'Q3',
  'form': '10-Q',
  'filed': '2010-07-21',
  'frame'

In [45]:
# Join share count, price and each measure for Apple on the "end" date column.
measures = ["Assets", "Liabilities", "AssetsCurrent", "LiabilitiesCurrent"]
Apple = Stock("aapl", measures)
stock = Apple
shares = stock.shares()
stock_num = stock.price
# A failed download is signalled with the integer 0. The previous guard was a
# no-op, so the merge below would fail on an int; stop with a clear message.
if isinstance(shares, int) or isinstance(stock_num, int):
    raise RuntimeError("EDGAR download failed for AAPL; nothing to merge.")
df = pd.merge(shares.copy(), stock_num.copy(), on=["end"], how = "left")
frames_list = [stock.fact(measure) for measure in measures]
for frame in frames_list:
    df  = pd.merge(df,frame, on=["end"], how="left")
df.head(16)

Unnamed: 0,end,shares,close,adjclose,Assets,Liabilities,AssetsCurrent,LiabilitiesCurrent
0,2009-06-27,895816758.0,5.070357,4.298038,48140000000.0,22252000000.0,35170000000.0,16661000000.0
1,2009-06-28,895816758.0,5.070357,4.298038,48140000000.0,22252000000.0,35170000000.0,16661000000.0
2,2009-06-29,895816758.0,5.070357,4.298038,48140000000.0,22252000000.0,35170000000.0,16661000000.0
3,2009-06-30,895816758.0,5.086786,4.311965,48140000000.0,22252000000.0,35170000000.0,16661000000.0
4,2009-07-01,895816758.0,5.101071,4.324075,48140000000.0,22252000000.0,35170000000.0,16661000000.0
5,2009-07-02,895816758.0,5.000714,4.239004,48140000000.0,22252000000.0,35170000000.0,16661000000.0
6,2009-07-03,895816758.0,4.950357,4.196318,48140000000.0,22252000000.0,35170000000.0,16661000000.0
7,2009-07-04,895816758.0,4.950357,4.196318,48140000000.0,22252000000.0,35170000000.0,16661000000.0
8,2009-07-05,895816758.0,4.950357,4.196318,48140000000.0,22252000000.0,35170000000.0,16661000000.0
9,2009-07-06,895816758.0,4.950357,4.196318,48140000000.0,22252000000.0,35170000000.0,16661000000.0


Path to info: Stock["facts"]["us-gaap"][some_measure]["units"]["USD"]

Path to meta info: Stock["facts"]["dei"]

Three things:
    First:
        Graph and correlate measures and ratios
            Currently forward-filling fundamentals data; this could cause problems in the future
            Include the rate of change of the variables and compare those 
    Second:
        Make a test to validate trading strategies
            Add a function that picks eligible stocks at some timeframe and tests their performance
    Third:
        Train a model to predict the long term price
            Forward-filling fundamentals can be really problematic here
            Include all the past fundamentals for each datapoint - this could figure out how people value growth.

Data to add for regression and AI:
    oil price
    interest rates 
    

Measures:
    Price/BookValuePerShare = MarketCap/BookValue = (EntityCommonStockSharesOutstanding * SharePrice) / (Assets - Liabilities)
    Price/Earnings - all the possible earnings metrics
    Current ratio = AssetsCurrent/LiabilitiesCurrent
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////
    Revenue
    Operating Income 
    Free cash flow 
    EBITDA 
    Dividends - "PaymentsOfDividendsCommonStock"

How does market cap influence these correlations?

Where do you find price data?
    Yahoo-fin.


Methods to figure out relationships:
    Kendall's Tau
    Spearman's Rank Correlation

    cross variable correlation loop through all variables 
    Polynomial regression
    LSTM


Todo:
    Load the joined dataframe and check it.

*Get all the measure names*

In [19]:
# with open(r"C:\Programming\Python\Finance\EDGAR\stock.json", "w") as file:
#     json.dump(df.to_dict(orient='records'), file, indent=1)

# Raw strings keep the Windows backslashes literal; the un-prefixed form relied
# on invalid escape sequences (\P, \F, \E, \s) being passed through unchanged.
# `with` closes each file on exit, so no explicit close() is needed.
with open(r"C:\Programming\Python\Finance\EDGAR\shares.json", "w") as file:
    json.dump(stock.shares().to_dict(orient='records'), file, indent=1)

# NOTE(review): `Apple` is indexed as the raw JSON dict here while `stock` is
# used as a Stock instance above — confirm both are bound as expected.
with open(r"C:\Programming\Python\Finance\EDGAR\assets.json", "w") as file:
    json.dump(Apple["facts"]["us-gaap"]["Assets"]["units"]["USD"], file, indent=1)


    

In [None]:
# Map every concept name (padded to 100 characters) to its "label" field,
# us-gaap entries first, then a separator row, then the dei entries.
# NOTE(review): this rebinds `measures`, which other cells use as a list of
# measure names — confirm that is intended.
measures = {}
for key,value in Apple["facts"]["us-gaap"].items() :
    measures[key.ljust(100)] = value["label"]

measures["METADATA".ljust(200,"/")] = ""

for key,value in Apple["facts"]["dei"].items():
    measures[key.ljust(100)] = value["label"]


with open(r"C:\Programming\Python\Finance\EDGAR\measures.json","w") as file:
    json.dump(measures, file, indent=1)


#create price reference list:
# The path is now a raw string: the un-prefixed form relied on invalid escape
# sequences (\P, \F, \E, \p) being passed through unchanged. `with` closes the
# file on exit, so no explicit close() is needed.
# NOTE(review): `Apple` is indexed like a dict above but accessed as an object
# with a `.price` attribute here — confirm which binding this cell expects.
with open(r"C:\Programming\Python\Finance\EDGAR\price.json", "w") as file:
    json.dump(Apple.price.to_dict(orient='records'), file, indent=1)


In [48]:
# Remove the "frame" entry (when present) from every AssetsCurrent USD record,
# modifying the records in place, and print each record afterwards.
for record in Apple["facts"]["us-gaap"]["AssetsCurrent"]["units"]["USD"]:
    record.pop("frame", None)
    print(record)



{'end': '2008-09-27', 'val': 32311000000, 'accn': '0001193125-09-153165', 'fy': 2009, 'fp': 'Q3', 'form': '10-Q', 'filed': '2009-07-22'}
{'end': '2008-09-27', 'val': 32311000000, 'accn': '0001193125-09-214859', 'fy': 2009, 'fp': 'FY', 'form': '10-K', 'filed': '2009-10-27'}
{'end': '2008-09-27', 'val': 30006000000, 'accn': '0001193125-10-012091', 'fy': 2009, 'fp': 'FY', 'form': '10-K/A', 'filed': '2010-01-25'}
{'end': '2009-06-27', 'val': 35170000000, 'accn': '0001193125-09-153165', 'fy': 2009, 'fp': 'Q3', 'form': '10-Q', 'filed': '2009-07-22'}
{'end': '2009-09-26', 'val': 36265000000, 'accn': '0001193125-09-214859', 'fy': 2009, 'fp': 'FY', 'form': '10-K', 'filed': '2009-10-27'}
{'end': '2009-09-26', 'val': 31555000000, 'accn': '0001193125-10-012085', 'fy': 2010, 'fp': 'Q1', 'form': '10-Q', 'filed': '2010-01-25'}
{'end': '2009-09-26', 'val': 31555000000, 'accn': '0001193125-10-012091', 'fy': 2009, 'fp': 'FY', 'form': '10-K/A', 'filed': '2010-01-25'}
{'end': '2009-09-26', 'val': 31555000

In [40]:
# print(Apple["facts"]["us-gaap"]["Assets"]["units"]["USD"][0]["end"])


datetime.date(2024, 1, 11)