In [56]:
import httpx
import asyncio
import requests
from fredapi import Fred
import numpy as np
import json
import pandas as pd
import time
import yahoo_fin.stock_info as si
from datetime import datetime
import tracemalloc

fred = Fred(api_key='0c34c4dd2fd6943f6549f1c990a8a0f0') 
client =  httpx.AsyncClient()
async def fetch(url, headers, semaphore, client, timeout, max_retries, start_retry_delay):
    async with semaphore:
        for attempt in range(1,max_retries):
            try:
                response = await client.get(url, timeout=timeout, headers= headers)
                response.raise_for_status()
                return response  # Successful request, exit the loop
            except httpx.HTTPStatusError as e:
                headers = response.headers
                #Sometimes a retry-after header is returned
                retry_after = headers.get('Retry-After')
                if retry_after != None:
                    #Just for debugging
                    print(retry_after)
                    await asyncio.sleep(retry_after.astype(int))
                    continue
                print(f"Error response {e.response.status_code}.")
            except httpx.TimeoutException as e:
                print(f"Timeout reached: {e}")
                if attempt < max_retries:
                    print(f"Retrying in {attempt*start_retry_delay} seconds...")
                    await asyncio.sleep(attempt*start_retry_delay)
                else:
                    print("Max retries reached. Exiting.")
                    return 0
            except httpx.RequestError as e:
                print(f"An error occurred: {e}.")

def fred_info(ids:list, start:str, end:str):
    #start and end are datatime objects
    start = start.strftime('%Y-%m-%d')
    end = end.strftime('%Y-%m-%d')
    frame = pd.DataFrame()
    for id in ids:
        series = fred.get_series(id,observation_start=start, observation_end=end)
        frame[id] = series
    frame = frame.reset_index()
    frame["index"] = frame["index"].astype(str)
    return frame.ffill().bfill()

async def fred_fetch(ids:list, start:str, end:str):
    fred_data = await asyncio.to_thread(fred_info,ids,start,end)
    return fred_data

async def yahoo_fetch(ticker ,start_year, end_year):
    yahoo_data = await asyncio.to_thread(si.get_data,ticker,start_year, end_year)
    return yahoo_data

In [57]:
def runlist(dict, nameslist):
    idx = 0
    while (idx<len(nameslist)):
        try:
            data = dict[nameslist[idx]]
            return data
        except KeyError:
            idx +=1
    raise KeyError(f"None of the names in {nameslist} matched {dict}")


def getcik(ticker):
    #Convert the ticker into the proper cik
    for key,value in cikdata.items():
        if value["ticker"] == ticker:
            cik = value["cik_str"]
            break
    return str(cik).zfill(10)

#Headers for EDGAR call
headers = {
    "User-Agent":"ficakc@seznam.cz",
    "Accept-Encoding":"gzip, deflate",
}

TIMEOUT = 8
RETRIES = 2
START_RETRY_DELAY = 0.4
# cik_url =  "https://www.sec.gov/files/company_tickers.json"
# cikdata = requests.get(cik_url, headers=headers).json()

with open(r"C:\Programming\Python\Finance\EDGAR\cik.json","r") as file:
    cikdata = json.load(file)
    file.close()
with open(r"C:\Programming\Python\Finance\EDGAR\apple.json","r") as file:
    Apple = json.load(file)
    file.close()



def companyfacts(ticker:str, client, semaphore):
    #Get all the financial data for a ticker
    cik = getcik(ticker)
    data_url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json"
    facts = fetch(data_url, headers, semaphore, client, TIMEOUT,RETRIES,START_RETRY_DELAY)
    return facts

def endtodatetime(dataframe):
    dataframe.loc[:,"end"] = pd.datetime(dataframe["end"])
    return dataframe



class Stock:
    def __init__(self, ticker:str):
        self.ticker = ticker.upper()
        self.cik = getcik(self.ticker)
        
    async def async_init(self,client, semaphore, measures):
        #Get all of the data for the company, ALL of it 
        # snapshot3 = tracemalloc.take_snapshot()
        data = await companyfacts(self.ticker, client, semaphore)
        # snapshot4 = tracemalloc.take_snapshot()
        # top_stats = snapshot3.compare_to(snapshot4, 'lineno')
        # for stat in top_stats[:10]:
        #     print(stat)
        #If the response wasn't recieved, skips the rest of the code 
        if type(data) != int:
            self.data = data.json()
        else:
            return
        #Get the share amount 
        self.share_name_list = ["EntityCommonStockSharesOutstanding","CommonStockSharesOutstanding", "EntityPublicFloat"]
        share_info = runlist(self.data["facts"]["dei"], self.share_name_list)
        share_date= datetime.strptime(share_info["units"]["shares"][0]["end"], r"%Y-%m-%d")
        self.start_year = max([share_date] + [datetime.strptime(self.data["facts"]["us-gaap"][measure]["units"]["USD"][0]["end"], r"%Y-%m-%d") if measure in self.data["facts"]["us-gaap"] else datetime.strptime('1920-01-01', r"%Y-%m-%d") for measure in measures])
        self.end_year = datetime.now().date()
        #Get the price and set the self.price
        self.fullprice = await yahoo_fetch(self.ticker,self.start_year, self.end_year)
        self.fullprice = self.fullprice.reset_index()
        Price = self.fullprice[[self.fullprice.columns[0],"close", "adjclose"]].copy()
        Price["end"] = Price["index"].astype(str)
        Price.drop(columns=["index"],inplace=True)
        date_range = pd.date_range(start=self.start_year, end=self.end_year).astype(str)
        self.date_range = pd.DataFrame(date_range, columns=['end'])
        Price = pd.merge(self.date_range, Price, on = ["end"],how="left" )
        self.price = Price.bfill()

    def fact(self,measure,simple=True):
        #Propagate the 0 
        if self.data == 0:
            return 0
        try:
            point_list = self.data["facts"]["us-gaap"][measure]["units"]["USD"]
            frame = pd.DataFrame(point_list)
            frame = frame.drop_duplicates(subset='end', keep='last')
            frame[measure] = frame["val"]
            if simple:
                frame = frame[["end", measure]]
            frame = pd.merge(self.date_range,frame,on="end",how="left")
            return frame.ffill()
        except KeyError:
            print(f"Measure {measure} not available for company.")
    def shares(self,simple=True):
        #Propagate the 0 
        if self.data == 0:
            return 0
        if simple:
            share_count = pd.DataFrame(runlist(self.data["facts"]["dei"],self.share_name_list)["units"]["shares"])[["end","val"]]
        share_count["shares"] = share_count["val"]
        share_count.drop(columns=["val"], inplace = True)
        share_count = share_count.drop_duplicates(subset="end", keep="last")
        share_count = pd.merge(self.date_range, share_count, on=["end"], how="left")
        return share_count.ffill()
    
async def async_task(ticker, client, semaphore, measures, indicators):
    stock = Stock(ticker)
    print(f"Currently pinging {ticker}")
    await stock.async_init(client,semaphore,measures)
    try:
        #Price and shares oustanding 
        shares = stock.shares().copy()
        stock_price = stock.price.copy()
        df = pd.merge(shares, stock_price, left_on=["end"], right_on=["end"], how = "left")
        frames_list = [stock.fact(measure) for measure in measures]
        for frame in frames_list:
            df = pd.merge(df,frame, on=["end"], how="left")
    #HANDLE RETURNING NONES IN THE GATHER BECAUSE SOME FRAMES WILL BE NONE///////////////////////////
    except AttributeError:
        return
    #Economic indicators 
    indicator_frame = await fred_fetch(indicators, stock.start_year, stock.end_year)
    df = pd.merge(df, indicator_frame, left_on =["end"], right_on=["index"], how="left")
    return (ticker,df)


In [58]:
#write out measures based on importance in descending order
# tracemalloc.start()
measures = ["Assets", "Liabilities", "AssetsCurrent", "LiabilitiesCurrent"]
#write out the indicators 
indicators = ["TB3MS", "DCOILWTICO"]
#Get the first n companies sorted by market cap 
companies_num = 3
comp = 0
sem = asyncio.Semaphore(9)
#GATHER THE FIRST companies_num companies ciks and pass them to the gather with the tasks
ticker_list = []
for company, values in cikdata.items():
    if comp<companies_num:
        ticker_list.append(values["ticker"])
        comp+=1
    else:
        break
company_frames_tuples = await asyncio.gather(*[async_task(ticker, client, sem, measures, indicators) for ticker in ticker_list])
company_frames_dict = {}
for ticker, df in company_frames_tuples:
    company_frames_dict[ticker] = df

    
    

Currently pinging AAPL
Currently pinging MSFT
Currently pinging GOOGL


KeyError: 'shares'

In [None]:
# Apple.data["facts"]["dei"]["EntityCommonStockSharesOutstanding"]["units"]["shares"]
frame  = fred_info(["TB3MS", "DCOILWTICO"], '2015-02-24', '2017-02-24')
frame.head(40)
# print(frame)

frame["index"] = frame["index"].astype(str)
with open("C:\Programming\Python\Finance\EDGAR\FRED.json", "w") as file:
    json.dump(frame.to_dict(orient="records"), file, indent=1)
    file.close()


In [None]:
MSFT = companyfacts('MSFT').json()
print(MSFT["facts"]["dei"]["EntityCommonStockSharesOutstanding"]["units"]["shares"][0]["end"])


In [None]:
measures = ["Assets", "Liabilities", "AssetsCurrent", "LiabilitiesCurrent"]
Apple = Stock("aapl", measures)
stock = Apple
shares = stock.shares()
stock_num = stock.price
if isinstance(shares, int) or isinstance(stock_num, int): 
    pass
    # break
df = pd.merge(shares.copy(), stock_num.copy(), on=["end"], how = "left")
frames_list = [stock.fact(measure) for measure in measures]
for frame in frames_list:
    df = pd.merge(df,frame, on=["end"], how="left")
df.head(16)

Path to info: Stock["facts"]["us-gaap"][some_measure]["units"]["USD"]

Path to meta info: Stock["facts"]["dei"]

List of all possible names for shares outstanding:
    CommonStockSharesOutstanding
    EntityCommonStockSharesOutstanding

Three things:
    First:
        Graph and correlate measures and ratios
            Currently forward filling fundamentals data, could be fucked in the future
            Include the rate of change of the variables and compare those 
    Second:
        Make a test to validate trading strategies
            Add a function that picks eligible stocks at some timeframe and test their performance xdz
    Third:
        Train a model to predict the long term price
            forward filling fundamentals can be really fucked 
            Consider using averages or linear change from previous value to next for training 
            Include all the past fundamentals for each datapoint - this could figure out how people value growth.

Data to add for regression and AI:
    oil price
    interest rates 
        FRED HAS SO MUCH DATA HOLY SHIT 
        pick yer fucking poison mate
    

Measures:
    Price/BookValuepershare = MarketCap/BookValue = EntityCommonStockSharesOutstanding*SPrice(Assets-Liabilities)
    Price/Earnings - all the possible earnings metrics
    Current ratio = AssetsCurrent/LiabilitiesCurrent
    ///////////////////////////////////////////////////////////////////////////////////////////////////////////
    Revenue
    Operating Income 
    Free cash flow 
    EBITDA 
    Dividends - "PaymentsOfDividendsCommonStock"

How does market cap influence these correlations?

Where do you find price data?
    Yahoo-fin.


Methods to figure out relationships:
    Kendall's Tau
    Spearman's Rank Correlation

    cross variable correlation loop through all variables 
    Polynomial regression
    LSTM


Todo:
    You are using the calls one after another in the same task, you can run them concurrently
    Investigate why not forward filling
    Implement a list check for all the possible names for common shares
    Implement ConnectionError safeguard
    Remove memory snapshots if not useful
    Make run list implementation for shares aswell -ITS FUCKEDDDDDDDDDDDDD

*Get all the measure names*

In [None]:
# with open("C:\Programming\Python\Finance\EDGAR\stock.json", "w") as file:
#     json.dump(df.to_dict(orient='records'), file, indent=1)
#     file.close()

with open("C:\Programming\Python\Finance\EDGAR\shares.json", "w") as file:
    json.dump(stock.shares().copy().to_dict(orient='records'), file, indent=1)
    file.close()

with open(r"C:\Programming\Python\Finance\EDGAR\assets.json", "w") as file:
    json.dump(Apple["facts"]["us-gaap"]["Assets"]["units"]["USD"], file, indent=1)
    file.close()


    

In [None]:
measures = {}
for key,value in Apple["facts"]["us-gaap"].items() :
    measures[key.ljust(100)] = value["label"]

measures["METADATA".ljust(200,"/")] = ""

for key,value in Apple["facts"]["dei"].items():
    measures[key.ljust(100)] = value["label"]


with open(r"C:\Programming\Python\Finance\EDGAR\measures.json","w") as file:
    json.dump(measures, file, indent=1)


#create price reference list:
with open("C:\Programming\Python\Finance\EDGAR\price.json", "w") as file:
    json.dump(Apple.price.to_dict(orient='records'), file, indent=1)
    file.close()


In [None]:
for key in Apple["facts"]["us-gaap"]["AssetsCurrent"]["units"]["USD"] :
    try:
        del key["frame"]
        print(key)
    except KeyError:
        print(key)



In [None]:
# print(Apple["facts"]["us-gaap"]["Assets"]["units"]["USD"][0]["end"])
