In [1]:
import json
import os
from pathlib import Path

import openai
import pandas as pd
import requests
from dotenv import load_dotenv
from sec_api import ExtractorApi
from sec_api import XbrlApi

In [2]:

# Load environment variables from .env file
load_dotenv()

# Retrieve the OpenAI API key from the GPT environment variable
api_key = os.environ.get("GPT")

# Fail with a readable error instead of exit(): in a notebook, exit() shuts the
# kernel down rather than just stopping this cell.
if not api_key:
    raise RuntimeError("API key not loaded: set GPT in the environment or in the .env file")

print("API loaded")

# Configure OpenAI library to use your API key
openai.api_key = api_key


API loaded


In [8]:
sec_api_key = os.getenv('SEC_API')

# Report only whether the key is present: printing the secret itself would
# persist it in the saved notebook output.
print("SEC API key loaded" if sec_api_key else "SEC API key not loaded")

[REDACTED: sec-api key removed from saved output; rotate this key]


In [9]:
import pandas as pd
from sec_api import XbrlApi

# Reuse the sec-api key read from the SEC_API environment variable in an earlier cell
API_KEY = sec_api_key
# Shared XBRL client; later cells call xbrlApi.xbrl_to_json(...) on it
xbrlApi = XbrlApi(API_KEY)


In [10]:
import pandas as pd

def get_balance_sheet(xbrl_json):
    """Build a balance-sheet DataFrame from an XBRL-to-JSON filing.

    Parameters
    ----------
    xbrl_json : dict
        Mapping with a 'BalanceSheets' key whose value maps each US GAAP item
        name to a list of fact dicts. A fact is used only when it has no
        'segment' key and has a 'period' containing an 'instant' date.

    Returns
    -------
    pandas.DataFrame
        One row per US GAAP item and one column per 'instant' date, holding
        float64 values. When the same instant occurs more than once for an
        item, the first value seen is kept.

    Raises
    ------
    KeyError
        If 'BalanceSheets' is missing from ``xbrl_json``.
    """
    balance_sheet_store = {}

    # Iterate over each US GAAP item in the balance sheet
    for usGaapItem, facts in xbrl_json['BalanceSheets'].items():
        # Ensure facts is a list before iterating
        if not isinstance(facts, list):
            print(f"Skipping {usGaapItem} as it's not a list.")
            continue

        # instant date -> value; the dict keeps first-seen order and makes the
        # duplicate check O(1) instead of scanning a list.
        value_by_instant = {}

        for fact in facts:
            # Check if fact is a dictionary
            if not isinstance(fact, dict):
                print(f"Skipping a fact in {usGaapItem} as it's not a dictionary.")
                continue

            # Only consider items without segment. Not required for our analysis.
            if 'segment' in fact or 'period' not in fact or 'instant' not in fact['period']:
                continue

            # Use 'instant' for index
            index = fact['period']['instant']

            if 'value' not in fact:
                print(f"No 'value' key for {usGaapItem} on {index}")
            elif index not in value_by_instant:
                # Repeated instants are skipped silently: the value is present,
                # so reporting a missing 'value' key for them would be wrong.
                value_by_instant[index] = fact['value']

        balance_sheet_store[usGaapItem] = pd.Series(
            list(value_by_instant.values()),
            index=list(value_by_instant.keys()),
            dtype='float64',
        )

    balance_sheet = pd.DataFrame(balance_sheet_store)
    # Switch columns and rows so that US GAAP items are rows and each column header represents a date
    return balance_sheet.T


In [11]:
import pandas as pd

# URLs for Peloton's 10-K filings for fiscal years 2020 through 2023
url_10k_20 = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1639825/000163982520000122/pton-20200630.htm"
url_10k_21 = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1639825/000163982521000256/pton-20210630.htm"
url_10k_22 = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1639825/000163982522000117/pton-20220630.htm"
url_10k_23 = "https://www.sec.gov/ix?doc=/Archives/edgar/data/1639825/000163982523000132/pton-20230630.htm"

# Converted filings are cached here so that re-running the notebook does not
# spend another sec-api request per filing (the free tier is request-limited).
XBRL_CACHE_DIR = Path("xbrl_cache")


def load_xbrl_json(htm_url, cache_dir=XBRL_CACHE_DIR):
    """Return the XBRL-to-JSON conversion of a filing, using a local cache.

    Parameters
    ----------
    htm_url : str
        URL of the filing's .htm document.
    cache_dir : str or pathlib.Path
        Directory holding one ``<document name>.json`` file per filing.

    Returns
    -------
    dict
        The parsed JSON, read from the cache when present and otherwise
        fetched through ``xbrlApi.xbrl_to_json`` and written to the cache.
    """
    document_name = htm_url.rsplit("/", 1)[-1].rsplit(".", 1)[0]
    cache_file = Path(cache_dir) / f"{document_name}.json"

    if cache_file.exists():
        return json.loads(cache_file.read_text(encoding="utf-8"))

    xbrl_json = xbrlApi.xbrl_to_json(htm_url=htm_url)
    cache_file.parent.mkdir(parents=True, exist_ok=True)
    cache_file.write_text(json.dumps(xbrl_json), encoding="utf-8")
    return xbrl_json


xbrl_json_2020 = load_xbrl_json(url_10k_20)
xbrl_json_2021 = load_xbrl_json(url_10k_21)
xbrl_json_2022 = load_xbrl_json(url_10k_22)
xbrl_json_2023 = load_xbrl_json(url_10k_23)



Exception: API error: 429 - {"status":429,"error":"You send a lot of requests. We like that. But you exceeded the free query limit of 100 requests. Upgrade your account to get unlimited access. Visit sec-api.io for more."}

In [None]:
# Convert each filing's XBRL JSON into a balance-sheet DataFrame, oldest filing first
balance_sheet_2020, balance_sheet_2021, balance_sheet_2022, balance_sheet_2023 = (
    get_balance_sheet(filing_json)
    for filing_json in (xbrl_json_2020, xbrl_json_2021, xbrl_json_2022, xbrl_json_2023)
)

In [None]:
# Bare last expression so Jupyter renders the DataFrame as a rich table instead of plain text
balance_sheet_2020

In [None]:
# Stack the per-filing balance sheets. The same US GAAP item appears once per
# filing; the rows are collapsed later by the groupby on 'index'.
balance_sheets_merged = (
    pd.concat(
        [balance_sheet_2020, balance_sheet_2021, balance_sheet_2022, balance_sheet_2023],
        axis=0,
        sort=False,
    )
    .sort_index()
    # Column-wise numeric conversion, done before reset_index so the item names
    # are not touched. This replaces the deprecated DataFrame.applymap and
    # to_numeric(errors='ignore') calls.
    .apply(pd.to_numeric, errors='coerce')
    .reset_index()
)

balance_sheets_merged.head(10)

In [None]:


# Earliest calendar year of balance-sheet dates to keep
FIRST_YEAR_TO_KEEP = 2019

# Aggregate by index and take max
balance_sheets = balance_sheets_merged.groupby('index').max()

# Reindex to the items (and item order) of the 2020 balance sheet
balance_sheets = balance_sheets.reindex(balance_sheet_2020.index)

# Drop columns dated before 2019. Each column header is a single 'instant' date,
# not a start/end period, so there is no duration to filter on. The previous loop
# split the header into start/end dates and never added anything to cols_to_drop.
column_dates = pd.to_datetime(balance_sheets.columns, errors='coerce')
cols_to_drop = [
    col
    for col, col_date in zip(balance_sheets.columns, column_dates)
    # Headers that do not parse as a date are kept rather than silently dropped
    if pd.notna(col_date) and col_date.year < FIRST_YEAR_TO_KEEP
]

balance_sheets = balance_sheets.drop(columns=cols_to_drop)

# Convert to readable format
balance_sheets = balance_sheets.apply(lambda row: pd.to_numeric(row, errors='coerce', downcast='integer').astype(str), axis=1)

# Sort columns
balance_sheets = balance_sheets[sorted(balance_sheets.columns)]

print("Cleaned Balance sheets from 10-K filings (2019 onwards) as dataframe:")
print('----------------------------------------------------------------------')
balance_sheets


In [None]:
def analyze_balance_sheet_with_gpt(df, model="gpt-3.5-turbo-instruct"):
    """Ask an OpenAI completion model to analyze a balance sheet and print the answer.

    Parameters
    ----------
    df : pandas.DataFrame
        Balance-sheet data; it is embedded in the prompt via ``df.to_string()``.
    model : str, optional
        Completion model to query. The previous hard-coded model,
        ``text-davinci-003``, has been shut down by OpenAI.

    Returns
    -------
    None
        The model's reply is printed, not returned.
    """
    # Convert DataFrame to a string representation for sending to GPT
    balance_sheet_str = df.to_string()

    prompt_text = f"Please analyze the following balance sheet data for the last few years:\n\n{balance_sheet_str}\n\nProvide insights on the assets, liabilities, and equity trends, and evaluate if the investing risk has increased in 750 words or less."

    # openai>=1.0 exposes the endpoint as `openai.completions`; older releases
    # only have `openai.Completion`, which was removed in 1.0.
    if hasattr(openai, "completions"):
        create_completion = openai.completions.create
    else:
        create_completion = openai.Completion.create

    # Make API call to OpenAI
    response = create_completion(
        model=model,
        prompt=prompt_text,
        max_tokens=1000
    )

    # Print GPT's analysis
    print(response.choices[0].text.strip())

In [None]:
# Send the cleaned balance sheets to the model; the analysis text is printed, not returned
analyze_balance_sheet_with_gpt(balance_sheets)