In [47]:
import requests
import pandas as pd
import datetime
import numpy as np

# HTTP headers sent with the sec.gov requests below; the User-Agent carries a
# contact e-mail address.
headers = {"User-Agent": "russ@sunriseanalysis.com"}

# Ticker symbol of the company to analyse.
ticker = "AAPL"
# us-gaap concept tag that is requested from the companyconcept endpoint.
concept = "NetIncomeLoss"

In [48]:
def get_cik(ticker, user_agent="russ@sunriseanalysis.com", timeout=30):
    """
    Get the CIK for a given ticker.

    Downloads the ticker listing from sec.gov and returns the CIK of the
    first entry whose ticker matches, left-padded with zeros to 10 digits.

    Args:
        ticker (str): The ticker symbol of the company (case-insensitive).
        user_agent (str): Value sent in the ``User-Agent`` request header.
        timeout (float): Seconds to wait for the server before giving up.
    Returns:
        str: The zero-padded, 10-character CIK for the company.
    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        IndexError: If the ticker is not present in the listing.
    """
    ticker = ticker.strip().upper()
    # Get the tickers JSON file
    response = requests.get(
        "https://www.sec.gov/files/company_tickers.json",
        headers={"User-Agent": user_agent},
        timeout=timeout,
    )
    # Surface HTTP errors directly instead of failing later while decoding
    # an error page as JSON.
    response.raise_for_status()

    # The payload maps an arbitrary index to records holding at least the
    # "cik_str" and "ticker" keys; scan for the first matching record.
    for entry in response.json().values():
        if entry["ticker"] == ticker:
            return str(entry["cik_str"]).zfill(10)

    # IndexError is kept as the exception type so existing handlers still
    # work; only the message is made explicit.
    raise IndexError(f"No CIK found for ticker {ticker!r}")

In [49]:
# Resolve the ticker to its zero-padded 10-character CIK (performs an HTTP request).
cik = get_cik(ticker)

In [50]:
# companyconcept endpoint for the selected company (CIK) and us-gaap concept.
concept_url = (
    f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/us-gaap/{concept}.json"
)
# A timeout keeps the cell from hanging forever on a stalled connection.
concept_response = requests.get(concept_url, headers=headers, timeout=30)
# Fail here on a 4xx/5xx answer rather than with a JSON decoding error below.
concept_response.raise_for_status()
concept_df = pd.DataFrame.from_dict(concept_response.json())

In [51]:
# Build a table of the USD facts returned for the concept and parse the
# date columns.
concept_units = pd.DataFrame.from_dict(concept_df["units"]["USD"])
for col in ["start", "end", "filed"]:
    concept_units[col] = pd.to_datetime(concept_units[col])

# Add the length of each reporting period and express values in millions.
concept_units = concept_units.assign(
    time_delta=concept_units["end"] - concept_units["start"],
    val=concept_units["val"] / 1_000_000,
).rename(columns={"val": "value(millions)"})
df = concept_units

# Periods longer than 350 days are treated as full-year figures.
is_annual = df["time_delta"] > pd.Timedelta(days=350)
# Keep only facts that carry a `frame` label, one row per frame.
# De-duplicating on the frame (the period) rather than on the reported value
# avoids dropping a period merely because another period has the same value,
# and filtering on `frame` first guarantees the labelled row is the one kept.
annual_data = (
    df[is_annual]
    .dropna(subset=["frame"])
    .drop_duplicates(subset=["frame"], keep="last")
    .drop(columns=["time_delta", "filed", "form", "fy", "fp"])
    .rename(columns={"frame": "year"})
    .reset_index(drop=True)
)
df = df[~is_annual]

# Of the remaining rows, periods longer than 115 days are set aside in
# `extra_data`; only the shorter periods stay in `df`.
is_multi_quarter = df["time_delta"] > pd.Timedelta(days=115)
extra_data = df[is_multi_quarter].reset_index(drop=True)
df = (
    df[~is_multi_quarter]
    .dropna(subset=["frame"])
    .drop_duplicates(subset=["frame"], keep="last")
    .drop(columns=["time_delta", "filed", "form", "fy", "fp", "accn"])
    .reset_index(drop=True)
)

In [52]:
def map_to_quarter(end_date, fiscal_year_end_month=None):
    """
    Map the end date of a reporting period to a fiscal quarter number.

    A period ending in the fiscal-year-end month (or in the two months after
    it) is quarter 4; each following three-month window is quarter 1, 2 and 3
    respectively.

    Args:
        end_date: Object exposing a ``month`` attribute (e.g. a
            ``datetime.date`` or ``pandas.Timestamp``).
        fiscal_year_end_month (int, optional): Month (1-12) in which the
            fiscal year ends. When omitted, the module-level ``end_month``
            variable is used, which must then already be defined.
    Returns:
        int: The fiscal quarter, 1 through 4.
    """
    if fiscal_year_end_month is None:
        # Backward-compatible fallback for callers that pass only the date.
        fiscal_year_end_month = end_month

    # Months elapsed since the fiscal year end, shifted by 9 so that the
    # year-end month itself lands on 9 (the first value of the Q4 window).
    month_diff = (end_date.month - fiscal_year_end_month + 9) % 12
    if month_diff < 3:
        return 1
    if month_diff < 6:
        return 2
    if month_diff < 9:
        return 3
    return 4  # 9, 10, 11

# Month of the `end` date in the first annual row; map_to_quarter reads this
# module-level variable, so it has to be set before the function is applied.
end_month = annual_data["end"].iloc[0].month
# Label every remaining row with the fiscal quarter derived from its end date.
df["quarter"] = df["end"].apply(map_to_quarter)

In [53]:
# Step 1: Build the full list of quarterly frame labels ("CY<year>Q<n>")
# between the earliest and latest frame present in the data.
start_year = int(df["frame"].min()[2:6])  # characters 2-5 of the label hold the year
end_year = int(df["frame"].max()[2:6])
frames = [
    f"CY{year}Q{quarter}"
    for year in range(start_year, end_year + 1)
    for quarter in range(1, 5)
]
df_full = pd.DataFrame({"frame": frames})

# Step 2: Left-join the data onto the full list so that absent quarters show
# up as rows of missing values.
# Step 3: Order the rows by frame label, then discard the label column.
merged = (
    pd.merge(df_full, df, how="left", on="frame")
    .sort_values(by="frame")
    .reset_index(drop=True)
    .drop(columns=["frame"])
)

In [54]:
# Candidate boundaries for rows lacking dates, both derived from the
# neighbouring rows before anything is filled in:
#   start = previous row's end + 1 day
#   end   = next row's start   - 1 day
one_day = pd.Timedelta(days=1)
start_fill = merged["end"].shift() + one_day
end_fill = merged["start"].shift(-1) - one_day

# Only missing entries are replaced; existing dates are left untouched.
merged["start"] = merged["start"].fillna(start_fill)
merged["end"] = merged["end"].fillna(end_fill)

In [55]:
today = datetime.datetime.today()
# Cutoff: the first day of the month immediately before the current calendar
# quarter (Dec 1 of last year, Mar 1, Jun 1 or Sep 1).
# NOTE(review): presumably one month early so that fiscal quarters starting
# late in the preceding month are excluded too - confirm the intent.
calendar_quarter = (today.month - 1) // 3  # 0 for Jan-Mar ... 3 for Oct-Dec
if calendar_quarter == 0:
    quarter_start = datetime.datetime(today.year - 1, 12, 1)
else:
    quarter_start = datetime.datetime(today.year, 3 * calendar_quarter, 1)
# Keep only rows whose 'start' date is before the cutoff; rows with a missing
# 'start' fail the comparison and are dropped as well.
merged = merged.loc[merged["start"] < quarter_start]

In [57]:
# Show the resulting table as the cell output.
merged

Unnamed: 0,start,end,value(millions),quarter
1,2008-03-30,2008-06-28,1072.0,3.0
2,2008-06-29,2008-09-27,,
3,2008-09-28,2008-12-27,2255.0,1.0
4,2008-12-28,2009-03-28,1620.0,2.0
5,2009-03-29,2009-06-27,1828.0,3.0
6,2009-06-28,2009-09-26,2532.0,4.0
7,2009-09-27,2009-12-26,3378.0,1.0
8,2009-12-27,2010-03-27,3074.0,2.0
9,2010-03-28,2010-06-26,3253.0,3.0
10,2010-06-27,2010-09-25,4308.0,4.0
