In [6]:
# Imports used by the cells in this notebook
import pandas as pd
import requests
import json

In [None]:


# Demo: download the raw EDGAR submissions JSON for a single CIK and print it.
cik = 320193
# Left-pad the CIK with zeros to 10 characters before placing it in the URL.
CIK_input = str(cik).zfill(10)
url = f"https://data.sec.gov/submissions/CIK{CIK_input}.json"

# User-Agent header sent along with the request.
headers = {'User-Agent': 'Nguyen Xuan Nam (namalcor02@gmail.com)'}
response = requests.get(url, headers=headers)

# Any status other than 200 is reported; on success the parsed JSON is kept
# in `data` and printed.
if response.status_code != 200:
    print(f"Failed to download JSON file. Status code: {response.status_code}")
else:
    data = response.json()
    print(data)

In [4]:
def get_latest_filing_URL(cik, type = "10-K"):
    """
    Retrieve the latest filing URL of a given form type for a company, identified by its CIK (Central Index Key).

    Parameters:
    cik (int or str): The Central Index Key of the company. It is zero-padded to 10 characters
        for the submissions request and used as given in the archive URL.
    type (str): The form type to search for (default is "10-K"). Examples include "10-K", "10-Q", "8-K".
        (The name shadows the builtin; it is kept so existing keyword callers keep working.)

    Returns:
    tuple or None:
        - (company name, filing year, filing URL) for the first entry of data["filings"]["recent"]
          whose form equals `type`.
        - (company name, None, None) when no recent filing has the requested form.
        - None when the submissions request does not return HTTP 200.

    Example:
    get_latest_filing_URL(320193, "10-K")
    ('Apple Inc.', '2022', 'https://www.sec.gov/Archives/edgar/data/320193/000032019322000007/aapl-20220924x10k.htm')

    Notes:
    - The function sends a GET request to the SEC submissions endpoint with a User-Agent header.
    - The first matching entry is treated as the latest one; this assumes the feed lists
      filings newest first -- TODO confirm against the SEC documentation.
    - Only the "recent" section of the feed is searched.
    """
    CIK_input = str(cik).zfill(10)
    url = f"https://data.sec.gov/submissions/CIK{CIK_input}.json"

    headers = {
        'User-Agent': 'NUS DSA3101 AY24/25S1 WW (ay2525s1ww@googlegroups.com)'
    }
    # A timeout keeps one unresponsive request from blocking a whole batch run.
    response = requests.get(url, headers=headers, timeout=30)

    # Anything other than HTTP 200 is reported and signalled with None.
    if response.status_code != 200:
        print(f"Failed to download JSON file. Status code: {response.status_code}, CIK: {cik}")
        return None

    data = response.json()
    company_name = data['name']
    recents = data["filings"]["recent"]

    # The feed stores filings as parallel lists; walk them together.
    filings = zip(
        recents["form"],
        recents["accessionNumber"],
        recents["primaryDocument"],
        recents["filingDate"],
    )
    for form, accession_number, primary_doc, filing_date in filings:
        if form == type:
            # The archive path uses the accession number without dashes.
            accession_number_url = accession_number.replace("-", "")
            filing_URL = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number_url}/{primary_doc}"
            # filingDate is sliced to its first four characters (the year).
            return company_name, filing_date[:4], filing_URL

    # No filing of the requested form: return placeholders instead of failing
    # on unbound locals, keeping the 3-tuple shape for downstream unpacking.
    return company_name, None, None

def map_ticker_to_cik(ticker_list: pd.DataFrame) -> pd.DataFrame:
    """
    Look up the CIK for each ticker found in `ticker_list`.

    Parameters:
    ticker_list (pd.DataFrame): Frame with a 'Ticker' column holding the symbols to keep.

    Returns:
    pd.DataFrame: The 'ticker' and 'cik' columns of data/mapping_stock.csv, restricted to
        rows whose ticker appears in ticker_list['Ticker'], with a fresh 0..n-1 index.
    """
    lookup = pd.read_csv('data/mapping_stock.csv', header=0)
    is_requested = lookup['ticker'].isin(ticker_list['Ticker'])
    return lookup.loc[is_requested, ["ticker", "cik"]].reset_index(drop=True)
    

In [None]:
# Build the table of latest 10-K filing URLs (the default form type) per ticker.
ticker_df = pd.read_excel("data/ECM_Datasets.xlsx")
cik_df = map_ticker_to_cik(ticker_df)

filing_columns = ["Company Name", "Latest Filing Year", "Filing URL"]
# get_latest_filing_URL returns None when the SEC request fails. A bare None
# mixed in with the tuples breaks the DataFrame construction below, so failed
# lookups are expanded into an all-None row instead.
filing_rows = [
    row if row is not None else (None, None, None)
    for row in cik_df["cik"].apply(get_latest_filing_URL)
]
# Passing `columns` keeps the three-column shape even when there are no rows.
cik_df[filing_columns] = pd.DataFrame(filing_rows, index=cik_df.index, columns=filing_columns)
cik_df.head()

In [8]:
# Build and save the table of latest 8-K filing URLs per ticker.
ticker_df = pd.read_excel("data/ECM_Datasets.xlsx")
# Kept under its own name so that `cik_df` is not replaced by 8-K results:
# a later cell writes `cik_df` to the 10-K workbook.
cik_8k_df = map_ticker_to_cik(ticker_df)

filing_columns = ["Company Name", "Latest Filing Year", "Filing URL"]
# get_latest_filing_URL returns None when the SEC request fails. A bare None
# mixed in with the tuples breaks the DataFrame construction below, so failed
# lookups are expanded into an all-None row instead.
filing_rows = [
    row if row is not None else (None, None, None)
    for row in cik_8k_df["cik"].apply(lambda x: get_latest_filing_URL(x, type = "8-K"))
]
# Passing `columns` keeps the three-column shape even when there are no rows.
cik_8k_df[filing_columns] = pd.DataFrame(filing_rows, index=cik_8k_df.index, columns=filing_columns)
cik_8k_df.to_excel("data/NASDAQ_8-K_URLs.xlsx", index=False)
# Preview goes last so the notebook actually renders it.
cik_8k_df.head()

In [68]:
# Write the current contents of `cik_df` to the 10-K workbook, without the index column.
# NOTE(review): this relies on `cik_df` holding the 10-K results when the cell
# runs -- confirm no other cell has reassigned it in between.
cik_df.to_excel("data/NASDAQ_10-K_URLs.xlsx", index=False)