In [1]:
import numpy as np
import pandas as pd
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import undetected_chromedriver as uc

In [2]:
# Build Chrome options requesting a 500x500 window, then start the browser
# through undetected_chromedriver (imported as `uc`) with those options.
option = webdriver.ChromeOptions()
option.add_argument("--window-size=500,500")
driver = uc.Chrome(options=option)

In [3]:
# Navigate the browser to the SEC EDGAR search page.
driver.get("https://www.sec.gov/edgar/search/")

In [4]:
# Tickers to search for; the following cell only reads the first entry (ticker_list[0]).
ticker_list = ['AAPL', 'AMZN']

In [5]:
# Ticker used for this run: the first entry of the list
input_ticker = ticker_list[0]

# Find the short-form entity input, empty it, then type the ticker into it
short_form_xpath = '//*[@id="entity-short-form"]'
input_button = driver.find_element(By.XPATH, short_form_xpath)
input_button.clear()
input_button.send_keys(input_ticker)

In [6]:
# Click the search button.
# Keep the element lookup and the click as separate statements: click()
# returns None, so chaining it onto the assignment left `search_button`
# bound to None instead of the element.
search_button = driver.find_element(By.XPATH, '//*[@id="search"]')
search_button.click()

In [9]:
# Wait for each field to be visible before touching it, so that running this
# cell immediately after the search click does not race the page update
# (same 10-second explicit wait the later cells use).
form_wait = WebDriverWait(driver, 10)

# Clearing the text search bar
form_wait.until(
    EC.visibility_of_element_located((By.XPATH, '//*[@id="keywords"]'))
).clear()

# Inputting ticker in the ticker search bar
form_wait.until(
    EC.visibility_of_element_located((By.XPATH, '//*[@id="entity-full-form"]'))
).send_keys(input_ticker)

In [10]:
# Elements to click, in order: the keywords box, the category dropdown,
# and the third entry of the category list (the "All Reports" option).
click_sequence = (
    '//*[@id="keywords"]',
    '//*[@id="category-select"]',
    '//*[@id="category-type-grp"]/ul/li[3]',
)

# Each click waits up to 10 seconds for its target to become clickable
for target_xpath in click_sequence:
    clickable = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, target_xpath))
    )
    clickable.click()

In [48]:
# Click the search button once it is clickable (10 second timeout)
submit_locator = (By.XPATH, '//*[@id="search"]')
submit_element = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(submit_locator)
)
submit_element.click()

#### Specific Text: "//a[contains(text(), '10-Q (Quarterly report)')]"

In [49]:
# Click the first link whose text contains '10-Q (Quarterly report)'
report_link_locator = (By.XPATH, "//a[contains(text(), '10-Q (Quarterly report)')]")
report_link = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(report_link_locator)
)
report_link.click()

In [50]:
# Click the "open file" button, which opens the document in another tab
open_file_locator = (By.XPATH, '//*[@id="open-file"]/button')
open_file_button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable(open_file_locator)
)
open_file_button.click()

In [51]:
# Block until exactly two windows exist, then move focus to the second handle
tab_wait = WebDriverWait(driver, 10)
tab_wait.until(EC.number_of_windows_to_be(2))

second_handle = driver.window_handles[1]
driver.switch_to.window(second_handle)

In [None]:
# Grab the HTML of the currently focused window
page_source = driver.page_source

# Persist it as UTF-8 text to page_source.html in the working directory
with open("page_source.html", mode="w", encoding="utf-8") as html_out:
    html_out.write(page_source)

In [52]:
# Wait (up to 10 seconds) for a <span> whose text contains 'Item 2.' to be present
item2_locator = (By.XPATH, "//span[contains(text(), 'Item 2.')]")
management_section = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located(item2_locator)
)
print("Item 2: ", management_section.text)

Item 2:  Item 2.    Management’s Discussion and Analysis of Financial Condition and Results of Operations


In [53]:
# Locate the nearest enclosing <div> of the "Item 2" span.
# The [1] predicate matters: `ancestor::div` on its own matches every
# enclosing div, and find_element returns the first match in document
# order, i.e. the OUTERMOST div. On the reverse `ancestor` axis, position 1
# is the closest ancestor, which is the element whose siblings hold the
# section text.
parent_element = management_section.find_element(By.XPATH, "./ancestor::div[1]")

current_element = parent_element
print("Current element: ", current_element.text)

Current element:  Item 2.    Management’s Discussion and Analysis of Financial Condition and Results of Operations


In [54]:
# Locate the element immediately following the "Item 2" heading container.
# The lookup is anchored on `parent_element` rather than on `current_element`
# itself, so re-running this cell always yields the same paragraph instead of
# silently advancing one sibling further on every execution.
current_element = parent_element.find_element(By.XPATH, "following-sibling::*[1]")
sibling_text = current_element.text.strip()
sibling_text

'This Item and other sections of this Quarterly Report on Form 10-Q (“Form 10-Q”) contain forward-looking statements, within the meaning of the Private Securities Litigation Reform Act of 1995, that involve risks and uncertainties. Forward-looking statements provide current expectations of future events based on certain assumptions and include any statement that does not directly relate to any historical or current fact. For example, statements in this Form 10-Q regarding the potential future impact of macroeconomic conditions on the Company’s business and results of operations are forward-looking statements. Forward-looking statements can also be identified by words such as “future,” “anticipates,” “believes,” “estimates,” “expects,” “intends,” “plans,” “predicts,” “will,” “would,” “could,” “can,” “may,” and similar terms. Forward-looking statements are not guarantees of future performance and the Company’s actual results may differ significantly from the results discussed in the forw

# Open AI API

Set the API key as an environment variable instead of hardcoding it in the notebook; this is good practice for handling sensitive information in code.

### On Windows
set OPENAI_API_KEY=your-api-key

### On macOS / Linux
export OPENAI_API_KEY=your-api-key


Alternatively, create a file named .env in the root of your project directory and add your API key to it:

OPENAI_API_KEY=your-api-key


In [44]:
import openai
from dotenv import load_dotenv
import os

In [None]:
# Read the .env file so its variables become available to this process
load_dotenv()

# Fetch the OpenAI key from the environment (None when it is not set)
openai_api_key = os.environ.get('OPENAI_API_KEY')

In [None]:
# Hand the key read from the environment to the openai module.
openai.api_key=openai_api_key

#### Code explanation: 
- response.choices: Accesses the list of completion choices.
- response.choices[0]: Gets the first choice in the list.
- response.choices[0].text: Extracts the text of the first choice.
- .strip(): Cleans up the text by removing any leading and trailing whitespace.

In [None]:
# Take the key from the environment rather than from a literal: assigning an
# empty string here overwrote the key configured earlier in the notebook, so
# a top-to-bottom run always reached the API call without credentials.
openai.api_key = os.getenv('OPENAI_API_KEY')

In [2]:
# Summarizing text function with OpenAI
def summarize_text(financial_text, engine="text-davinci-003", max_tokens=150, temperature=0.5):
    """Summarize a passage from a 10-Q financial report via the OpenAI Completions API.

    Args:
        financial_text: The report text to summarize.
        engine: Completion model to query. Defaults to the value this
            notebook has always used.
            NOTE(review): OpenAI lists "text-davinci-003" as a retired model;
            pass a currently available completion model here if the default
            is rejected -- confirm against the OpenAI deprecations page.
        max_tokens: Upper bound on the length of the generated summary.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The text of the first completion choice with surrounding whitespace
        removed, or an empty string when ``financial_text`` is None, empty,
        or whitespace-only.
    """
    # Nothing to summarize: return early instead of spending an API call on
    # a prompt that carries no report text.
    if not financial_text or not financial_text.strip():
        return ""

    response = openai.Completion.create(
        engine=engine,
        prompt=f"Summarize the following text from a 10-Q financial report {financial_text}",
        max_tokens=max_tokens,
        temperature=temperature,
    )
    # The API returns a list of choices; only the first one is used.
    summary = response.choices[0].text.strip()
    return summary

In [None]:
# Summarize the paragraph captured from the filing and show the result
summary = summarize_text(sibling_text)
print(f"Summary: {summary}")