In [32]:
import requests
import bs4

# URL to fetch
url = "https://www.alphavantage.co/documentation/"

# Send a GET request to the URL
response = requests.get(url)

def process_function(function: bs4.element.Tag) -> dict[str, str]:
    """
    Given an <h4> tag (function), extract:
    - function_name
    - description
    - api_params (arguments)
    - python_code_example
    """
    result = {
        "function_name": "",
        "description": "",
        "api_params": "",
        "python_code_example": "",
    }

    # Extract the function name
    function_name = function.get_text(strip=True)
    result["function_name"] = function_name

    # We'll iterate through siblings to find description, parameters, and code
    siblings = list(function.next_siblings)

    # Helper variables
    description_parts = []
    api_params_parts = []
    python_code = ""
    
    # States to control where we are in the HTML
    in_description = True  # Initially, we are reading description until we find "API Parameters"
    in_api_params = False

    for sib in siblings:
        if isinstance(sib, bs4.NavigableString):
            # often just whitespace or newline
            continue
        
        if isinstance(sib, bs4.element.Tag):
            # Check for headings that mark transitions
            if sib.name == "h6":
                heading_text = sib.get_text(strip=True)
                if "API Parameters" in heading_text:
                    # Found start of API parameters
                    in_description = False
                    in_api_params = True
                    continue
                elif "Examples" in heading_text:
                    # Found examples section; stop API params parsing
                    in_api_params = False
                    # We don't necessarily stop reading because we might still find the python code below
                    continue

            # If we're in the description phase, accumulate <p> text until API params start
            if in_description:
                if sib.name == "p" or sib.name == "br":
                    desc_text = sib.get_text(strip=True)
                    if desc_text:
                        description_parts.append(desc_text)
            
            # If we're in the API params phase, accumulate <p> elements until another section appears
            if in_api_params:
                if sib.name == "p":
                    param_text = sib.get_text(" ", strip=True)
                    if param_text:
                        api_params_parts.append(param_text)

            # Regardless of where we are, look for python code block
            # The python code block is inside <div class="python-code">
            if sib.name == "div" and "python-code" in sib.get("class", []):
                code_block = sib.find("code", class_="python")
                if code_block:
                    python_code = code_block.get_text()
    
    # Join the accumulated parts
    description_str = "\n".join(description_parts)
    api_params_str = "\n".join(api_params_parts)

    # Assign to result
    result["description"] = description_str
    result["api_params"] = api_params_str
    result["python_code_example"] = python_code

    return result


def process_section(section: bs4.element.Tag) -> None:
    section_soup = bs4.BeautifulSoup(str(section), "html.parser")
    section_title_tag = section_soup.find("h2")
    section_title = section_title_tag.get_text(strip=True) if section_title_tag else "Untitled Section"
    
    print(f"Section Title: {section_title}")

    functions = section_soup.find_all("h4")
    for i, function_tag in enumerate(functions):
        extracted = process_function(function_tag)
        print(f"Function {i+1}: {extracted['function_name']}")
        print("Description:")
        print(extracted["description"])
        print("\nAPI Parameters:")
        print(extracted["api_params"])
        print("\nPython Code Example:")
        print(extracted["python_code_example"])
        print("\n" + "-"*80 + "\n")
        break


# Check if the request was successful
if response.status_code == 200:
    # Parse the main HTML content
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    
    # Find the <article> tag with the specific class and role
    article = soup.find("article", class_="main-content", role="main")
    
    if article:
        # Find all <section> tags within this <article>
        sections = article.find_all("section")
        
        # Process each section as its own HTML
        for i, section in enumerate(sections, 1):
            process_section(section)
            break
    else:
        print("No <article> with class 'main-content' and role 'main' found.")
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")

Section Title: Time Series Stock Data APIs
Function 1: TIME_SERIES_INTRADAYTrending
Description:
This API returns current and 20+ years of historical intraday OHLCV time series of the equity specified, coveringpre-market and post-market hourswhere applicable (e.g., 4:00am to 8:00pm Eastern Time for the US market). You can query both raw (as-traded) and split/dividend-adjusted intraday data from this endpoint. The OHLCV data is sometimes called "candles" in finance literature.

API Parameters:
❚ Required: function
The time series of your choice. In this case, function=TIME_SERIES_INTRADAY
❚ Required: symbol
The name of the equity of your choice. For example: symbol=IBM
❚ Required: interval
Time interval between two consecutive data points in the time series. The following values are supported: 1min , 5min , 15min , 30min , 60min
❚ Optional: adjusted
By default, adjusted=true and the output time series is adjusted by historical split and dividend events. Set adjusted=false to query raw (

In [1]:
import requests
import bs4

url = "https://www.alphavantage.co/documentation/"

response = requests.get(url)
assert response.status_code == 200

soup = bs4.BeautifulSoup(response.text, "html.parser")

In [21]:
def process_section(section: bs4.element.Tag) -> None:
    section_title = section.find("h2").text
    print("Section Title:", section_title)

    curr_name = None
    curr = []
    for c in section.contents[:7]:
        print(c)

sections = soup.find_all("section")
for i, section in enumerate(sections, 1):
    process_section(section)
    break

Section Title: Time Series Stock Data APIs


<h2 id="time-series-data">Time Series Stock Data APIs</h2>


<p>This suite of APIs provide global equity data in 4 different temporal resolutions: (1) daily, (2) weekly, (3) monthly, and (4) intraday, with 20+ years of historical depth. A lightweight ticker quote endpoint and several utility functions such as ticker search and market open/closure status are also included for your convenience.</p>


<br/>


