In [1]:
import requests
from bs4 import BeautifulSoup
from typing import Dict, Any, List
import time

def extract_all_article_links(url: str) -> Dict[str, Any]:
    """
    Extracts all article links from a given RFA (Radio Free Asia) webpage.

    Args:
    url (str): The URL of the RFA webpage containing article links.

    Returns:
    Dict[str, Any]: A dictionary containing article links and status details.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    final_response = {
        "Links": [],
        "Message": "Success",
        "Response": 200
    }
    
    try:
        start_time = time.time()
        response = requests.get(url, headers=headers, timeout=(5, 60-5))
        response.raise_for_status()
        end_time = time.time()
        if end_time - start_time > 50:
            print(f"This URL took more than 50s: {url}")

        soup = BeautifulSoup(response.content, 'html.parser')
        all_articles = soup.find_all("div", class_="teaserimg")
        if not all_articles:
            raise ValueError("Could not find the main article container on the page.")
        
        article_links = []
        for article in all_articles:
            links = article.find_all("a", class_="teaserimg")
            article_links.extend([link.get("href") for link in links if link.get("href")])
        
        final_response["Links"] = article_links
        return final_response
    
    except requests.Timeout:
        final_response["Message"] = "Request timed out"
        final_response["Response"] = 408
        return final_response
    except requests.RequestException as e:
        final_response["Message"] = f"An error occurred while fetching the webpage: {e}"
        final_response["Response"] = getattr(e.response, 'status_code', 500)
        return final_response
    except ValueError as e:
        final_response["Message"] = f"An error occurred while parsing the webpage: {e}"
        final_response["Response"] = getattr(e.response, 'status_code', 500)
        return final_response
    except Exception as e:
        final_response["Message"] = f"An unexpected error occurred: {e}"
        final_response["Response"] = 500
        return final_response


def scrape_rfa_article(url: str) -> Dict[str, Any]:
    """
    Scrapes an article from the RFA (Radio Free Asia) website.

    Args:
    url (str): The URL of the RFA article to scrape.

    Returns:
    Dict[str, Any]: A dictionary containing the scraped information and status details.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    
    final_response = {
        "data": {
            'title': "",
            'body': {"Audio": "", "Text": []},
            'meta_data': {'URL': url, 'Author': "", 'Date': "", 'Tags': []}
        },
        "Message": "Success",
        "Response": 200
    }
    
    try:
        response = requests.get(url, headers=headers, timeout=120)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        
        # Extract title
        title = soup.find('h2', class_='no_media')
        final_response['data']['title'] = title.text.strip() if title else "Title not found"

        # Extracting Meta Data
        meta_data_body = soup.find('div', class_='sectionteaser archive')
        if meta_data_body:
            author_name = meta_data_body.find('a', class_='author')
            final_response['data']['meta_data']["Author"] = author_name.get_text() if author_name else "Author not found"
            
            date_time = meta_data_body.find('span', class_='story_date')
            final_response['data']['meta_data']["Date"] = date_time.get_text() if date_time else "Date not found"
        
        # Getting tag meta data 
        tag_meta = soup.find('ul', class_='tags')
        if tag_meta:
            tag_meta = tag_meta.select('li a')
            final_response['data']['meta_data']["Tags"] = [tag.text for tag in tag_meta]
        
        # Extract body content
        body = soup.find('div', class_='storytext')
        if body:
            paragraphs = body.find_all('p')
            final_response['data']['body']["Text"] = [para.get_text(strip=True) for para in paragraphs]

            # Find the audio tag and get its src attribute
            audio = body.find('audio')
            if audio:
                final_response['data']['body']["Audio"] = audio.get('src', "No audio source found")
        
        return final_response
    
    except requests.Timeout:
        final_response["Message"] = "Request timed out"
        final_response["Response"] = 408
        return final_response
        
    except requests.RequestException as e:
        final_response["Message"] = f"An error occurred while fetching the article: {e}"
        final_response["Response"] = getattr(e.response, 'status_code', 500)
        return final_response


def fetch_all_articles(base_url: str, total_pages: int) -> List[str]:
    """
    Fetches article links from all pages of the RFA website.

    Args:
    base_url (str): The base URL of the RFA archive.
    total_pages (int): The total number of pages to scrape.

    Returns:
    List[str]: A list of article URLs.
    """
    all_article_links = []
    for page in range(total_pages):
        url = f"{base_url}&start={page * 13}"
        response = extract_all_article_links(url)
        if response["Response"] == 200:
            all_article_links.extend(response["Links"])
        else:
            print(f"Failed to fetch links from page {page + 1}: {response['Message']}")
    return all_article_links



In [None]:

# Usage
base_archive_url = "https://www.rfa.org/tibetan/story_archive?uids=e2b0f97935844f15a765322387ba2381%40c80eb7880fab441bb93e888d5fb0b8d7%40a34e3f118cfb47c89efc8d10ca29beb4%40ed8b21d453ee4bdbb56632bdf00ba533%404f49bd08f0c44e9384542e0d5f9c37f8%402f794fee5a644fdb9964a3accdff9162%403defd73dd1994e0db35cb48f998d6958%401b63853142714c3489c3e1c3fa848623%406153403d00e24f4a8b178048cc9a1820%4069e4cbeeacf1491c95f326788021710e%401fe2a601fca941b7a3f781e0cd25954b%40bc0393320a3445688ec79b1187a883bf%4042fa45d86fca4d11befaecf0a942246d%406d988d19310843b18445bfb0f461ba8f%404a2437b9cd884233ad46554a0303ef89%407a0ed25eec54437f976e8f99fed6ee84%4009849b68da104e188c5ae11d7112827b"
total_pages = 3888

# Fetch all article links
all_article_links = fetch_all_articles(base_archive_url, total_pages)

# Fetch all article links
all_article_links = fetch_all_articles(base_archive_url, start_index, num_pages)

# Scrape each article
for article_url in all_article_links:
    article_data = scrape_rfa_article(article_url)
    print(article_data)


In [2]:
scrape_rfa_article("https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html")

{'data': {'title': '',
  'body': {'Audio': '', 'Text': []},
  'meta_data': {'URL': 'https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html',
   'Author': '',
   'Date': '',
   'Tags': []}},
 'Message': 'An error occurred while fetching the article: 403 Client Error: Forbidden for url: https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html',
 'Response': 403}

In [9]:
import requests
from bs4 import BeautifulSoup
from typing import Dict, Any
import time
from fake_useragent import UserAgent

def scrape_rfa_article(url: str) -> Dict[str, Any]:
    """
    Scrapes an article from the RFA (Radio Free Asia) website with multiple attempts to bypass 403 errors.

    Args:
    url (str): The URL of the RFA article to scrape.

    Returns:
    Dict[str, Any]: A dictionary containing the scraped information and status details.
    """
    final_response = {
        "data": {
            'title': "",
            'body': {"Audio": "", "Text": []},
            'meta_data': {'URL': url, 'Author': "", 'Date': "", 'Tags': []}
        },
        "Message": "Success",
        "Response": 200
    }

    def parse_content(content):
        soup = BeautifulSoup(content, 'html.parser')
        
        # Extract title
        title = soup.find('h2', class_='no_media')
        final_response['data']['title'] = title.text.strip() if title else "Title not found"

        # Extracting Meta Data
        meta_data_body = soup.find('div', class_='sectionteaser archive')
        if meta_data_body:
            author_name = meta_data_body.find('a', class_='author')
            final_response['data']['meta_data']["Author"] = author_name.get_text() if author_name else "Author not found"
            
            date_time = meta_data_body.find('span', class_='story_date')
            final_response['data']['meta_data']["Date"] = date_time.get_text() if date_time else "Date not found"
        
        # Getting tag meta data 
        tag_meta = soup.find('ul', class_='tags')
        if tag_meta:
            tag_meta = tag_meta.select('li a')
            final_response['data']['meta_data']["Tags"] = [tag.text for tag in tag_meta]
        
        # Extract body content
        body = soup.find('div', class_='storytext')
        if body:
            paragraphs = body.find_all('p')
            final_response['data']['body']["Text"] = [para.get_text(strip=True) for para in paragraphs]

            # Find the audio tag and get its src attribute
            audio = body.find('audio')
            if audio:
                final_response['data']['body']["Audio"] = audio.get('src', "No audio source found")

    def attempt_request(headers, delay=0):
        try:
            if delay > 0:
                time.sleep(delay)
            response = requests.get(url, headers=headers, timeout=120)
            response.raise_for_status()
            parse_content(response.content)
            return True
        except requests.RequestException as e:
            final_response["Message"] = f"An error occurred while fetching the article: {e}"
            final_response["Response"] = getattr(e.response, 'status_code', 500)
            print(f"failed in attempt_request: {e}")
            return False

    # Attempt 1: Original headers
    original_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    print("testing Attempt 1: Original headers")
    if attempt_request(original_headers):
        return final_response

    # Attempt 2: Random User-Agent
    print("testing Attempt 2: Random User-Agent")
    ua = UserAgent()
    random_headers = {'User-Agent': ua.random}
    if attempt_request(random_headers):
        return final_response

    # Attempt 3: Full headers set
    print("testing Attempt 3: Full headers set")
    full_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Referer': 'https://www.google.com/',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    if attempt_request(full_headers):
        return final_response

    # Attempt 4: With delay
    print("testing Attempt 4:  With delay")
    if attempt_request(full_headers, delay=2):
        return final_response

    return final_response

In [10]:
scrape_rfa_article("https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html")

testing Attempt 1: Original headers
failed in attempt_request: 403 Client Error: Forbidden for url: https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html
testing Attempt 2: Random User-Agent
failed in attempt_request: 403 Client Error: Forbidden for url: https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html
testing Attempt 3: Full headers set
failed in attempt_request: 403 Client Error: Forbidden for url: https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html
testing Attempt 4:  With delay
failed in attempt_request: 403 Client Error: Forbidden for url: https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html


{'data': {'title': '',
  'body': {'Audio': '', 'Text': []},
  'meta_data': {'URL': 'https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html',
   'Author': '',
   'Date': '',
   'Tags': []}},
 'Message': 'An error occurred while fetching the article: 403 Client Error: Forbidden for url: https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html',
 'Response': 403}

In [11]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from typing import Dict, Any
import time

def scrape_rfa_article_selenium(url: str) -> Dict[str, Any]:
    """
    Scrapes an article from the RFA (Radio Free Asia) website using Selenium.

    Args:
    url (str): The URL of the RFA article to scrape.

    Returns:
    Dict[str, Any]: A dictionary containing the scraped information and status details.
    """
    final_response = {
        "data": {
            'title': "",
            'body': {"Audio": "", "Text": []},
            'meta_data': {'URL': url, 'Author': "", 'Date': "", 'Tags': []}
        },
        "Message": "Success",
        "Response": 200
    }

    options = Options()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    try:
        driver = webdriver.Chrome(options=options)
        driver.get(url)

        # Wait for the content to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "storytext"))
        )

        # Parse the page source with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # Extract title
        title = soup.find('h2', class_='no_media')
        final_response['data']['title'] = title.text.strip() if title else "Title not found"

        # Extracting Meta Data
        meta_data_body = soup.find('div', class_='sectionteaser archive')
        if meta_data_body:
            author_name = meta_data_body.find('a', class_='author')
            final_response['data']['meta_data']["Author"] = author_name.get_text() if author_name else "Author not found"
            
            date_time = meta_data_body.find('span', class_='story_date')
            final_response['data']['meta_data']["Date"] = date_time.get_text() if date_time else "Date not found"
        
        # Getting tag meta data 
        tag_meta = soup.find('ul', class_='tags')
        if tag_meta:
            tag_meta = tag_meta.select('li a')
            final_response['data']['meta_data']["Tags"] = [tag.text for tag in tag_meta]
        
        # Extract body content
        body = soup.find('div', class_='storytext')
        if body:
            paragraphs = body.find_all('p')
            final_response['data']['body']["Text"] = [para.get_text(strip=True) for para in paragraphs]

            # Find the audio tag and get its src attribute
            audio = body.find('audio')
            if audio:
                final_response['data']['body']["Audio"] = audio.get('src', "No audio source found")

    except Exception as e:
        final_response["Message"] = f"An error occurred while fetching the article: {str(e)}"
        final_response["Response"] = 500
    finally:
        driver.quit()

    return final_response

# Usage
url = "https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html"
result = scrape_rfa_article_selenium(url)
print(result)

{'data': {'title': '', 'body': {'Audio': '', 'Text': []}, 'meta_data': {'URL': 'https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html', 'Author': '', 'Date': '', 'Tags': []}}, 'Message': 'An error occurred while fetching the article: Message: \nStacktrace:\n\tGetHandleVerifier [0x00007FF65C2A9642+30946]\n\t(No symbol) [0x00007FF65C25E3D9]\n\t(No symbol) [0x00007FF65C156FDA]\n\t(No symbol) [0x00007FF65C1A822C]\n\t(No symbol) [0x00007FF65C1A850C]\n\t(No symbol) [0x00007FF65C1EDCB7]\n\t(No symbol) [0x00007FF65C1CCAAF]\n\t(No symbol) [0x00007FF65C1EB041]\n\t(No symbol) [0x00007FF65C1CC813]\n\t(No symbol) [0x00007FF65C19A6E5]\n\t(No symbol) [0x00007FF65C19B021]\n\tGetHandleVerifier [0x00007FF65C3DF84D+1301229]\n\tGetHandleVerifier [0x00007FF65C3EBDC7+1351783]\n\tGetHandleVerifier [0x00007FF65C3E2A13+1313971]\n\tGetHandleVerifier [0x00007FF65C2DDD16+245686]\n\t(No symbol) [0x00007FF65C26759F]\n\t(No symbol) [0x00007FF65C263814]\n\t(No symb

In [14]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from typing import Dict, Any
import time

def scrape_rfa_article_undetected(url: str) -> Dict[str, Any]:
    """
    Scrapes an article from the RFA (Radio Free Asia) website using undetected-chromedriver.

    Args:
    url (str): The URL of the RFA article to scrape.

    Returns:
    Dict[str, Any]: A dictionary containing the scraped information and status details.
    """
    final_response = {
        "data": {
            'title': "",
            'body': {"Audio": "", "Text": []},
            'meta_data': {'URL': url, 'Author': "", 'Date': "", 'Tags': []}
        },
        "Message": "Success",
        "Response": 200
    }

    options = uc.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    try:
        driver = uc.Chrome(options=options)
        driver.get(url)
        print("done Chrome driver")

        # Wait for the content to load
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.CLASS_NAME, "storytext"))
        )
        print("done loading driver")

        # Parse the page source with BeautifulSoup
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        print(soup)

        # Extract title
        title = soup.find('h2', class_='no_media')
        final_response['data']['title'] = title.text.strip() if title else "Title not found"

        # Extracting Meta Data
        meta_data_body = soup.find('div', class_='sectionteaser archive')
        if meta_data_body:
            author_name = meta_data_body.find('a', class_='author')
            final_response['data']['meta_data']["Author"] = author_name.get_text() if author_name else "Author not found"
            
            date_time = meta_data_body.find('span', class_='story_date')
            final_response['data']['meta_data']["Date"] = date_time.get_text() if date_time else "Date not found"
        
        # Getting tag meta data 
        tag_meta = soup.find('ul', class_='tags')
        if tag_meta:
            tag_meta = tag_meta.select('li a')
            final_response['data']['meta_data']["Tags"] = [tag.text for tag in tag_meta]
        
        # Extract body content
        body = soup.find('div', class_='storytext')
        if body:
            paragraphs = body.find_all('p')
            final_response['data']['body']["Text"] = [para.get_text(strip=True) for para in paragraphs]

            # Find the audio tag and get its src attribute
            audio = body.find('audio')
            if audio:
                final_response['data']['body']["Audio"] = audio.get('src', "No audio source found")

    except Exception as e:
        final_response["Message"] = f"An error occurred while fetching the article: {str(e)}"
        final_response["Response"] = 500
    finally:
        driver.quit()

    return final_response

# Usage
url = "https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html"
result = scrape_rfa_article_undetected(url)
print(result)

done Chrome driver
{'data': {'title': '', 'body': {'Audio': '', 'Text': []}, 'meta_data': {'URL': 'https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html', 'Author': '', 'Date': '', 'Tags': []}}, 'Message': 'An error occurred while fetching the article: Message: \nStacktrace:\n\tGetHandleVerifier [0x004F8923+23283]\n\t(No symbol) [0x004BE934]\n\t(No symbol) [0x003F0733]\n\t(No symbol) [0x0043326F]\n\t(No symbol) [0x004334AB]\n\t(No symbol) [0x0046EE42]\n\t(No symbol) [0x00454464]\n\t(No symbol) [0x0046CB8D]\n\t(No symbol) [0x004541B6]\n\t(No symbol) [0x00428017]\n\t(No symbol) [0x0042890D]\n\tGetHandleVerifier [0x005EA5F3+1013699]\n\tGetHandleVerifier [0x005F3E4C+1052700]\n\tGetHandleVerifier [0x005ED4B4+1025668]\n\tGetHandleVerifier [0x0051EA2B+179195]\n\t(No symbol) [0x004C6833]\n\t(No symbol) [0x004C3198]\n\t(No symbol) [0x004C3337]\n\t(No symbol) [0x004BB4BE]\n\tBaseThreadInitThunk [0x76197BA9+25]\n\tRtlInitializeExceptionChain [0

In [16]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from bs4 import BeautifulSoup
from typing import Dict, Any
import time

def scrape_rfa_article_undetected(url: str) -> Dict[str, Any]:
    """
    Scrapes an article from the RFA (Radio Free Asia) website using undetected-chromedriver.

    Args:
    url (str): The URL of the RFA article to scrape.

    Returns:
    Dict[str, Any]: A dictionary containing the scraped information and status details.
    """
    final_response = {
        "data": {
            'title': "",
            'body': {"Audio": "", "Text": []},
            'meta_data': {'URL': url, 'Author': "", 'Date': "", 'Tags': []}
        },
        "Message": "Success",
        "Response": 200
    }

    options = uc.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument('--disable-blink-features=AutomationControlled')
    options.add_argument('--disable-extensions')
    options.add_argument('--disable-gpu')
    options.add_argument('--window-size=1920x1080')

    try:
        driver = uc.Chrome(options=options)
        print("Chrome driver initialized")
        
        driver.get(url)
        print(f"Navigated to URL: {url}")
        
        time.sleep(10)  # Add a static wait to allow for any initial loading
        
        try:
            # Wait for the content to load
            WebDriverWait(driver, 30).until(
                EC.presence_of_element_located((By.CLASS_NAME, "storytext"))
            )
            print("storytext element found")
        except TimeoutException:
            print("Timeout waiting for storytext element. Proceeding with available content.")
        
        # Get the page source even if the specific element wasn't found
        page_source = driver.page_source
        print(f"Page source length: {len(page_source)}")
        
        # Parse the page source with BeautifulSoup
        soup = BeautifulSoup(page_source, 'html.parser')
        print(soup)

        # Extract title
        title = soup.find('h2', class_='no_media')
        final_response['data']['title'] = title.text.strip() if title else "Title not found"
        print(f"Title: {final_response['data']['title']}")

        # Extracting Meta Data
        meta_data_body = soup.find('div', class_='sectionteaser archive')
        if meta_data_body:
            author_name = meta_data_body.find('a', class_='author')
            final_response['data']['meta_data']["Author"] = author_name.get_text() if author_name else "Author not found"
            
            date_time = meta_data_body.find('span', class_='story_date')
            final_response['data']['meta_data']["Date"] = date_time.get_text() if date_time else "Date not found"
        
        # Getting tag meta data 
        tag_meta = soup.find('ul', class_='tags')
        if tag_meta:
            tag_meta = tag_meta.select('li a')
            final_response['data']['meta_data']["Tags"] = [tag.text for tag in tag_meta]
        
        # Extract body content
        body = soup.find('div', class_='storytext')
        if body:
            paragraphs = body.find_all('p')
            final_response['data']['body']["Text"] = [para.get_text(strip=True) for para in paragraphs]
            print(f"Number of paragraphs found: {len(final_response['data']['body']['Text'])}")

            # Find the audio tag and get its src attribute
            audio = body.find('audio')
            if audio:
                final_response['data']['body']["Audio"] = audio.get('src', "No audio source found")

    except Exception as e:
        final_response["Message"] = f"An error occurred while fetching the article: {str(e)}"
        final_response["Response"] = 500
        print(f"Error: {str(e)}")
    finally:
        if 'driver' in locals():
            driver.quit()

    return final_response

# Usage
url = "https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html"
result = scrape_rfa_article_undetected(url)
print(result)

Chrome driver initialized
Navigated to URL: https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html
Timeout waiting for storytext element. Proceeding with available content.
Page source length: 375
<html><head>
<title>Access Denied</title>
</head><body>
<h1>Access Denied</h1>
 
You don't have permission to access "http://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html" on this server.<p>
Reference #18.556c3f17.1724129745.32982be8
</p><p>https://errors.edgesuite.net/18.556c3f17.1724129745.32982be8</p>
</body></html>
Title: Title not found
{'data': {'title': 'Title not found', 'body': {'Audio': '', 'Text': []}, 'meta_data': {'URL': 'https://www.rfa.org/tibetan/sargyur/richard-r-verma-will-lead-u-s-delegations-to-nepal-08192024143818.html', 'Author': '', 'Date': '', 'Tags': []}}, 'Message': 'Success', 'Response': 200}
