In [18]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import time

# Set up Selenium
options = Options()
#options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--window-size=1920,1080")

driver = webdriver.Chrome(options=options)

# Base config
base_domain = "https://visa.vfsglobal.com"
start_url = "https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#visa-types"

visited = set()
to_visit = [start_url]
page_texts = []

def is_valid_vfs_url(url):
    return url.startswith(base_domain)

def clean_url(href, current_url):
    if not href:
        return None
    if href.startswith("javascript:") or href.startswith("#"):
        return None
    return urljoin(current_url, href)

while to_visit:
    url = to_visit.pop()
    if url in visited:
        continue

    visited.add(url)
    print(f"Visiting: {url}")

    try:
        driver.get(url)
        time.sleep(4)  # let JS render

        soup = BeautifulSoup(driver.page_source, "html.parser")
        body_text = soup.body.get_text(separator="\n", strip=True)
        page_texts.append(f"=== {url} ===\n{body_text}\n")

        # Discover new links
        for link in soup.find_all("a", href=True):
            next_url = clean_url(link['href'], url)
            if next_url and is_valid_vfs_url(next_url) and next_url not in visited:
                to_visit.append(next_url)

    except Exception as e:
        print(f"Failed to visit {url}: {e}")

driver.quit()

# Write to plain text
with open("vfs_recursive_text_dump.txt", "w", encoding="utf-8") as f:
    for page in page_texts:
        f.write(page + "\n\n")

print(f"\n✅ Done. Scraped {len(page_texts)} pages.")


Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#visa-types
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/pdf/ireland-checklist-for-mongolia.pdf
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#Join-Family-Non-Irish
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#join-family-EU-Directive
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#join-family-irish
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#volunteer
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#researcher
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#employment
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#study-long-stay
Visiting: https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#study-short-stay
V

In [1]:
!cat vfs_recursive_text_dump.txt

=== https://visa.vfsglobal.com/one-pager/ireland/mongolia/english/index.html#visa-types ===
Apply for a VISA to Ireland
In Mongolia
Apply for a VISA to Ireland
In Mongolia
Visa Information
Visa Types
Select the visa type that is right for you to see important information on visa fees, documents required, forms, photo specifications and processing times.
Please select visa type
Visa Types
Short Stay Visa
Tourism
Business
Visit Family/Friend
Conference/Event
Performance
Interview
Study
Long Stay Visa
Study
Employment
Researcher
Volunteer
Join Family (Irish)
Join Family (EU Directive 2004/38)
Join Family (Non-Irish. EEA National)
Visa Types
Short Stay Visa
Tourism
Business
Visit Family/Friend
Conference/Event
Performance
Interview
Study
Long Stay Visa
Study
Employment
Researcher
Volunteer
Join Family (Irish)
Join Family (EU Directive       2004/38)
Join Family (Non-Irish. EEA National)
Short Stay Visa
Tourism
Business
Visit Family/Friend
Conference/Event
Performance
Interview
Study
Long S