In [None]:
import csv
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://www-archive.mozilla.org"
PAGE_URL_TEMPLATE = "https://www-archive.mozilla.org/quality/browser/front-end/testcases/"

def scrape_test_page(page_url, req_counter, test_counter, timeout=30):
    """Scrape one test-case index page and every test page it links to.

    Each table row on the index page that has exactly three ``<td>`` cells is
    read as (feature, description, link). The first two cells become a
    requirement record; the page behind the link is downloaded and its
    "Purpose", "Initial Conditions", "Steps/Description" and "Expected Results"
    sections become a test record. A mapping record ties the two IDs together.

    Args:
        page_url: URL of the index page holding the requirement table.
        req_counter: Number used as ID for the next requirement.
        test_counter: Number used as ID for the next test.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A tuple ``(requirements, tests, mappings, req_counter, test_counter)``.
        The three lists hold dicts with the keys ``ID/Feature/Description``,
        ``ID/Purpose/TestSteps`` and ``ReqID/TestID`` respectively; the two
        counters are the next unused IDs, ready for the following call.

    Raises:
        requests.HTTPError: If the index page answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    response = requests.get(page_url, timeout=timeout)
    # Fail loudly instead of parsing an error page into empty result lists.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    requirements = []
    tests = []
    mappings = []

    def extract_section(soup, section_name):
        """Return the text between the ``<h2>`` naming the section and the next ``<h2>``.

        The heading is matched case-insensitively by substring. An empty
        string is returned when no such heading exists.
        """
        h2 = soup.find("h2", string=lambda text: text and section_name.lower() in text.lower())
        if not h2:
            return ""

        parts = []
        next_elem = h2.find_next_sibling()
        # Collect sibling elements until the next section heading starts.
        while next_elem and next_elem.name != 'h2':
            parts.append(next_elem.get_text(separator=' ', strip=True))
            next_elem = next_elem.find_next_sibling()

        return "\n".join(parts).strip()

    for row in soup.find_all("tr"):
        tds = row.find_all("td")

        # Rows without exactly three cells carry no requirement; skipping them
        # here keeps every statement below working on this row's own data.
        if len(tds) != 3:
            continue

        feature = tds[0].get_text(strip=True)
        description = tds[1].get_text(strip=True)
        link_tag = tds[2].find("a")
        # .get() tolerates anchors that have no href attribute.
        relative_link = link_tag.get("href") if link_tag else None

        # Save Requirement
        req_id = f"{req_counter}"
        requirements.append({
            "ID": req_id,
            "Feature": feature,
            "Description": description
        })
        req_counter += 1

        # Without a link there is no test page to download, so the requirement
        # is kept but gets no test and no mapping.
        if not relative_link:
            continue

        full_link = urljoin(BASE_URL, relative_link)
        test_response = requests.get(full_link, timeout=timeout)
        test_soup = BeautifulSoup(test_response.content, "html.parser")

        purpose_text = extract_section(test_soup, "Purpose")
        initial_conditions = extract_section(test_soup, "Initial Conditions")
        steps_text = extract_section(test_soup, "Steps/Description")
        expected_results = extract_section(test_soup, "Expected Results")

        # Sections that are missing on the page are left out of the combined text.
        test_steps_combined = "\n\n".join(filter(None, [
            initial_conditions,
            steps_text,
            expected_results
        ]))

        # Save Test
        test_id = f"{test_counter}"
        tests.append({
            "ID": test_id,
            "Purpose": purpose_text,
            "TestSteps": test_steps_combined
        })

        # Save Mapping
        mappings.append({
            "ReqID": req_id,
            "TestID": test_id
        })

        test_counter += 1

    return requirements, tests, mappings, req_counter, test_counter

In [None]:

# Accumulators filled here and written out by the CSV export cell.
all_requirements, all_tests, all_mappings = [], [], []

# Index pages to scrape, given relative to PAGE_URL_TEMPLATE.
TARGET_PATHS = ["bookmarks"]

# Both ID sequences start at 1 and keep running across all scraped pages.
req_counter = test_counter = 1

for path in TARGET_PATHS:
    page_url = urljoin(PAGE_URL_TEMPLATE, path)
    page_reqs, page_tests, page_maps, req_counter, test_counter = scrape_test_page(
        page_url, req_counter, test_counter
    )

    all_requirements += page_reqs
    all_tests += page_tests
    all_mappings += page_maps

In [18]:

# Write to CSV files
OUTPUT_DIR = Path('data/Mozilla2')
# Create the folder first so the export also works on a fresh checkout
# where it does not exist yet.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

# One entry per output file: (file name, column order, collected rows).
csv_outputs = [
    ('requirements.csv', ['ID', 'Feature', 'Description'], all_requirements),
    ('tests.csv', ['ID', 'Purpose', 'TestSteps'], all_tests),
    ('mapping.csv', ['ReqID', 'TestID'], all_mappings),
]

for file_name, fieldnames, rows in csv_outputs:
    # newline='' lets the csv module control line endings itself.
    with open(OUTPUT_DIR / file_name, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

print("Files saved: requirements.csv, tests.csv, mapping.csv")

Files saved: requirements.csv, tests.csv, mapping.csv
