### Dataset Generation
- Consists of a scraper, a prompt generator, and a table generator. Because of this, the development of the prompt generator, table generator, and scraper is partially based on the development of this dataset generation notebook (and vice versa).
### How To Use
- Assuming you have all dependencies installed, just run all cells and enter the website URL (.org/.edu/.gov) when prompted
- I keep all unused cells commented out, so it is safe to run all cells
### Dependencies
- Selenium
- LangChain (`langchain`, `langchain-core`, `langchain-ollama`)
- Ollama (with the Llama 3.1 8B model)
- Pandas

#### Version 3: [Use This Until V4FINAL]

##### Functions

In [9]:
from selenium import webdriver

import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By

def setup_driver():
    """Start a Selenium WebDriver, trying Firefox, then Chrome, then Edge.

    Firefox and Edge are started headless; Chrome is started with its
    default options.

    Returns:
        The first WebDriver instance that starts successfully.

    Raises:
        RuntimeError: If none of the three browsers could be started. The
            exception raised by the last attempt is attached as the cause.
    """
    def _start_firefox():
        options = webdriver.FirefoxOptions()
        options.add_argument("--headless")
        return webdriver.Firefox(options=options)  # Default with Firefox

    def _start_chrome():
        ## Uncomment when setting up for gcolab (and using chrome)
        # from selenium.webdriver import ChromeOptions
        # from selenium.webdriver.chrome.service import Service
        # from webdriver_manager.chrome import ChromeDriverManager
        # from selenium.webdriver.chrome.options import Options
        # import google_colab_selenium as gs
        # options = ChromeOptions()
        # options.add_argument("--headless")
        # return gs.Chrome()
        return webdriver.Chrome()  # Default with Chrome

    def _start_edge():
        options = webdriver.EdgeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--headless")
        return webdriver.Edge(options=options)  # Default with Edge

    last_error = None
    for start_browser in (_start_firefox, _start_chrome, _start_edge):
        try:
            return start_browser()
        except Exception as error:
            # Best effort: remember why this browser failed and try the next.
            last_error = error

    # Previously this path only printed the message and then crashed with an
    # UnboundLocalError on `return driver`; fail with an explicit error instead.
    print("Error. No usable browser found for scraping")
    raise RuntimeError("No usable browser found for scraping") from last_error

def get_url():
    """Prompt on standard input for the website URL and return what was typed."""
    entered_url = input("Enter Website URL: ")
    return entered_url

def _button_records(driver):
    """Describe every displayed <button> element as a list of strings."""
    records = []
    for button in driver.find_elements(By.CSS_SELECTOR, "button"):
        if not button.is_displayed():
            continue

        # Button Element Attributes (each queried once)
        text = button.text
        size = button.size
        color = button.value_of_css_property("color")
        background = button.value_of_css_property("background-color")

        label = f"Button Element '{text}'" if text else "Button Element Without Name (might be an image)"
        size_part = f"With Size Attribute {size}" if size is not None else ""
        color_part = f"With Color Attribute {color}" if color is not None else ""
        background_part = f"With Background Color Attribute {background}" if background is not None else ""
        state = "Clickable" if button.is_enabled() else "Not Clickable"

        # Store Button Element
        records.append(f"{state} {label}")
        if color_part or background_part:
            records.append(f"{label} {color_part} {background_part} ")
        if size_part:
            records.append(f"{label} {size_part}")
    return records


def _link_records(driver):
    """Describe every displayed <a> element as a list of strings."""
    records = []
    for link in driver.find_elements(By.TAG_NAME, "a"):
        if not link.is_displayed():
            continue

        # Link Element Attributes
        text = link.text
        href = link.get_attribute('href')
        rel = link.get_attribute('rel')
        target = link.get_attribute('target')
        download = link.get_attribute('download')

        label = f"Link Element '{text}'" if text else "Link Element Without Name (might be an image)"
        url_part = f"With URL {href}" if href is not None else "Without URL"
        # Additional attributes are only mentioned when present and non-empty
        rel_part = f"With rel attribute {rel}" if rel else ""
        target_part = f"With target attribute {target}" if target else ""
        download_part = f"A download is attached to document {download}" if download else ""

        # Store Link Element
        records.append(f"{label} {url_part} {target_part} {rel_part}")
        if download_part:
            records.append(f"{label} {download_part}")
    return records


def _styled_text_records(element, description):
    """Return `description` alone, then `description` with the element's colors."""
    color = element.value_of_css_property("color")
    background = element.value_of_css_property("background-color")
    color_part = f"With Color {color}" if color is not None else ""
    background_part = f"With Background Color {background}" if background is not None else ""

    records = [description]
    if color_part or background_part:
        records.append(f"{description} {color_part} {background_part}")
    return records


def _heading_records(driver):
    """Describe every displayed heading, grouped by level h1 through h6."""
    records = []
    for level in range(1, 7):  # HTML has 6 levels of headings (h1 to h6)
        for heading in driver.find_elements(By.TAG_NAME, f"h{level}"):
            if heading.is_displayed():
                description = f"Heading Element (h{level}): '{heading.text}'"
                records.extend(_styled_text_records(heading, description))
    return records


def _paragraph_records(driver):
    """Describe every displayed <p> element as a list of strings."""
    records = []
    for paragraph in driver.find_elements(By.TAG_NAME, "p"):
        if paragraph.is_displayed():
            description = f"Paragraph Element: '{paragraph.text}'"
            records.extend(_styled_text_records(paragraph, description))
    return records


def _input_records(driver):
    """Describe every displayed <input> element as a list of strings."""
    records = []
    for input_tag in driver.find_elements(By.TAG_NAME, "input"):
        if not input_tag.is_displayed():
            continue

        # Basic Attributes
        name = input_tag.get_attribute('name')
        input_type = input_tag.get_attribute('type')
        # Additional Attributes
        value = input_tag.get_attribute('value')
        placeholder = input_tag.get_attribute('placeholder')

        # Each description is gated on its OWN attribute. Previously the type,
        # value and placeholder descriptions all tested the 'name' attribute.
        name_part = f"Input Field Element: '{name}'" if name is not None else "Without Name"
        type_part = f"With Type {input_type}" if input_type is not None else "Without Type"
        value_part = f"With Value {value}" if value is not None else "Without Value"
        placeholder_part = f"With Placeholder '{placeholder}'" if placeholder is not None else "Without Placeholder"
        readonly_part = "Is Readonly" if input_tag.get_attribute('readonly') is not None else "Is Editable"
        disabled_part = "Disabled" if input_tag.get_attribute('disabled') is not None else "Enabled"
        required_part = "Is Required" if input_tag.get_attribute('required') is not None else "Not Required"
        autocomplete_part = "Is Autocomplete" if input_tag.get_attribute('autocomplete') is not None else "Not Autocomplete"

        # Store based on type
        if input_type == "submit":
            records.append(f"Form Submit Button Element: {name} {disabled_part}")
        else:
            records.append(f"{disabled_part} {name_part} {type_part} {required_part}")
            records.append(f"{name_part} {type_part} {value_part}")
            records.append(f"{name_part} {type_part} {placeholder_part}")
            records.append(f"{name_part} {type_part} {readonly_part}")
            records.append(f"{name_part} {type_part} {autocomplete_part}")
    return records


def data_scrape(url):
    """Open `url` in a browser and describe its visible UI elements.

    Elements are collected in this order: buttons, links, headings (h1-h6),
    paragraphs, inputs. Elements that are not displayed are skipped.

    Attribute values are read into local variables before formatting, so no
    f-string nests the same quote character (that only parses on
    Python 3.12+).

    Args:
        url: Address of the page to load.

    Returns:
        A list of strings, each describing one aspect of one UI element.
    """
    driver = setup_driver()
    try:
        driver.get(url)

        data = []
        data.extend(_button_records(driver))
        data.extend(_link_records(driver))
        data.extend(_heading_records(driver))
        data.extend(_paragraph_records(driver))
        data.extend(_input_records(driver))
    finally:
        # Close the browser even when loading or scraping fails
        driver.quit()

    return data

## Prompt Generator + LLM
# Langchain and Ollama
import langchain
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate

# Selects the prompt template / model pair defined below:
#   0 -> detailed instruction prompt for the non fine-tuned model ("llama3.1")
#   1 -> short prompt for the fine-tuned model ("qallama")
TEMPLATE_SETTING = 0
# Set to 1 to print a progress message for every generated test case
DEBUG_SETTING = 1

if (TEMPLATE_SETTING == 0):
    # Template for non fine tuned model
    # CONTEXT V3
    template = """You are an expert in software quality assurance specializing in usability testing.

    Consider the following usability aspects:
    Accessibility (keyboard navigation, screen reader support, etc.)
    Responsiveness (behavior across different screen sizes, etc.)
    Feedback (hover effects, click responses, error messages, etc.)
    Interactivity (expected behavior when clicked, typed into, or focused, etc.)
    User experience (clarity of labels, ease of use, etc.)

    Avoid descriptions like "The user" or "The test participant" and should instead focus on direct, executable actions.

    Given this UI Element: {ui_element}
    From this website: {url}

    Generate a functional usability test case with the following aspects, separated by a `~`:
    - **Objective**: Clearly state the objective of the test.
    - **Preconditions**: List any preconditions that need to be met.
    - **Test Steps**: Provide a step-by-step guide for the test. Each step should be a direct command, not a description.
    - **Expected Output**: Describe the expected output.

    Output format example, start with the objective directly and avoid saying "here is the test case" or the name of the testcase and end the output after writing the expected output:
    Objective: [Describe the objective] 
    ~ Preconditions: [List preconditions, use dashes] 
    ~ Test Steps: [Step-by-step guide] 
    ~ Expected Output: [Describe the expected output, use dashes]"""

    model_str = "llama3.1"
elif (TEMPLATE_SETTING == 1):
    # template and model for fine-tuned version
    template = """Generate a test case for the following UI element: {ui_element} from the website: {url}"""
    model_str = "qallama"
else:
    # Without this guard `template` and `model_str` would simply be left
    # undefined and the first use would fail with an unrelated NameError.
    raise ValueError(f"Unsupported TEMPLATE_SETTING: {TEMPLATE_SETTING!r} (expected 0 or 1)")

# OPTIONAL FUNCTION COMMON ERROR REMOVAL/ADJUSTMENT
def remove_common_error(output : str, setting : int = 0):
    """Clean frequently seen formatting mistakes out of one LLM response.

    Args:
        output: Raw text produced by the model.
        setting: 0 deletes the known unwanted fragments, 1 collapses the
            known "label~" fragments into a bare "~", 2 does both (deletion
            first). Any other value returns `output` unchanged.

    Returns:
        The cleaned text.
    """
    def _replace_each(text, fragments, replacement):
        for fragment in fragments:
            text = text.replace(fragment, replacement)
        return text

    # Fragments that commonly leak into the answer and are dropped entirely
    unwanted_fragments = (
        "Objective~Preconditions~Test Steps~Expected Result",
        "Objective:", "**Objective**:",
        "Preconditions:", "**Preconditions**:",
        "Test Steps:", "**Test Steps**:",
        "Expected Output:", "**Expected Output**:",
        "**** ",
    )
    # Fragments that are kept as a separator rather than removed
    separator_fragments = ("Preconditions~", "Test Steps~", "Expected Result~")

    if setting in (0, 2):
        output = _replace_each(output, unwanted_fragments, "")
    if setting in (1, 2):
        output = _replace_each(output, separator_fragments, "~")
    return output

def load_model_chain(template : str =  template, model_str : str = model_str, temperature=0):
    """Build a prompt-to-model chain for the given template and Ollama model.

    Args:
        template: Prompt template text passed to ChatPromptTemplate.
        model_str: Name of the Ollama model to use.
        temperature: Temperature passed to the model.

    Returns:
        The prompt piped into the model (`prompt | model`).
    """
    chat_prompt = ChatPromptTemplate.from_template(template)
    llm = OllamaLLM(model=model_str, temperature=temperature)
    return chat_prompt | llm

def create_test_cases(data, model_str : str = model_str , template : str = template, url : str = "placeholder"):
    """Generate one cleaned test case per scraped UI element.

    Args:
        data: Sized iterable of UI element descriptions; each item is
            converted with `str()` before being sent to the model.
        model_str: Name of the Ollama model to use.
        template: Prompt template with `{ui_element}` and `{url}` slots.
        url: Website the elements were scraped from.

    Returns:
        A list with one model response per item, in input order, each passed
        through `remove_common_error`.
    """
    total = len(data)
    return_data = []

    for count, item in enumerate(data, start=1):
        # A fresh chain is built for every item ("LLM reset to free up
        # context" in the original code), so no chain is reused across items.
        chain = load_model_chain(template, model_str)
        test_case = chain.invoke({"ui_element": str(item), "url": url})
        return_data.append(remove_common_error(test_case))

        if (DEBUG_SETTING == 1):
            # Count from 1 so the final message reads "<total> out of <total>"
            print(f"test case {count} out of {total} generated")

    return return_data

# Pandas
import pandas as pd

# Table Dataframe Initialization
def dataframe_init(data):
    """Turn a mapping of column name -> values into a DataFrame.

    Every value is wrapped in a `pd.Series` first, which lets columns of
    different lengths coexist: shorter columns are padded with missing values.
    """
    columns = {}
    for column_name, values in data.items():
        columns[column_name] = pd.Series(values)
    return pd.DataFrame(columns)

# CSV Generator
def csv_from_test_case_batches(filename, data):
    """Write `data` to "<filename>.csv" as a tab-separated, UTF-8 table.

    `data` is converted with `dataframe_init`; the header row is written and
    the index is omitted.
    """
    frame = dataframe_init(data)
    target_path = f"{filename}.csv"
    frame.to_csv(target_path, sep='\t', encoding='utf-8', index=False, header=True)

def create_fine_tuning_dataset(scraped_data, llm_output, name : str = "fine_tuning_dataset", url : str = ""):
    """Assemble the fine-tuning dataset, save it as a CSV and return it.

    Args:
        scraped_data: Sequence of UI element descriptions.
        llm_output: Model responses, used as the "output" column.
        name: Base name (without extension) of the CSV file to write.
        url: Website named in each generated instruction.

    Returns:
        A DataFrame with the columns "output", "input" and "instruction".
    """
    # Instruction / Prompt: one per scraped element
    instruction_data = [
        f"Generate test case for the following UI element: {element} from the website: {url}"
        for element in scraped_data
    ]
    # Constant Context: the input column is an empty string for every row
    in_data = ["" for _ in instruction_data]

    data = {
        "output" : llm_output,  # LLM Output
        "input" : in_data,
        "instruction" : instruction_data,
    }
    csv_from_test_case_batches(name, data)
    return dataframe_init(data)

# System Proper Parameters
def create_table_dataset(llm_output):
    """Split '~'-separated LLM test cases into a table of test-case fields.

    A test case is expected to split into exactly four parts: objective,
    precondition, test steps and expected result. Anything else is skipped.
    (The check used to be inverted: well-formed cases were dropped and
    malformed ones were indexed, raising IndexError for short splits.)

    Args:
        llm_output: Iterable of test-case strings.

    Returns:
        A DataFrame with the columns "Test Case ID", "Objective",
        "Precondition", "Test Steps", "Expected Result" and "Actual Result".
        The ID is the 1-based position of the test case in `llm_output`, so
        skipped entries leave gaps in the numbering.
    """
    test_case_ids = []
    objective = []
    precondition = []
    test_steps = []
    expected_result = []
    actual_result = []

    for number, test_case in enumerate(llm_output, start=1):
        split_test_case = test_case.split('~')

        # Validate and skip if split is a failure
        if len(split_test_case) != 4:
            continue

        objective_part, precondition_part, steps_part, expected_part = split_test_case
        test_case_ids.append(number)
        objective.append(objective_part)
        precondition.append(precondition_part)
        test_steps.append(steps_part)
        expected_result.append(expected_part)
        # Placeholder to be filled in by whoever executes the test
        actual_result.append("Pass/Fail")

    data = {"Test Case ID" : test_case_ids,
            "Objective" : objective,
            "Precondition" : precondition,
            "Test Steps" : test_steps,
            "Expected Result" : expected_result,
            "Actual Result" : actual_result}

    return dataframe_init(data)

##### Main Program

In [10]:
import time # for checking runtime
## MAIN PROGRAM ##
# Scrapes one website, generates a test case per scraped UI element and
# stores the result as a fine-tuning dataset CSV.
print("Welcome to Dataset Generator V3")

## Input URL
url = get_url()
start = time.time()
scraped_data = data_scrape(url)
end = time.time()
print(f"Scraping Finished In : {(end-start) * 10**3}, ms")

## Create Test Cases
print("This will take a while to complete: Please wait by doing other things. You will receive a message once generation is complete.")
start = time.time()
llm_output = create_test_cases(data=scraped_data, url=url)
end = time.time()
print(f"Generation Finished In : {(end-start) * 10**3}, ms")

## Create Fine Tuning Dataset
# Pass the URL along: without it every stored instruction ends with
# "from the website: " followed by nothing.
fine_tuning_dataset = create_fine_tuning_dataset(scraped_data, llm_output, url=url)
print("Dataset is Generated as a CSV File. Please rename it before running this notebook again")

Welcome to Dataset Generator V3
Scraping Finished In : 22297.12438583374, ms
This will take a while to complete: Please wait by doing other things. You will receive a message once generation is complete.
test case 0 out of 111 generated
test case 1 out of 111 generated
test case 2 out of 111 generated
test case 3 out of 111 generated
test case 4 out of 111 generated
test case 5 out of 111 generated
test case 6 out of 111 generated
test case 7 out of 111 generated
test case 8 out of 111 generated
test case 9 out of 111 generated
test case 10 out of 111 generated
test case 11 out of 111 generated
test case 12 out of 111 generated
test case 13 out of 111 generated
test case 14 out of 111 generated
test case 15 out of 111 generated
test case 16 out of 111 generated
test case 17 out of 111 generated
test case 18 out of 111 generated
test case 19 out of 111 generated
test case 20 out of 111 generated
test case 21 out of 111 generated
test case 22 out of 111 generated
test case 23 out of 111 

#### Version 2: [Use This Until V3]

#### Functions

In [None]:
# from selenium import webdriver

# import selenium
# from selenium import webdriver
# from selenium.webdriver.common.by import By

# def setup_driver():
#     try :
#         options = webdriver.FirefoxOptions()
#         options.add_argument("--headless")
#         driver = webdriver.Firefox(options=options) # Default with Firefox
#     except Exception:
#         try : 
#             driver = webdriver.Chrome() # Default with Chrome            
#             ## driver = webdriver.Chrome(options=options)

#             ## Uncomment when setting up for gcolab (and using chrome)
#             # from selenium.webdriver import ChromeOptions
#             # from selenium.webdriver.chrome.service import Service
#             # from webdriver_manager.chrome import ChromeDriverManager
#             # from selenium.webdriver.chrome.options import Options
#             # import google_colab_selenium as gs
#             # options = ChromeOptions()
#             # options.add_argument("--headless")
#             # driver = gs.Chrome()
#         except Exception:
#             try : 
#                 options = webdriver.EdgeOptions()
#                 options.add_argument("--no-sandbox")
#                 options.add_argument("--headless")
#                 driver = webdriver.Edge(options=options) # Default with Edge
#             except Exception:
#                 print("Error. No usable browser found for scraping")
    
#     return driver

# def get_url():
#     return input("Enter Website URL: ")

# def data_scrape(url):
#     driver = setup_driver()
#     data = []
#     driver.get(url)

#     # BUTTONS
#     buttons = driver.find_elements(By.CSS_SELECTOR, "button")
#     for button in buttons:
#         if button.is_displayed():
#             # Button Element Attributes
#             button_text = f"Button Element '{button.text}'" if (button.text != None and button.text != "") else "Button Element Without Name (might be an image)"
#             button_size = f"With Size Attribute {button.size}" if button.size != None else ""
#             button_color = f"With Color Attribute {button.value_of_css_property("color")}" if button.value_of_css_property("color") != None else ""
#             button_background_color = f"With Background Color Attribute {button.value_of_css_property("background-color")}" if button.value_of_css_property("background-color") != None else ""
#             button_enabled = "Clickable" if button.is_enabled() else "Not Clickable"
            
#             # Store Button Element
#             store_clickability = f"{button_enabled} {button_text}"
#             data.append(store_clickability)
#             if (button_color != "" or button_background_color != ""):
#                 store_color = f"{button_text} {button_color} {button_background_color} "
#                 data.append(store_color)
#             if (button_size != ""):
#                 store_size = f"{button_text} {button_size}"
#                 data.append(store_size)

#     ## LINKS  
#     links = driver.find_elements(By.TAG_NAME,"a")
#     for link in links:
#         if link.is_displayed():
#             # Link Element Attributes
#             link_text = f"Link Element '{link.text}'" if (link.text != None and link.text != "") else "Link Element Without Name (might be an image)"
#             link_url = f"With URL {link.get_attribute('href')}" if link.get_attribute('href') != None else "Without URL"
#             # Additional Attributes
#             link_rel = f"With rel attribute {link.get_attribute('rel')}" if link.get_attribute('rel') != None and link.get_attribute('rel') != "" else ""
#             link_target = f"With target attribute {link.get_attribute('target')}" if link.get_attribute('target') != None and link.get_attribute('target') != "" else ""
#             link_download = f"A download is attached to document {link.get_attribute('download')}" if link.get_attribute('download') != None and link.get_attribute('download') != "" else ""
            
#             # Store Link Element
#             store_navigation = f"{link_text} {link_url} {link_target}"
#             data.append(store_navigation)
#             if link_rel != "":
#                 store_rel = f"{link_text} {link_rel}"
#                 data.append(store_rel)
#             if link_download != "":
#                 store_download = f"{link_text} {link_download}"
#                 data.append(store_download)

#     # TEXT
#     # Heading Elements
#     for level in range(1, 7):  # HTML has 6 levels of headings (h1 to h6)
#         headings = driver.find_elements(By.TAG_NAME,f"h{level}")
#         for heading in headings:
#             if heading.is_displayed():
                
#                 text_color = f"With Color {heading.value_of_css_property("color")}" if heading.value_of_css_property("color") != None else ""
#                 text_background_color = f"With Background Color {heading.value_of_css_property("background-color")}" if heading.value_of_css_property("background-color") != None else ""
#                 store = f"Heading Element (h{level}): '{heading.text}'"
                
#                 # Store
#                 data.append(store)
#                 if (text_color != "" or text_background_color != ""):
#                     data.append(f"{store} {text_color} {text_background_color}")

#     # Paragraph Elements
#     paragraphs = driver.find_elements(By.TAG_NAME,"p")
#     for paragraph in paragraphs:
#         if paragraph.is_displayed():
            
#             text_color = f"With Color {paragraph.value_of_css_property("color")}" if paragraph.value_of_css_property("color") != None else ""
#             text_background_color = f"With Background Color {paragraph.value_of_css_property("background-color")}" if paragraph.value_of_css_property("background-color") != None else ""
#             store = f"Paragraph Element: '{paragraph.text}'"

#             # Store
#             data.append(store)
#             if (text_color != "" or text_background_color != ""):
#                     data.append(f"{store} {text_color} {text_background_color}")

#     # Input Elements
#     input_tags = driver.find_elements(By.TAG_NAME,"input")
#     for input_tag in input_tags:
#         if input_tag.is_displayed():
#             # Basic Attributes
#             input_field_name = f"Input Field Element: '{input_tag.get_attribute('name')}'" if input_tag.get_attribute('name') != None else "Without Name"
#             input_field_type = f"With Type {input_tag.get_attribute('type')}" if input_tag.get_attribute('name') != None else "Without Type"
#             # Additional Attributes
#             input_field_value = f"With Value {input_tag.get_attribute('value')}" if input_tag.get_attribute('name') != None else "Without Value"
#             input_field_placeholder = f"With Placeholder '{input_tag.get_attribute('placeholder')}'" if input_tag.get_attribute('name') != None else "Without Placeholder"
#             input_field_readonly = f"Is Readonly" if input_tag.get_attribute('readonly') != None else "Is Editable"
#             input_field_disabled = f"Disabled" if input_tag.get_attribute('disabled') != None else "Enabled"
#             input_field_required = f"Is Required" if input_tag.get_attribute('required') != None else "Not Required"
#             input_field_autocomplete = f"Is Autocomplete" if input_tag.get_attribute('autocomplete') != None else "Not Autocomplete"

#             # Store based on type
#             if (input_tag.get_attribute('type') == "submit"):
#                 store = f"Form Submit Button Element: {input_tag.get_attribute("name")} {input_field_disabled}"
#                 data.append(store)
#             else:
#                 store = f"{input_field_disabled} {input_field_name} {input_field_type} {input_field_required}"
#                 data.append(store)
#                 store_value = f"{input_field_name} {input_field_type} {input_field_value}"
#                 data.append(store_value)
#                 store_placeholder = f"{input_field_name} {input_field_type} {input_field_placeholder}"
#                 data.append(store_placeholder)
#                 store_readonly = f"{input_field_name} {input_field_type} {input_field_readonly}"
#                 data.append(store_readonly)
#                 store_autocomplete = f"{input_field_name} {input_field_type} {input_field_autocomplete}"
#                 data.append(store_autocomplete)
            
#     # Close the browser
#     driver.quit()

#     return data

# # Langchain and Ollama
# import langchain
# from langchain_ollama.llms import OllamaLLM
# from langchain_core.prompts import ChatPromptTemplate

# # Dataset Generator Version TESTING
# template = """
# You are an expert in software quality assurance specializing in usability testing. Given a UI element, generate a set of functional usability test cases with detailed test scenarios.

# The UI element could be a button, link, text, or input field (some elements have a link attached to them). Consider the attribute attached to the UI element as the attribute acts as a hint about the primary focus of the test case. Also consider the following usability aspects:

# Accessibility (keyboard navigation, screen reader support, etc.)
# Responsiveness (behavior across different screen sizes, etc.)
# Feedback (hover effects, click responses, error messages, etc.)
# Interactivity (expected behavior when clicked, typed into, or focused, etc.)
# User experience (clarity of labels, ease of use, etc.)

# Only output the following, separated by a ~:

# Objective (What the test aims to verify)
# Preconditions (Any setup or conditions required before testing)
# Test Steps (Step-by-step actions to perform the test)
# Expected Result (Expected outcome if the UI element works correctly)

# Your output should only be in the following structured format but do not include the format in the output: "Objective"~"Preconditions"~"Test Steps"~"Expected Result"
# DO NOT output anything else but one test case in the said format.

# Here is the UI Element: {ui_element}
# The UI Element is from the following website URL : {url}
# """

# common_error = "Objective~Preconditions~Test Steps~Expected Result"

# # Load Model Chain
# def load_model_chain(template : str =  template, model_str : str = "llama3.1", temperature=0.1):
#     prompt = ChatPromptTemplate.from_template(template)
#     model = OllamaLLM(model=model_str, temperature=temperature)
#     chain = prompt | model
#     return chain

# # Create Test Case Data
# def create_test_cases(data, model_str : str = "llama3.1" , template : str = template, url : str = "placeholder", common_error : str = common_error):
    
#     # Load LLM Chain
#     chain = load_model_chain(template, model_str)

#     # Return Data
#     return_data = []
    
#     i = 0
#     total = len(data)
#     for item in data:
#         return_data.append(chain.invoke({"ui_element": str(item), "url": url}).replace(common_error, ""))
#         # LLM Reset To Free Up Context
#         chain = load_model_chain(template, model_str)
#         print(f"test case {i} out of {total} generated")
#         i += 1

#     return return_data

# # Pandas
# import pandas as pd

# # Table Dataframe Initialization
# def dataframe_init(data):
#     df = pd.DataFrame(dict([(key, pd.Series(value)) for key, value in data.items()]))
#     return df

# # CSV Generator
# def csv_from_test_case_batches(filename, data):
#     cols = dataframe_init(data)
#     cols.to_csv(f"{filename}.csv", sep='\t', encoding='utf-8', index=False, header=True)

# def create_fine_tuning_dataset(scraped_data, llm_output):
#     # Dataset Generator : Resulting Dataset Columns / Parameters
#     out_data = llm_output # LLM Output
#     in_data = scraped_data # Scraped UI Element
#     instruction_data = []
#     i = 0
#     while (i < len(scraped_data)):
#         template = f"Generate test case for the following UI element:"
#         instruction_data.append(template)
#         i+=1

#     data = {"output" : out_data, 
#             "input" : in_data, 
#             "instruction" : instruction_data}
#     csv_from_test_case_batches("fine_tuning_dataset", data)
#     return dataframe_init(data) 

#### Main Program

In [None]:
# import time # for checking runtime
# # TODO : Add a looping feature for scraping multiple websites
# # IDEA : We Developers Hardcode a List of URLs, list of URLS are fed to the loop, and generates multiple csv files 

# ## MAIN PROGRAM ##
# print("Welcome to Dataset Generator V2")

# ## Input URL
# url = get_url()
# start = time.time()
# scraped_data = data_scrape(url)
# end = time.time()
# print(f"Scraping Finished In : {(end-start) * 10**3}, ms")

# ## Create Test Cases
# print("This will take a while to complete: Please wait by doing other things. You will receive a message once generation is complete.")
# start = time.time()
# llm_output = create_test_cases(data=scraped_data, url=url)
# end = time.time()
# print(f"Generation Finished In : {(end-start) * 10**3}, ms")

# ## Create Fine Tuning Dataset
# fine_tuning_dataset = create_fine_tuning_dataset(scraped_data, llm_output)
# print("Dataset is Generated as a CSV File. Please rename it before running this notebook again")

Welcome to Dataset Generator V2
Scraping Finished In : 23533.684968948364, ms
This will take a while to complete: Please wait by doing other things. You will receive a message once generation is complete.
test case 0 out of 353 generated
test case 1 out of 353 generated
test case 2 out of 353 generated
test case 3 out of 353 generated
test case 4 out of 353 generated
test case 5 out of 353 generated
test case 6 out of 353 generated
test case 7 out of 353 generated
test case 8 out of 353 generated
test case 9 out of 353 generated
test case 10 out of 353 generated
test case 11 out of 353 generated
test case 12 out of 353 generated
test case 13 out of 353 generated
test case 14 out of 353 generated
test case 15 out of 353 generated
test case 16 out of 353 generated
test case 17 out of 353 generated
test case 18 out of 353 generated
test case 19 out of 353 generated
test case 20 out of 353 generated
test case 21 out of 353 generated
test case 22 out of 353 generated
test case 23 out of 353

#### Version 1: [Use This Until V2]

##### Functions

In [1]:
# ## SCRAPER IMPORTS ##
# from selenium import webdriver

# import selenium
# from selenium import webdriver
# from selenium.webdriver.common.by import By

# ## GET URL FUNCTION (non UI version) ##
# def get_url():
#     return input("Enter Website URL: ")

# # DATA SCRAPING FUNCTION
# def data_scrape(url):
#     # Setup Selenium Webdriver
#     # TODO : For V2 Add other drivers according to user settings. Detect user browser and use that as driver potentially
#     driver = webdriver.Firefox()
#     # Setup Return Data
#     data = []

#     # Open the website
#     driver.get(url)
    
#     ## GET UI ELEMENTS (must be visible) ##

#     # BUTTONS
#     buttons = driver.find_elements(By.CSS_SELECTOR, "button")
#     for button in buttons:
#         if button.is_displayed():
#             # Button Element Attributes
#             button_text = f"Button Element '{button.text}'" if (button.text != None and button.text != "") else "Button Element Without Name"
#             button_size = f"With Size Attribute {button.size}" if button.size != None else "Without Size Attribute"
#             button_color = f"With Background Color {button.value_of_css_property("background-color")}" if button.value_of_css_property("background-color") != None else "Without Background Color"
#             button_enabled = "and the button is Clickable" if button.is_enabled() else "and the button is not Clickable"
            
#             # Store Button Element
#             store = f"{button_text} {button_size} {button_color} {button_enabled}"
#             data.append(store)
            
#     # LINKS
#     links = driver.find_elements(By.TAG_NAME,"a")
#     for link in links:
#         if link.is_displayed():
#             # Link Element Attributes
#             link_text = f"Link Element '{link.text}'" if (link.text != None and link.text != "") else "Link Element Without Name"
#             link_url = f"With URL {link.get_attribute('href')}" if link.get_attribute('href') != None else "Without URL"
#             # Additional Attributes
#             link_rel = f"With rel attribute {link.get_attribute('rel')}" if link.get_attribute('rel') != None and link.get_attribute('rel') != "" else ""
#             link_target = f"With target attribute {link.get_attribute('target')}" if link.get_attribute('target') != None and link.get_attribute('target') != "" else ""
#             link_download = f"This is a download link to document {link.get_attribute('download')}" if link.get_attribute('download') != None and link.get_attribute('download') != "" else ""
            
#             # Store Link Element
#             store = f"{link_text} {link_url} {link_rel} {link_target} {link_download}"
#             data.append(store)

#     # VISIBLE TEXT
#     # Heading Elements
#     for level in range(1, 7):  # HTML has 6 levels of headings (h1 to h6)
#         headings = driver.find_elements(By.TAG_NAME,f"h{level}")
#         for heading in headings:
#             if heading.is_displayed():
#                 store = f"Heading Element (h{level}): '{heading.text}'"
#                 # Store
#                 data.append(store)
#     # Paragraph Elements
#     paragraphs = driver.find_elements(By.TAG_NAME,"p")
#     for paragraph in paragraphs:
#         if paragraph.is_displayed():
#             store = f"Paragraph Element: '{paragraph.text}'"
#             # Store
#             data.append(store)

#     # INPUT
#     input_tags = driver.find_elements(By.TAG_NAME,"input")
#     for input_tag in input_tags:
#         if input_tag.is_displayed():
#             # Basic Attributes
#             input_field_name = f"Input Field Element: '{input_tag.get_attribute('name')}'" if input_tag.get_attribute('name') != None else "Without Name"
#             input_field_type = f"With Type {input_tag.get_attribute('type')}" if input_tag.get_attribute('name') != None else "Without Type"
#             # Additional Attributes
#             input_field_value = f"With Value {input_tag.get_attribute('value')}" if input_tag.get_attribute('name') != None else "Without Value"
#             input_field_placeholder = f"With Placeholder '{input_tag.get_attribute('placeholder')}'" if input_tag.get_attribute('name') != None else "Without Placeholder"
#             input_field_readonly = f"Is Readonly" if input_tag.get_attribute('readonly') != None else "Is Editable"
#             input_field_disabled = f"Is Not Clickable" if input_tag.get_attribute('disabled') != None else "Is Clickable"
#             input_field_required = f"Is Required" if input_tag.get_attribute('required') != None else "Not Required"
#             input_field_autocomplete = f"Is Autocomplete" if input_tag.get_attribute('autocomplete') != None else "Not Autocomplete"

#             # Store based on type
#             if (input_tag.get_attribute('type') == "submit"):
#                 store = f"Form Submit Button Element: {input_tag.get_attribute("name")} {input_field_disabled}"
#             else:
#                 store = f"{input_field_name} {input_field_type} {input_field_required} {input_field_value} {input_field_placeholder} {input_field_readonly} {input_field_disabled} {input_field_disabled} {input_field_autocomplete}"
#             data.append(store)
            
#     # Close the browser
#     driver.quit()
    
#     ## RETURN COMPILED ELEMENT DATA ##
#     return data

# ## PROMPT GENERATOR (+LLM) IMPORTS ##
# import langchain
# from langchain_ollama.llms import OllamaLLM
# from langchain_core.prompts import ChatPromptTemplate

# template = """
# You are a quality assurance expert that generates functional test cases for websites. You take in a UI element and you generate a functional test case for usability.

# Some UI elements have a link attached to them and other properties.
# ONLY output in the following format: 
# "Objective"~"Preconditions"~"Test Steps"~"Expected Result"
# DO NOT output any other text. DO NOT output 'Here are the test cases...'

# Example Input:
# Link Element: Home with URL : https://bicol-u.edu.ph/
# Link Element: Academics with URL : https://bicol-u.edu.ph/#
# Link Element 'Battle of New Orleans' With URL https://en.wikipedia.org/wiki/Battle_of_New_Orleans   
# ...

# Example Output:
# "Verify the functionality of the Link Element 'Home'"~"The user is on the webpage 'https://bicol-u.edu.ph/'"~"'1. User navigates to the webpage \'https://bicol-u.edu.ph/\'' '2. Click on Link Element \'Home\'' '3. Verify if the webpage opens in a new tab/window.'"~"Webpage 'https://bicol-u.edu.ph/' should open in a new tab/window."
# "Verify the functionality of the Link Element 'Academics'"~"The user is on the webpage 'https://bicol-u.edu.ph/'"~"'1. User navigates to the webpage \'https://bicol-u.edu.ph/\'' '2. Click on Link Element \'Academics\'' '3. Verify if the link url changes to \'https://bicol-u.edu.ph/#\'' '4. Verify if a dropdown below \'Academics\' is visible'"~"A dropdown should show below 'Academics', but the webpage does not change"
# "Verify the functionality of the Link Element 'Battle of New Orleans'"~"The user is on the webpage 'https://en.wikipedia.org/'"~"'1. User navigates to the webpage 'https://en.wikipedia.org/' '' '2. Click on Link Element 'Battle of New Orleans'' '3. Verify if the link url changes to 'https://en.wikipedia.org/wiki/Battle_of_New_Orleans' '4. Verify if the webpage opens in a new tab/window'"~"Webpage 'https://en.wikipedia.org/wiki/Battle_of_New_Orleans/' should open in a new tab/window."
# ...

# Here is the UI element : {question}
# """

# # LOAD MODEL CHAIN
# def load_model_chain(template : str =  template, model_str : str = "llama3.1"):
#     prompt = ChatPromptTemplate.from_template(template)
#     model = OllamaLLM(model=model_str)
#     chain = prompt | model
#     return chain

# # CREATE TEST CASE DATA
# def create_test_cases(data, model_str : str = "llama3.1" , template : str = template):
    
#     # Load LLM Chain
#     chain = load_model_chain(template, model_str)

#     # Return Data
#     return_data = []
    
#     for item in data:
#         return_data.append(chain.invoke({"question": str(item)}))
#         # LLM Reset To Free Up Context
#         chain = load_model_chain(template, model_str)

#     return return_data

# ## TABLE GENERATOR IMPORTS ##
# import pandas as pd

# # INITIALIZE TABLE DATAFRAME
# def dataframe_init(data):
#     df = pd.DataFrame(dict([(key, pd.Series(value)) for key, value in data.items()]))
#     return df

# # CSV GENERATOR
# def csv_from_test_case_batches(filename, data):
#     cols = dataframe_init(data)
#     cols.to_csv(f"{filename}.csv", sep='\t', encoding='utf-8', index=False, header=True)

# ## CREATE THE FINE TUNING DATASET ##
# def create_fine_tuning_dataset(scraped_data, llm_output):
#     # Dataset Generator : Resulting Dataset Columns / Parameters
#     out_data = llm_output # LLM Output
#     in_data = scraped_data # Scraped UI Element
#     instruction_data = []
#     i = 0
#     while (i < len(scraped_data)):
#         template = f"####Question:\n Generate test case for the following UI element: {in_data[i]}"
#         instruction_data.append(template)
#         i+=1

#     data = {"output" : out_data, 
#             "input" : in_data, 
#             "instruction" : instruction_data}
#     csv_from_test_case_batches("fine_tuning_dataset", data)
#     return dataframe_init(data)    


In [3]:
# import time # for checking runtime
# # TODO : Add a looping feature for scraping multiple websites
# # IDEA : The developers hardcode a list of URLs; the list is fed to the loop, which generates multiple CSV files

# ## MAIN PROGRAM ##
# print("Welcome to Dataset Generator V1")

# ## Input URL
# start = time.time()
# scraped_data = data_scrape(get_url())
# end = time.time()
# print(f"Scraping Finished In : {(end-start) * 10**3}, ms")

# ## Create Test Cases
# print("This will take a while to complete: Please wait by doing other things. You will receive a message once generation is complete.")
# start = time.time()
# llm_output = create_test_cases(scraped_data)
# end = time.time()
# print(f"Generation Finished In : {(end-start) * 10**3}, ms")

# ## Create Fine Tuning Dataset
# fine_tuning_dataset = create_fine_tuning_dataset(scraped_data, llm_output)
# print("Dataset is Generated as a CSV File. Please rename it before running this notebook again")

Welcome to Dataset Generator V1
Scraping Finished In : 29831.44760131836, ms
This will take a while to complete: Please wait by doing other things. You will receive a message once generation is complete.
Generation Finished In : 1074076.6022205353, ms
Dataset is Generated as a CSV File. Please rename it before running this notebook again


#### Prototype Code

In [None]:
# # Imports

# # Langchain and Ollama
# import langchain
# from langchain_ollama.llms import OllamaLLM
# from langchain_core.prompts import ChatPromptTemplate

# # BeautifulSoup and Requests
# from bs4 import BeautifulSoup
# import requests

# # Math and CSV
# from math import ceil,floor
# import csv

# # Selenium
# import selenium
# from selenium import webdriver
# from selenium.webdriver.common.by import By
# from time import sleep

# # Pandas
# import pandas as pd

In [None]:
# # Table File Generator

# def dataframe_init(in_data, out_data, text_data):
#     # initialize data of lists.
#     data = {"output" : out_data, 
#            "input" : in_data,
#            "instruction" : text_data}

#     # Create DataFrame
#     df = pd.DataFrame(dict([(key, pd.Series(value)) for key, value in data.items()]))
#     # df = pd.DataFrame(data)
#     return df

In [None]:
# # CSV Generator

# def csv_from_test_case_batches(filename, input_column, output_column, text_column):
#     cols = dataframe_init(input_column, output_column, text_column)
#     cols.to_csv(f"{filename}.csv", sep='\t', encoding='utf-8', index=False, header=True)

In [None]:
# # Scraper

# # GET URL FUNCTION
# def get_url():
#     return input("Enter Website URL: ")

# def data_scrape(url):
#     # Set up Selenium with a WebDriver (Firefox is used here)
#     driver = webdriver.Firefox()
    
#     # Open the website
#     driver.get(url)
    
#     # Find all visible buttons
#     buttons = driver.find_elements(By.CSS_SELECTOR, "button")
#     visible_buttons = []
#     for button in buttons:
#         if button.is_displayed():
#             store = f"Button Element: {button.text} with URL : {button.get_attribute('href')}"
#             visible_buttons.append(store)
            
#     # Find all visible links
#     links = driver.find_elements(By.TAG_NAME,"a")
#     visible_links = []
#     for link in links:
#         if link.is_displayed():
#             store = f"Link Element: {link.text} with URL : {link.get_attribute('href')}"
#             visible_links.append(store)
            
#     # Find all visible forms (text inputs)
#     visible_text_inputs = []
#     # Scrape input fields
#     input_fields = driver.find_elements(By.TAG_NAME,"input")
#     for input_field in input_fields:
#         if input_field.is_displayed():
#             store = f"Input Field Name: {input_field.get_attribute("name")} Input field type: {input_field.get_attribute("type")}"
#             visible_text_inputs.append(store)
    
#     # Find all visible text
#     visible_text = []
#     # Scrape headings
#     for level in range(1, 7):  # HTML has 6 levels of headings (h1 to h6)
#         headings = driver.find_elements(By.TAG_NAME,f"h{level}")
#         for heading in headings:
#             if heading.is_displayed():
#                 store = f"Heading (h{level}): {heading.text}"
#                 visible_text.append(store)
#     # Scrape text content
#     paragraphs = driver.find_elements(By.TAG_NAME,"p")
#     for paragraph in paragraphs:
#         if paragraph.is_displayed():
#             store = f"Paragraph Text: {paragraph.text}"
#             visible_text.append(store)
            
#     # Find all visible navigation menus
#     visible_nav_menus = []
#     # Scrape navigation menus
#     navigation_menus = driver.find_elements(By.TAG_NAME,"nav")
#     for nav in navigation_menus:
#         if nav.is_displayed():
#             store = f"Navigation Menu: {nav.text}"
#             visible_nav_menus.append(store)
            
#     # Close the browser
#     driver.quit()

#     # Return Data
#     data = []
    
#     # Data append
#     data.append(visible_buttons)
#     data.append(visible_links)
#     data.append(visible_text_inputs)
#     data.append(visible_text)
#     data.append(visible_nav_menus)

#     return data

In [None]:
# # Prompt Generator + LLM

# # CONTEXT
# template = """
# You are a quality assurance expert that generates functional test cases for websites. You take in a UI element and you generate a functional test case.

# Here is the UI element (some elements have a link attached to them): {question}
# ONLY output in the following format: 
# "Objective"~"Preconditions"~"Test Steps"~"Expected Result"

# DO NOT output any other text. DO NOT output 'Here are the test cases...', your output should be like the example output below.

# Example Input:
# Link Element: Home with URL : https://bicol-u.edu.ph/
# Link Element: Academics with URL : https://bicol-u.edu.ph/#
# ...

# Example Output:
# "Verify the functionality of the Link Element 'Home'"~"The user is on the webpage 'https://bicol-u.edu.ph/'"~"'1. User navigates to the webpage \'https://bicol-u.edu.ph/\'' '2. Click on Link Element \'Home\'' '3. Verify if the webpage opens in a new tab/window.'"~"Webpage 'https://bicol-u.edu.ph/' should open in a new tab/window."
# "Verify the functionality of the Link Element 'Academics'"~"The user is on the webpage 'https://bicol-u.edu.ph/'"~"'1. User navigates to the webpage \'https://bicol-u.edu.ph/\'' '2. Click on Link Element \'Academics\'' '3. Verify if the link url changes to \'https://bicol-u.edu.ph/#\'' '4. Verify if a dropdown below \'Academics\' is visible'"~"A dropdown should show below 'Academics', but the webpage does not change"
# ...

# """

# # Load Model Chain
# def load_model_chain(template : str =  template, model_str : str = "llama3.1"):
#     # Prompt
#     prompt = ChatPromptTemplate.from_template(template)
#     # Model
#     model = OllamaLLM(model=model_str)
#     # Chain
#     chain = prompt | model
#     return chain

# # Create Test Case Data
# def create_test_cases(data, chain, model_str : str = "llama3.1" , template : str = template, batch_size : int = 10):
    
#     return_data = []
    
#     for sub_data in data:
#         element_test_cases = []
#         i = 0
#         j = 0
#         print(f"Batch Number: {ceil(len(sub_data)/batch_size)}")
#         while (j<ceil(len(sub_data)/batch_size)):
#             print(f"[{j}] Batch {str(len(sub_data[i:i+batch_size]))}")
#             appending = []
#             for dat in sub_data[i:i+batch_size]:
#                 appending.append(chain.invoke({"question": str(dat)}))
#             element_test_cases.append(appending)
#             i+=batch_size
#             j+=1
#             prompt = ChatPromptTemplate.from_template(template)
#             model = OllamaLLM(model=model_str)
#             chain = prompt | model
#         return_data.append(element_test_cases)
#     return return_data


In [None]:
# # Main Program

# # FILENAME
# filename = "sample_name"
# # URL
# url = get_url()
# # DATA
# data = data_scrape(url) # Returns one list per element type: buttons, links, text inputs, visible text, nav menus

# # Prepare data to be placed in the Input Column
# input_data = []
# for item_set in data:
#     for item in item_set:
#         input_data.append(item)

# # CHAIN
# chain = load_model_chain()
# # GENERATE TEST CASES
# test_cases = create_test_cases(data, chain)

# # Prepare data to be placed in the Output Column [Split]
# split_sets = []
# for test_case_set in test_cases:
#     for test_case in test_case_set:
#         for test in test_case:
#             split_sets.append(test.split('~'))
# # Prepare data to be placed in the Output Column [Actual Column]
# output_data = []
# for sp in split_sets:
#     output_data.append(sp)
    
# # Fine-Tuning Template
# text_data = []
# i = 0
# while (i < len(input_data)):
#     template = f"####Question:\n Generate test case for the following UI element: {input_data[i]}"
#     text_data.append(template)
#     i+=1
    
# # CSV OUTPUT
# csv_from_test_case_batches(filename, input_data, output_data, text_data)