### Dataset Generation
- Consists of a scraper, a prompt generator, and a table generator. Because of this, the development of the prompt generator, table generator, and scraper is partially based on the development of the dataset generation notebook (and vice versa).

#### Prototype Code

In [None]:
# Imports

# Langchain and Ollama
import langchain
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate

# BeautifulSoup and Requests
from bs4 import BeautifulSoup
import requests

# Math
from math import ceil,floor
import csv

# Selenium
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep

# Pandas
import pandas as pd

In [None]:
# Table File Generator

def dataframe_init(in_data, out_data, text_data):
    """Assemble the dataset table from three column lists.

    Every list is wrapped in a ``pd.Series`` before the frame is built, so
    lists of different lengths are accepted; shorter columns are filled
    with NaN. Columns are ordered "output", "input", "instruction".

    Args:
        in_data: Values for the "input" column.
        out_data: Values for the "output" column.
        text_data: Values for the "instruction" column.

    Returns:
        The assembled ``pd.DataFrame``.
    """
    columns = {
        "output": pd.Series(out_data),
        "input": pd.Series(in_data),
        "instruction": pd.Series(text_data),
    }
    return pd.DataFrame(columns)

In [None]:
# CSV Generator

def csv_from_test_case_batches(filename, input_column, output_column, text_column):
    """Write the dataset columns to ``<filename>.csv``.

    The table is built with ``dataframe_init`` and saved tab-separated,
    UTF-8 encoded, with a header row and without the index column.

    Args:
        filename: Output path without the ".csv" suffix.
        input_column: Values for the "input" column.
        output_column: Values for the "output" column.
        text_column: Values for the "instruction" column.
    """
    destination = f"{filename}.csv"
    table = dataframe_init(input_column, output_column, text_column)
    table.to_csv(
        destination,
        sep='\t',
        encoding='utf-8',
        index=False,
        header=True,
    )

In [None]:
# Scraper

# GET URL FUNCTION
def get_url():
    """Prompt on standard input for a website URL and return what was typed."""
    website_url = input("Enter Website URL: ")
    return website_url

def data_scrape(url):
    """Collect descriptions of the visible UI elements on a web page.

    Opens ``url`` in a Selenium-driven Firefox session and records one
    descriptive string for every displayed element in five categories.

    Args:
        url: Address of the page to load.

    Returns:
        A list of five lists of strings, in this order: buttons, links,
        input fields, text (headings h1-h6 followed by paragraphs), and
        navigation menus.
    """
    # Set up Selenium with the Firefox WebDriver
    driver = webdriver.Firefox()

    # try/finally so the browser is shut down even when loading the page or
    # querying an element raises.
    try:
        # Open the website
        driver.get(url)

        # Find all visible buttons
        visible_buttons = [
            f"Button Element: {button.text} with URL : {button.get_attribute('href')}"
            for button in driver.find_elements(By.CSS_SELECTOR, "button")
            if button.is_displayed()
        ]

        # Find all visible links
        visible_links = [
            f"Link Element: {link.text} with URL : {link.get_attribute('href')}"
            for link in driver.find_elements(By.TAG_NAME, "a")
            if link.is_displayed()
        ]

        # Find all visible input fields.
        # Single quotes inside the f-string keep this valid before Python 3.12.
        visible_text_inputs = [
            f"Input Field Name: {input_field.get_attribute('name')} "
            f"Input field type: {input_field.get_attribute('type')}"
            for input_field in driver.find_elements(By.TAG_NAME, "input")
            if input_field.is_displayed()
        ]

        # Find all visible text: headings first, then paragraphs
        visible_text = []
        for level in range(1, 7):  # HTML has 6 levels of headings (h1 to h6)
            visible_text.extend(
                f"Heading (h{level}): {heading.text}"
                for heading in driver.find_elements(By.TAG_NAME, f"h{level}")
                if heading.is_displayed()
            )
        visible_text.extend(
            f"Paragraph Text: {paragraph.text}"
            for paragraph in driver.find_elements(By.TAG_NAME, "p")
            if paragraph.is_displayed()
        )

        # Find all visible navigation menus
        visible_nav_menus = [
            f"Navigation Menu: {nav.text}"
            for nav in driver.find_elements(By.TAG_NAME, "nav")
            if nav.is_displayed()
        ]
    finally:
        # Close the browser
        driver.quit()

    # Return Data
    return [
        visible_buttons,
        visible_links,
        visible_text_inputs,
        visible_text,
        visible_nav_menus,
    ]

In [None]:
# Prompt Generator + LLM

# CONTEXT
# Prompt text passed to ChatPromptTemplate.from_template by load_model_chain.
# "{question}" is the only placeholder; create_test_cases fills it with one
# scraped UI-element description per chain.invoke call.
# The "~" separator requested in the output format is the character the main
# program later splits each model response on.
template = """
You are a quality assurance expert that generates functional test cases for websites. You take in a UI element and you generate a functional test case.

Here is the UI element (some elements have a link attached to them): {question}
ONLY output in the following format: 
"Objective"~"Preconditions"~"Test Steps"~"Expected Result"

DO NOT output any other text. DO NOT output 'Here are the test cases...', your output should be like the example output below.

Example Input:
Link Element: Home with URL : https://bicol-u.edu.ph/
Link Element: Academics with URL : https://bicol-u.edu.ph/#
...

Example Output:
"Verify the functionality of the Link Element 'Home'"~"The user is on the webpage 'https://bicol-u.edu.ph/'"~"'1. User navigates to the webpage \'https://bicol-u.edu.ph/\'' '2. Click on Link Element \'Home\'' '3. Verify if the webpage opens in a new tab/window.'"~"Webpage 'https://bicol-u.edu.ph/' should open in a new tab/window."
"Verify the functionality of the Link Element 'Academics'"~"The user is on the webpage 'https://bicol-u.edu.ph/'"~"'1. User navigates to the webpage \'https://bicol-u.edu.ph/\'' '2. Click on Link Element \'Academics\'' '3. Verify if the link url changes to \'https://bicol-u.edu.ph/#\'' '4. Verify if a dropdown below \'Academics\' is visible'"~"A dropdown should show below 'Academics', but the webpage does not change"
...

"""

# Load Model Chain
def load_model_chain(template: str = template, model_str: str = "llama3.1"):
    """Build the prompt-to-model chain used to generate test cases.

    Args:
        template: Prompt text given to ``ChatPromptTemplate.from_template``.
        model_str: Model name given to ``OllamaLLM``.

    Returns:
        The chat prompt piped into the Ollama model.
    """
    return ChatPromptTemplate.from_template(template) | OllamaLLM(model=model_str)

# Create Test Case Data
def create_test_cases(data, chain, model_str: str = "llama3.1", template: str = template, batch_size: int = 10):
    """Generate test cases for every scraped element, grouped in batches.

    Each element is converted to ``str`` and sent to ``chain.invoke`` as the
    ``"question"`` input. The chain supplied by the caller is used for every
    call; it is not rebuilt between batches.

    Args:
        data: Iterable of sequences of element descriptions (one sequence
            per element category).
        chain: Object whose ``invoke`` accepts ``{"question": <str>}``.
        model_str: Unused; kept so existing callers keep working.
        template: Unused; kept so existing callers keep working.
        batch_size: Maximum number of elements per batch; must be >= 1.

    Returns:
        A list with one entry per category; each entry is a list of
        batches, and each batch is the list of ``chain.invoke`` results.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    return_data = []
    for sub_data in data:
        batch_count = ceil(len(sub_data) / batch_size)
        print(f"Batch Number: {batch_count}")

        element_test_cases = []
        for batch_index in range(batch_count):
            start = batch_index * batch_size
            batch = sub_data[start:start + batch_size]
            print(f"[{batch_index}] Batch {len(batch)}")
            element_test_cases.append(
                [chain.invoke({"question": str(item)}) for item in batch]
            )
        return_data.append(element_test_cases)
    return return_data


In [None]:
# Main Program

# FILENAME
filename = "sample_name"
# URL
url = get_url()
# DATA
data = data_scrape(url)  # One list of element descriptions per category

# Prepare data to be placed in the Input Column (categories flattened in order)
input_data = [item for item_set in data for item in item_set]

# CHAIN
chain = load_model_chain()
# GENERATE TEST CASES
test_cases = create_test_cases(data, chain)

# Prepare data to be placed in the Output Column [Split]
# test_cases is nested category -> batch -> response; each response is split
# on the '~' separator requested in the prompt.
split_sets = []
for test_case_set in test_cases:
    for test_case in test_case_set:
        split_sets.extend(test.split('~') for test in test_case)
# Prepare data to be placed in the Output Column [Actual Column]
output_data = list(split_sets)

# Fine-Tuning Template
# Built without assigning to `template`, so the module-level LLM prompt of
# that name is not overwritten.
text_data = [
    f"####Question:\n Generate test case for the following UI element: {item}"
    for item in input_data
]

# CSV OUTPUT
csv_from_test_case_batches(filename, input_data, output_data, text_data)