# Function Prototyping Grounds

## Dynamic webpage scraping with selenium

Selenium is a web automation tool that allows you to automate interactions with websites.

Chromedriver is a web driver that allows you to automate interactions with websites.

The target website is [FeverDreams](https://www.feverdreams.app/recent/1). Feverdreams.app is a site that allows you to browse and create AI-generated artwork.

### Create functions

In [85]:
# Run this code first to set up the scraper.

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import json, uuid
from json import JSONDecodeError
from pathlib import Path
import html
import wget

PATH = "F:\\" 
START = "https://www.feverdreams.app/recent/1"
TARGET_LIST = []
DATA_DICT = { "creations" : [] }

In [98]:
def get_UUID():
    return str(uuid.uuid4())

mlva_uuid = get_UUID()

def get_public_metadata(source_json, identifier):
    key_list = [
        "transformation_percent",
        "clip_models_schedules",
        "diffusion_model_config",
        "width_height",
        "clip_guidance_scale",
        "skip_event",
        "sat_scale",
        "batch_name",
        "name_docarray",
        "cut_innercut",
        "skip_augs",
        "clip_denoised",
        "seed",
        "use_vertical_symmetry",
        "init_scale",
        "steps",
        "use_secondary_model",
        "text_prompts",
        "gif_fps",
        "cut_icgray_p",
        "truncate_overlength_prompt",
        "clip_models",
        "cut_overview",
        "display_rate",
        "use_horizontal_symmetry",
        "eta",
        "perlin_init",
        "init_image",
        "clamp_max",
        "randomize_class",
        "on_misspelled_token",
        "gif_size_ratio",
        "save_rate",
        "rand_mag",
        "range_scale",
        "tv_scale",
        "n_batches",
        "cut_ic_pow",
        "clamp_grad",
        "batch_size",
        "stop_event",
        "text_clip_on_cpu",
        "diffusion_sampling_mode",
        "diffusion_model",
        "cutn_batches",
        "cut_schedules_group",
        "skip_steps",
        "perlin_mode"
    ]

    
    public_metadata = {
        "mlva_uuid" : identifier,
        "piece_metadata": []
    }

    with open(source_json, "r", encoding='utf-8') as f:
        metadata = json.load(f)
        
    for key in metadata['discoart_tags']:
        if key in key_list:
            try:
                public_metadata['piece_metadata'].append(f"{key}={metadata[key]}")
            except KeyError:
                public_metadata['piece_metadata'].append(f"{key}=Null")
        else:
            pass

    return public_metadata

def get_private_metadata(source_json, identifier):
    
    private_metadata = {
        "mlva_uuid" : identifier,
        "piece_metadata": []
    }

    with open(source_json, "r", encoding='utf-8') as f:
        metadata = json.load(f)
        
    for key in metadata:
        try:
            private_metadata['piece_metadata'].append(f"{key}={metadata[key]}")
        except KeyError:
            private_metadata['piece_metadata'].append(f"{key}=Null")

    return private_metadata


def url_to_json(url:str, driver:webdriver)->str:
    """
    define a function called 'url_to_json' that takes in a url and a driver. The function should:
    1. It should take the driver and point it at the url.
    2. it should load the page and wait for the page to load.
    3. It should load the entire contents of the page into a json string.
    """
    driver.get(url)
    time.sleep(5)
    page_source = driver.find_element(By.TAG_NAME, "body").get_attribute("innerHTML")
    time.sleep(10)
    with open("F:\dogma\CentralDogma\MechArtResearch\MechPromptFinder\src\metadata_cache.json", "w+", encoding='utf-8') as f:
        print(page_source, file=f)
    return "F:\dogma\CentralDogma\MechArtResearch\MechPromptFinder\src\metadata_cache.json"

def url_to_metadata_url(url:str)->str:
    """
    define a function called 'url_to_metadata_url' that takes in a url and returns a url to the metadata.
    """
    return url.replace("/piece/", "/job/").replace("www", "api")

## Test Pulldown Function

In [None]:
driver = webdriver.Chrome(PATH + "chromedriver.exe")


test_result = get_public_metadata(url_to_json("https://api.feverdreams.app/job/a247fcea-80e9-4e5b-9895-016031a7a297", driver))
print(test_result)

## Basic Elements Of Working With Selenium

```python
def create_target_archive(driver_path, current_page:str)->list:
    """
    Define a function called create_target_archive that takes a driver, a current page, a next page and a target list. The function should:
    1. It should create a webdriver called 'driver' and point it at the current page.
    2. Find and create a list called 'browse_list' of urls in the href attributes of all 'a' elements that are children of 'div' elements.
    3. It should create a dictionary called 'results_dict' with the keys 'next_page' with the value browse_list[1] and 'new_targets' with browse_list[2:-7].
    4. It should call driver.quit() and sleep for 5 seconds.
    5. It should return results_dict.
    """
    driver = webdriver.Chrome(driver_path + "chromedriver.exe")
    driver.get(current_page)
    time.sleep(15)
    browse_list = href_list(driver.find_elements(By.XPATH, "//div/a[@href]"))
    results_dict = { "next_page" : browse_list[1], "new_targets" : browse_list[2:-7] }
    time.sleep(5)
    driver.quit()
    return results_dict  
```

## Full Pulldown Function

In [109]:
def scrape_piece(url:str, driver_path:str)->str:
    driver = webdriver.Chrome(driver_path + "chromedriver.exe")
    driver.get(url)
    time.sleep(10)
    identifier = driver.find_elements(By.XPATH, "//div/h4")[0].text,
    print(f"Identifier is type {type(identifier)} --> {identifier}")
    img_src = driver.find_elements(By.XPATH, "//a/img")[0].get_attribute("src"),
    image_path = f"F:\dogma\CentralDogma\MechArtResearch\MechPromptFinder\src\data\images\{identifier[0]}.png"
    driver.find_elements(By.XPATH, "//a/img")[0].screenshot(image_path)
    prompt = driver.find_elements(By.XPATH, "//div/code")[0].text,
    public_metadata = get_public_metadata(url_to_json(url_to_metadata_url(url), driver), mlva_uuid),
    private_metadata = get_private_metadata(url_to_json(url_to_metadata_url(url), driver), mlva_uuid),
    piece_cache = {
        "identifier":identifier,
        "img_src": img_src,
        "prompt": prompt,
        "public_metadata": public_metadata,
        "private_metadata": private_metadata,
    }

    time.sleep(10)
    driver.quit()
    image_dest = Path(image_path)
    if Path.is_file(image_dest):
        print(f"Download of image: {img_src} to {image_path} completed.")
    else:
        print(f"Download of image: {img_src} to {image_path} may have failed.")
    output_file = f"data/feverdream__scrape/{identifier[0]}.json"
    with open(output_file, "w", encoding='utf-8') as f:
        json.dump(piece_cache, f, indent=4)
    return output_file

## Full Pulldown Test

In [110]:
url = "https://www.feverdreams.app/piece/b2ebc94f-f199-4699-bf53-814e5ca7da15"

result = scrape_piece(url, PATH)
print(result)

  driver = webdriver.Chrome(driver_path + "chromedriver.exe")


Identifier is type <class 'tuple'> --> ('b2ebc94f-f199-4699-bf53-814e5ca7da15',)
Download of image: ('http://images.feverdreams.app/jpg/b2ebc94f-f199-4699-bf53-814e5ca7da15.jpg',) to F:\dogma\CentralDogma\MechArtResearch\MechPromptFinder\src\data\images\b2ebc94f-f199-4699-bf53-814e5ca7da15.png completed.
data/feverdream__scrape/b2ebc94f-f199-4699-bf53-814e5ca7da15.json


In [None]:
url = "https://www.feverdreams.app/piece/b2ebc94f-f199-4699-bf53-814e5ca7da15"
driver = webdriver.Chrome(PATH + "chromedriver.exe")
driver.get(url)
identifier = "b2ebc94f-f199-4699-bf53-814e5ca7da15"
img_src = "http://images.feverdreams.app/jpg/b2ebc94f-f199-4699-bf53-814e5ca7da15.jpg"
image_path = f"F:\dogma\CentralDogma\MechArtResearch\MechPromptFinder\src\data\images\{identifier}.jpg"
if Path.is_file(image_path):
    print(f"Download of image: {img_src} to {image_path} completed.")
else:
    print(f"Download of image: {img_src} to {image_path} may have failed.")