In [1]:
# Set up selenium: By locators, explicit waits (WebDriverWait + expected_conditions)
# and the webdriver module itself.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
# Chrome is started with the 'headless' argument. `options` and `driver` are
# module-level names that the later cells reuse.
options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE: the browser is launched here with `options` as it stands at this point.
driver = webdriver.Chrome(options=options)

# Going to freesound (the home page, where the log-in button is looked up later)
url = "https://freesound.org/"
driver.get(url)

In [2]:
# Create the sounds dir (no error if it already exists) and remember its absolute path
import os
download_dir = os.path.join(os.getcwd(), 'sounds')
os.makedirs(download_dir, exist_ok=True)

# Record the download directory on `options` as well. On its own this has no effect
# on the current session: the browser was already launched from `options` when
# `driver` was created, so preferences added afterwards are never sent to it.
# It is kept so that a driver created from `options` later picks it up.
prefs = {'download.default_directory': download_dir}
options.add_experimental_option('prefs', prefs)

# Point the *running* browser at the sounds dir through the DevTools protocol.
# This works on a live session and also explicitly allows downloads, which older
# headless Chrome builds otherwise refuse.
driver.execute_cdp_cmd(
    'Page.setDownloadBehavior',
    {'behavior': 'allow', 'downloadPath': download_dir},
)

In [3]:
# Read the credentials from environment variables *before* touching the page.
# os.environ.get returns None for an unset variable, and send_keys(None) fails with
# an unhelpful TypeError, so stop early with a message that says what is missing.
username = os.environ.get('FREESOUND_USERNAME')
password = os.environ.get('FREESOUND_PASSWORD')
if not username or not password:
    raise RuntimeError(
        "Set the FREESOUND_USERNAME and FREESOUND_PASSWORD environment variables before logging in"
    )

# Click the log in button - it has data-toggle="login-modal" as a property
login_button = driver.find_element(By.CSS_SELECTOR, "[data-toggle='login-modal']")
login_button.click()

# This should open a div with id loginModal
# Wait up to 10 seconds for the login modal to be present in the page
login_modal = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.ID, "loginModal"))
)

# Now we can find the username and password fields
# Input fields with name="username" and name="password" respectively,
# looked up inside the modal so unrelated inputs on the page are ignored
username_field = login_modal.find_element(By.CSS_SELECTOR, "[name='username']")
password_field = login_modal.find_element(By.CSS_SELECTOR, "[name='password']")

# Fill in the username and password
username_field.send_keys(username)
password_field.send_keys(password)

# Click the modal's submit button, matched as button.btn-primary ("Log in")
submit_button = login_modal.find_element(By.CSS_SELECTOR, "button.btn-primary")
submit_button.click()

In [4]:
# Accumulators filled by the scraping cell and read by the reporting cells:
#   random_sounds: sound name -> download link
#   licenses:      sound name -> licence text (or the sound page URL as a fallback)
random_sounds, licenses = dict(), dict()

In [5]:
from random import randint

# Base class of every Selenium error. Catching it (instead of a bare `except:`)
# keeps the best-effort fallbacks below while letting KeyboardInterrupt and
# ordinary programming errors surface. Imported here so the cell is self-contained.
from selenium.common.exceptions import WebDriverException

# Now we load the search page
url = "https://freesound.org/search/?s=Date+added+(newest+first)&g=1"

# Find the a element with title="Last Page" so we can get the number of pages
driver.get(url)
print("Getting last page number")
last_page = driver.find_element(By.CSS_SELECTOR, "a[title='Last Page']")
last_page_number = int(last_page.text)

# Because we want to get random sounds, pick random pages
# (independent draws, so the same page can come up more than once)
num_pages = 10
page_nums = [randint(1, last_page_number) for _ in range(num_pages)]

print(f"Page numbers: {page_nums}")

print("Getting random sounds")

# We add the query &page=PAGE_NUM to the url to get the page
for page in page_nums:
    url = f"https://freesound.org/search/?s=Date+added+(newest+first)&g=1&page={page}"
    driver.get(url)

    # Wait until there's some -bw-link--black anchor elements to click on
    # These are the links to the sounds
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, ".bw-link--black"))
    )

    print(f"Getting page {page}")

    # If there's any explicit-sound-blocker divs, click the buttons to remove them
    # This is because we can't download sounds that have explicit content
    explicit_blockers = driver.find_elements(By.CSS_SELECTOR, ".explicit-sound-blocker")
    for blocker in explicit_blockers:
        # Best effort, handled per blocker so one failing button does not
        # stop the remaining blockers from being dismissed
        try:
            blocker.find_element(By.CSS_SELECTOR, ".btn").click()
            print("Clicked button to remove explicit content blocker")
        except WebDriverException as exc:
            print(f"Could not remove explicit content blocker ({type(exc).__name__})")

    # Now find all the divs with class bw-search__result
    # These are the divs that contain the sound info
    results = driver.find_elements(By.CSS_SELECTOR, ".bw-search__result")

    print(f"Found {len(results)} sounds on page {page}")

    # randint(0, -1) raises ValueError, so a page without results is skipped
    if not results:
        print(f"No sounds found on page {page}, skipping")
        continue

    # Pick a random one and take its a element with class="bw-link--black"
    anchor = results[randint(0, len(results) - 1)].find_element(By.CSS_SELECTOR, ".bw-link--black")

    # Get the name of the sound (the inner text)
    name = anchor.text

    print(f"Getting sound {name}")

    # The anchor element is a link to the sound page but doesn't seem to be clickable, so let's just get the href attribute
    href = anchor.get_attribute("href")

    print(f"Got href {href}")

    # Now we can go to the sound page
    driver.get(href)

    print("Went to sound page")

    # Find element with inner text Download sound
    download_button = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.LINK_TEXT, "Download sound"))
    )

    print("Found download button")

    # Use Xpath to find the element containing the licence.
    # The match is on the exact class attribute string, so a lookup failure is
    # expected now and then; fall back to the sound page URL in that case.
    try:
        license_element = driver.find_element(By.XPATH, "//span[@class='text-20 h-spacing-left-2 padding-right-4']")

        # Licence is the inner text (named license_text to avoid shadowing the builtin `license`)
        license_text = license_element.text
        # Put it in the dictionary
        licenses[name] = license_text
        print(f"Got license {license_text}")
    except WebDriverException:
        print("Error getting license")
        print("Defaulting to linking to sound page")
        licenses[name] = href

    # Get the href attribute
    download_link = download_button.get_attribute("href")

    # Download the sound (or try to) by navigating to the download link
    try:
        driver.get(download_link)
        print("Downloaded sound")
    except WebDriverException:
        print("Error downloading sound")

    # Now put it in the dictionary (recorded whether or not the download succeeded)
    random_sounds[name] = download_link


Getting last page number
Page numbers: [13206, 939, 2081, 7528, 10263, 3438, 3682, 5401, 1794, 7762]
Getting random sounds
Getting page 13206
Found 15 sounds on page 13206
Getting sound machine for a bottles.wav
Got href https://freesound.org/people/Dalibor/sounds/34181/
Went to sound page
Found download button


NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":".text-20 h-spacing-left-2 padding-right-4"}
  (Session info: headless chrome=112.0.5615.165)
Stacktrace:
#0 0x55942cad3fe3 <unknown>
#1 0x55942c812d36 <unknown>
#2 0x55942c84fc4d <unknown>
#3 0x55942c84fd61 <unknown>
#4 0x55942c88b6a4 <unknown>
#5 0x55942c8708ed <unknown>
#6 0x55942c889232 <unknown>
#7 0x55942c870693 <unknown>
#8 0x55942c84303a <unknown>
#9 0x55942c84417e <unknown>
#10 0x55942ca95dbd <unknown>
#11 0x55942ca99c6c <unknown>
#12 0x55942caa34b0 <unknown>
#13 0x55942ca9ad63 <unknown>
#14 0x55942ca6dc35 <unknown>
#15 0x55942cabe138 <unknown>
#16 0x55942cabe2c7 <unknown>
#17 0x55942cacc093 <unknown>
#18 0x7f5e40824b43 <unknown>


In [None]:
# Move the downloaded sounds from the working directory into the sounds directory
import os
import shutil

# Make sure the target really is a directory: if "sounds" did not exist,
# shutil.move would rename the first file to "sounds" instead of moving it inside.
os.makedirs("sounds", exist_ok=True)

for file in os.listdir():
    # Only regular files can be sounds. This skips the "sounds" directory itself
    # (moving it into itself raises shutil.Error) and folders like .ipynb_checkpoints.
    if not os.path.isfile(file):
        continue
    # Skip notebooks, hidden files, and Chrome's unfinished ".crdownload" files
    if file.startswith(".") or file.endswith((".ipynb", ".crdownload")):
        continue
    shutil.move(file, "sounds")

In [None]:
# Dump every collected sound as "name:" with its download link on the next line,
# in a format that is easy to copy and paste to my friend Adrien
for sound_name in random_sounds:
    print(f"{sound_name}:", random_sounds[sound_name], sep="\n")

excuse me_woman.wav:
https://freesound.org/people/BJP09/sounds/619918/download/619918__bjp09__excuse-me_woman.wav
birds in park 2.wav:
https://freesound.org/people/brick_on_de/sounds/274134/download/274134__brick_on_de__birds-in-park-2.wav
Wooden Planks Dropping on Concrete, Breaking Wood:
https://freesound.org/people/TheLittleCrow/sounds/589866/download/589866__thelittlecrow__wooden-planks-dropping-on-concrete-breaking-wood.wav
7000005.wav:
https://freesound.org/people/levelclearer/sounds/417216/download/417216__levelclearer__7000005.wav
coyotes.wav:
https://freesound.org/people/Nirtana/sounds/641626/download/641626__nirtana__coyotes.wav
Kedington Ambeince.mp3:
https://freesound.org/people/jackwheatley/sounds/58283/download/58283__jackwheatley__kedington-ambeince.mp3
letter-give.aif:
https://freesound.org/people/toefur/sounds/280247/download/280247__toefur__letter-give.aiff
Summer Songbirds Eastern Missouri June 2021.wav:
https://freesound.org/people/RedHatCreator/sounds/577865/downlo

In [None]:
# Write/append licences to file, one "name: licence" line per sound.
# The encoding is pinned to UTF-8 because sound names may contain non-ASCII
# characters and the platform default encoding is not guaranteed to handle them.
with open("sounds/licenses.txt", "a", encoding="utf-8") as f:
    # license_text rather than `license`, which would shadow the builtin
    for name, license_text in licenses.items():
        f.write(f"{name}: {license_text}\n")