In [1]:
import json
import os
import time
from urllib.parse import parse_qs, unquote, urlsplit

import requests
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from seleniumwire import webdriver  # Import from seleniumwire
from ultraprint.logging import logger

In [2]:

# Create a logger object (console only: write_to_file=False, DEBUG level)
log = logger('scraping_log', include_extra_info=False, write_to_file=False, log_level='DEBUG')

# Initialize the driver
log.debug("Initializing the Firefox WebDriver")

# Parameters
base_url = "https://pixabay.com/music/search/?order=ec&pagi="  # page number is appended to this
# Built with os.path.join so the separator is correct on every OS
# (on Windows this is still "data\\audio_files").
audio_folder = os.path.join("data", "audio_files")
max_pages = 100

# set default download directory (absolute path, resolved against the current working directory)
download_dir = os.path.join(os.getcwd(), audio_folder)

# Make sure the target directory exists *before* the browser is configured to
# download into it; exist_ok guards against it appearing between check and creation.
if not os.path.isdir(download_dir):
    log.debug(f"Creating audio folder at {audio_folder}")
    os.makedirs(download_dir, exist_ok=True)

# Set the download directory
log.debug(f"Setting the download directory to {download_dir}")
profile = webdriver.FirefoxProfile()
profile.set_preference("browser.download.folderList", 2)  # 2 = use the custom directory set below
profile.set_preference("browser.download.manager.showWhenStarting", False)
profile.set_preference("browser.download.dir", download_dir)
# Save MP3 responses straight to disk without showing a prompt
profile.set_preference("browser.helperApps.neverAsk.saveToDisk", "audio/mpeg")
profile.set_preference("browser.download.useDownloadDir", True)
profile.set_preference("browser.download.panel.shown", False)

options = Options()
options.profile = profile
driver = webdriver.Firefox(options=options)

# Load the first results page; log the URL that is actually requested
first_page_url = base_url + "1"
log.debug(f"Loading the page at {first_page_url}")
driver.get(first_page_url)

[90m [2024-12-20 12:03:25] [DEBUG] [scraping_log] Initializing the Firefox WebDriver[00m
[90m [2024-12-20 12:03:25] [DEBUG] [scraping_log] Setting the download directory to e:\Python and AI\Web Scraping\Pixabay_Scraper\data\audio_files[00m
[90m [2024-12-20 12:03:30] [DEBUG] [scraping_log] Loading the page at https://pixabay.com/music/search/?order=ec&pagi=[00m


In [3]:
# Wait (up to 10 s) for the track links on the results page instead of assuming
# they are already in the DOM. On timeout this raises TimeoutException, which is
# clearer than the IndexError that indexing an empty list would produce.
# NOTE(review): "name--q8l1g" looks like a generated CSS class name and may
# change when the site is redeployed - confirm it is still current.
media_items = WebDriverWait(driver, 10).until(
    EC.presence_of_all_elements_located((By.CLASS_NAME, "name--q8l1g"))
)
item = media_items[0]  # only the first track on the page is handled here

# get the href
audio_page_link = item.get_attribute("href")

# Load the audio page (new tab)
log.debug(f"Loading the audio page at {audio_page_link}")

# Open a new tab and pick it out by comparing the handle list before and after,
# rather than assuming it is always at index 1.
handles_before = set(driver.window_handles)
driver.execute_script("window.open('');")
new_tab_handle = next(
    handle for handle in driver.window_handles if handle not in handles_before
)
driver.switch_to.window(new_tab_handle)
driver.get(audio_page_link)

[90m [2024-12-20 12:03:34] [DEBUG] [scraping_log] Loading the audio page at https://pixabay.com/music/vlog-music-beat-trailer-showreel-promo-background-intro-theme-274290/[00m


In [4]:
def get_downloaded_file_name():
    """Return the file name of the most recent completed Pixabay audio download.

    Walks the requests captured by the module-level selenium-wire ``driver``
    from newest to oldest and picks the first one that already has a response
    and whose URL contains ``cdn.pixabay.com/download/audio``.

    Returns:
        str | None: the URL-decoded value of the ``filename`` query parameter
        of that request; if the parameter is absent, the decoded last segment
        of the URL path; ``None`` when no matching request has been captured.
    """
    # Newest first: after several downloads in one session the oldest match
    # would be a stale file name.
    for request in reversed(driver.requests):
        if not request.response or 'cdn.pixabay.com/download/audio' not in request.url:
            continue
        url_parts = urlsplit(request.url)
        # parse_qs isolates the parameter (dropping any that follow it) and
        # percent-decodes the value.
        filename_values = parse_qs(url_parts.query).get('filename')
        if filename_values:
            return filename_values[-1]
        # No filename parameter: fall back to the last path segment of the URL.
        return unquote(url_parts.path.rsplit('/', 1)[-1])
    return None

In [11]:
# --- Metadata shown on the audio page ---
title_card = driver.find_element(By.CLASS_NAME, "title--VRujt")
music_name = title_card.text

attribution_card = driver.find_element(By.CLASS_NAME, "userName--owby3")
credits = attribution_card.get_attribute("href")  # href of the attribution element

tags_parent = driver.find_element(By.CLASS_NAME, "tagsSection--8gH54")
tags_cards = tags_parent.find_elements(By.CLASS_NAME, "label--Ngqjq")
tags = [tag.text for tag in tags_cards]

# --- Trigger the download ---
# Drop previously captured traffic so that the download request looked up
# below can only belong to the click made in this cell.
del driver.requests

side_panel = driver.find_element(By.CLASS_NAME, "sidePanel--XFASR")
downlaod_button = side_panel.find_element(By.CLASS_NAME, "triggerWrapper--NACCC")
# Located but not clicked: the wrapper itself receives the click.
final_download_button = downlaod_button.find_element(By.TAG_NAME, "button")
downlaod_button.click()

# Wait for the download button to appear
download_button_card = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, "buttons--cqw3Y"))
)
# Presence of the container does not mean its button can be clicked yet, so
# wait for the label inside it to be visible and enabled before clicking.
button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, ".buttons--cqw3Y .label--Ngqjq"))
)
button.click()

# The download request is issued asynchronously after the click; poll for up
# to 30 s instead of reading the captured requests immediately.
# file_name stays None if nothing shows up in time.
download_deadline = time.monotonic() + 30
file_name = get_downloaded_file_name()
while not file_name and time.monotonic() < download_deadline:
    time.sleep(0.5)
    file_name = get_downloaded_file_name()
if not file_name:
    log.debug("No download request was captured within 30 seconds")

In [16]:
# Open the download menu in the side panel of the audio page.
side_panel = driver.find_element(By.CLASS_NAME, "sidePanel--XFASR")
downlaod_button = side_panel.find_element(By.CLASS_NAME, "triggerWrapper--NACCC")
# Located but not clicked: the wrapper itself receives the click.
final_download_button = downlaod_button.find_element(By.TAG_NAME, "button")
downlaod_button.click()

# Wait for the download button to appear
download_button_card = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, "buttons--cqw3Y"))
)
# Presence of the container does not mean its button can be clicked yet, so
# wait for the label inside it to be visible and enabled before clicking.
button = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, ".buttons--cqw3Y .label--Ngqjq"))
)
button.click()

In [16]:
# Finished with the audio page: close its tab, then hand focus back to the
# first window handle (the tab showing the search results).
driver.close()
main_tab_handle = driver.window_handles[0]
driver.switch_to.window(main_tab_handle)

In [17]:
# Move on to the second page of search results.
second_page_url = f"{base_url}2"
driver.get(second_page_url)