### This code opens SignUp's homepage, uses Selenium to click its "View all titles" button, and then scrapes the list of titles that appears. At the time of writing, that list shows the supported Disney+ titles, but there are buttons to switch to several other streaming services as well.

The output data contains the following variables:
* an unnamed first column that contains the row index
* **title**: name of the title.
* columns that indicate whether the title has captions in American Sign Language (**ASL**), Auslan (**Auslan** - not supported by any titles at the time of writing), British Sign Language (**BSL**), or Indian Sign Language (**ISL**)

Required packages:
* selenium
* pandas
* bs4
* requests
* re (part of the Python standard library, so no installation is needed)

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import re
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select

In [3]:
# Timeout, in seconds, passed to the Selenium waits that use `timer`.
# Change it if your browser needs more time until the list of movies is visible.
timer = 3

In [4]:
# Opens the SignUp homepage in Edge, clicks the "#movies > button" element and
# stores the resulting page HTML in `exp`.
# initializes our Edge session
driver = webdriver.Edge()

# try/finally guarantees the browser is closed even if loading the page or one
# of the waits below raises (e.g. a TimeoutException); otherwise a failed run
# would leave an orphaned Edge window/driver process behind.
try:
    url = "https://www.signupcaptions.com/"
    driver.get(url)

    # a wait condition
    # it waits until the CSS selector of the button we want to click is present
    # and times out after `timer` seconds; `until` returns the located element,
    # so the button is stored directly without a second lookup
    b = WebDriverWait(driver, timeout=timer).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "#movies > button"))
    )
    # clicks the button by dispatching the click through JavaScript
    driver.execute_script("arguments[0].click();", b)
    # waits until the list of movies is visible before moving on and times out
    # after `timer` seconds
    # NOTE(review): this only waits for the list container; if titles turn out
    # to be missing, waiting for a specific entry such as
    # "#movies > div.allMovies > div:nth-child(62) > img" is a stricter option.
    WebDriverWait(driver, timeout=timer).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "#movies > div.allMovies"))
    )
    # saves the new version of the page as a variable
    exp = driver.page_source
finally:
    # closes the driver
    driver.quit()

In [5]:
# standard conversion to BS4
# The parser is named explicitly: without it Beautiful Soup picks whichever
# parser happens to be installed (and emits GuessedAtParserWarning), so the
# parse tree could differ between machines. "html.parser" ships with Python.
soup = BeautifulSoup(exp, "html.parser")

In [6]:
# collects every <div> carrying the CSS class "movie"
movies = soup.find_all("div", attrs={"class": "movie"})

In [37]:
# the text of the first nested <div> of each movie entry is used as its title
movielist = [movie.find_all("div")[0].text for movie in movies]

In [38]:
def trial(test):
    """Return *test* unchanged, or the placeholder "ZNull" if it is empty.

    Emptiness is probed by indexing position 0: when that raises
    ``IndexError`` (as an empty string or list does) the string "ZNull" is
    returned, so callers can keep indexing the result.  Any other exception
    raised by the indexing is propagated.
    """
    try:
        test[0]
    except IndexError:
        result = "ZNull"
    else:
        result = test
    return result

In [42]:
# One boolean flag list per sign language, aligned index-for-index with
# movielist; every title starts out as False (not flagged).
count = len(movielist)
# the generator builds four separate lists, so the flags stay independent
ASL, AUS, BSL, ISL = ([False] * count for _ in range(4))

In [44]:
# Fills the ASL/AUS/BSL/ISL flag lists from the text of each movie's second
# nested <div>.
# NOTE(review): this assumes that text is the lowercase language codes run
# together in the order asl, aus, bsl, isl (e.g. "aslbsl") - confirm against
# the live page.
for i, movie in enumerate(movies):
    remaining = movie.find_all("div")[1].text
    for code, flags in (("asl", ASL), ("aus", AUS), ("bsl", BSL), ("isl", ISL)):
        # startswith + slicing removes exactly this code from the front.
        # str.lstrip("asl") instead strips any run of the characters a/s/l,
        # which also consumed the leading "a" of a following "aus" and so
        # missed Auslan on titles that have both ASL and Auslan.
        # An empty text matches no code instead of raising IndexError.
        if remaining.startswith(code):
            flags[i] = True
            remaining = remaining[len(code):]

In [48]:
# one row per title, with one boolean column per sign language
final = pd.DataFrame(dict(title=movielist, ASL=ASL, Auslan=AUS, BSL=BSL, ISL=ISL))

In [49]:
# writes the table to disk; index=True (the pandas default) is what produces
# the unnamed first column holding the row index
final.to_csv("signupdisney.csv", index=True)