In [None]:
# Imports
from selenium import webdriver 
from selenium.webdriver.common.by import By 
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
import json

In [None]:
# To locate an element on the page, right-click it, choose "Inspect", and copy its selector (CSS selector, XPath, etc.)

# Functions
# Initialise the test browser (chromedriver) with the specified URL - this will open a new window
def initialise_driver(installed_path, opening_link):
    """Start a Chrome test browser and load ``opening_link`` in it.

    Args:
        installed_path: Filepath where chromedriver is installed.
        opening_link: URL the browser should open first (e.g. a YouTube
            search query link).

    Returns:
        The ``webdriver.Chrome`` instance after ``opening_link`` has been
        requested. The caller owns it and should call ``quit()`` on it when
        finished.

    Raises:
        Any exception raised by ``webdriver.Chrome`` or ``driver.get``. If
        loading the page fails, the browser is shut down before the error
        propagates.
    """
    # NOTE(review): the chromedriver path is passed positionally, as in the
    # original code; confirm this matches the installed Selenium version.
    driver = webdriver.Chrome(installed_path)
    try:
        driver.get(opening_link)
    except Exception:
        # The caller never receives the driver in this case, so close the
        # window here instead of leaking a browser/chromedriver process.
        driver.quit()
        raise

    return driver

# From the YouTube search result page, this function scrapes the page and returns a list of the video links
def get_links(driver):
    """Collect the ``href`` of every ``video-title`` element on the current page.

    Args:
        driver: Selenium WebDriver whose current page is scraped (in this
            file, a YouTube search result page).

    Returns:
        A list with one item per element whose id is ``video-title``, in the
        order the driver returned them. Each item is the value of
        ``get_attribute('href')`` for that element, which may be ``None``
        (callers in this file skip ``None`` entries).
    """
    # Use the generic find_elements(By, value) form: the
    # find_elements_by_xpath shortcut is not available in newer Selenium
    # releases, while this form works in both old and new ones.
    title_elements = driver.find_elements(By.XPATH, '//*[@id="video-title"]')
    return [element.get_attribute('href') for element in title_elements]

# Create a list of dictionaries where each dictionary contains info for one video
def initialise_dictionary_list(skill, links):
    """Build one blank video record for every link that is not ``None``.

    Args:
        skill: Value stored under the ``"skill"`` key of every record.
        links: Video links; ``None`` entries are skipped.

    Returns:
        A list of new dictionaries, in the same order as the usable links.
        Each has the link filled in, an empty title and description, and a
        view count of 0.
    """
    return [
        {
            "link": url,
            "title": "",
            "description": "",
            "views": 0,
            "skill": skill,
        }
        for url in links
        if url is not None
    ]

# Scrape the required info from each video page
def _scrape_title(driver, wait):
    """Return the title text of the video page currently loaded in ``driver``."""
    # Wait until the title element is visible before reading its text.
    heading = wait.until(EC.visibility_of_element_located(
        (By.CSS_SELECTOR, "h1.title yt-formatted-string")))
    return heading.text


def _scrape_description(driver, wait):
    """Expand the description of the loaded video page and return its text parts."""
    # Wait until the 'show more' button of the description is present.
    show_more = wait.until(EC.presence_of_element_located((By.XPATH,
        "/html/body/ytd-app/div/ytd-page-manager/ytd-watch-flexy/div[5]/div[1]/div/div[9]/div[2]/ytd-video-secondary-info-renderer/div/ytd-expander/tp-yt-paper-button[2]/yt-formatted-string")))
    # The description is collapsed by default, so 'show more' must be clicked
    # before the complete text can be read.
    ActionChains(driver).move_to_element(show_more).click().perform()
    # NOTE(review): the locator value contains a space (two class names);
    # confirm that a CLASS_NAME lookup matches the intended elements.
    parts = driver.find_elements(
        By.CLASS_NAME, "style-scope ytd-video-secondary-info-renderer")
    # The complete description is spread over several elements; keep the text
    # of each one as a separate list item.
    return [part.text for part in parts]


def _scrape_views(driver, wait):
    """Return the view-count text of the video page currently loaded in ``driver``."""
    # Wait until the view counter is visible before reading its text.
    counter = wait.until(EC.visibility_of_element_located((By.XPATH,
        "//*[@id='count']/ytd-video-view-count-renderer/span[1]")))
    return counter.text


# Maps each supported flag (which is also the dictionary key that gets
# filled in) to the helper that reads that value from a loaded video page.
_SCRAPERS = {
    "title": _scrape_title,
    "description": _scrape_description,
    "views": _scrape_views,
}


def _pair_entries_with_links(links, dictionary_list):
    """Return ``(entry, link)`` pairs for every link that is not ``None``."""
    if len(dictionary_list) == len(links):
        # Same length: entries and links line up position by position.
        candidates = zip(dictionary_list, links)
    else:
        # initialise_dictionary_list creates no entry for a None link, so the
        # k-th entry belongs to the k-th link that is not None.
        candidates = zip(
            dictionary_list, (link for link in links if link is not None))
    return [(entry, link) for entry, link in candidates if link is not None]


def scrape_info(driver, links, dictionary_list, flag, timeout=180):
    """Visit each video link and store one piece of info in its dictionary.

    Args:
        driver: Selenium WebDriver used to open the video pages.
        links: Video links; ``None`` entries are skipped.
        dictionary_list: The list of video dictionaries built from ``links``
            (one dictionary per link that is not ``None``, in the same order).
            The dictionaries are updated in place.
        flag: Which value to scrape: ``"title"``, ``"description"`` or
            ``"views"``. The scraped value is stored under this key. Any
            other value leaves ``dictionary_list`` untouched.
        timeout: Maximum number of seconds to wait for a page element to
            appear (180 by default).

    Returns:
        The same ``dictionary_list`` object that was passed in.
    """
    # Compare the flag by value (dictionary lookup), not by object identity.
    scraper = _SCRAPERS.get(flag)
    if scraper is None:
        return dictionary_list

    # Selenium waits up to `timeout` seconds so that the page can finish loading.
    wait = WebDriverWait(driver, timeout)

    for entry, link in _pair_entries_with_links(links, dictionary_list):
        driver.get(link)
        entry[flag] = scraper(driver, wait)

    return dictionary_list

def write_to_json_file(dictionary_list, filename):
    """Write ``dictionary_list`` as indented JSON to ``<filename>.json``.

    Args:
        dictionary_list: JSON-serialisable data to store (in this file, the
            list of video dictionaries).
        filename: Output path without extension; ``".json"`` is appended.
    """
    # Serialise before opening the file so that a serialisation error does
    # not leave an empty or truncated file behind.
    json_string = json.dumps(dictionary_list, indent=4)
    # The context manager closes the file even if the write fails.
    with open(filename + ".json", "w", encoding="utf-8") as json_file:
        json_file.write(json_string)

In [None]:
# Run code here
driver = initialise_driver(
    "D:\\Installers\\chromedriver_win32\\chromedriver",
    "https://www.youtube.com/results?search_query=%22online+collaboration%22",
)  # change install path and link when needed
try:
    links = get_links(driver)  # get the links of the search results
    # initialise dictionary, specify skill
    video_dictionary = initialise_dictionary_list("Online collaboration", links)
    # get the title, description and views of each video, one pass per field
    for flag in ("title", "description", "views"):
        video_dictionary = scrape_info(driver, links, video_dictionary, flag)

    write_to_json_file(video_dictionary, "data_online_collaboration")  # write info to json file
finally:
    # Always close the test browser, even if scraping fails part-way,
    # so no browser window or chromedriver process is left running.
    driver.quit()
