In [1]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium_stealth import stealth

import time
import random
import string
import json
import pafy

import pandas as pd
import numpy as np

# Hide video unavailable warnings.
# NOTE(review): called without a category/message/module filter, this silences
# every Python warning for the rest of the session, not only the ones named above.
import warnings
warnings.filterwarnings("ignore")

# Data Inladen

In [2]:
# Accounts used by the experiment. run_experiment reads the first two columns of
# every row positionally as (mail, password).
# NOTE(review): this file therefore holds plaintext passwords; keep it out of
# version control and shared copies of the notebook.
mails = pd.read_csv("../data/mails.csv")

In [3]:
# Candidate videos to watch. vid_to_watch uses the "conspiracy" column as a
# boolean mask and reads the ids it puts into watch URLs from "video_id".
videos = pd.read_csv("../data/uploads.csv")

# Helpers

In [21]:
def vid_to_watch(videos, usertype = 1, max_length = 7200, max_tries = 100):
    """Picks a random video for the given user type and decides how long to watch it.

    Candidates are drawn at random from the pool that belongs to `usertype`
    until one is found that pafy can still load and that is at most
    `max_length` seconds long.

    Parameters:
        videos: DataFrame with a boolean "conspiracy" column and a "video_id" column.
        usertype: 1 draws from the conspiracy videos, 2 from the other videos.
        max_length: longest acceptable video in seconds (default: 2 hours).
        max_tries: how many candidates are probed before giving up.

    Returns:
        (watch_time, to_watch): seconds to watch and the id of the chosen video.

    Raises:
        ValueError: `usertype` is neither 1 nor 2.
        RuntimeError: no suitable video was found within `max_tries` draws.
    """

    # Select the pool once so that retries keep respecting the user type
    if usertype == 1:
        pool = videos[videos["conspiracy"]]["video_id"]
    elif usertype == 2:
        pool = videos[~videos["conspiracy"]]["video_id"]
    else:
        raise ValueError(f"Unsupported usertype {usertype!r}: expected 1 or 2")

    for _ in range(max_tries):
        to_watch = np.random.choice(pool)
        url = f"http://www.youtube.com/watch?v={to_watch}"

        try:
            vid_len = pafy.new(url).length
        except Exception: # Video no longer accessible, try another one
            continue

        # Videos over the maximum length are skipped
        if vid_len <= max_length:
            break
    else:
        raise RuntimeError(f"No accessible video of at most {max_length} seconds "
                           f"found in {max_tries} tries")

    # Calculate how much of the video will be watched. The normal draw can fall
    # outside [0, 1]; clip it so the watch time never exceeds the video length.
    watch_fraction = min(max(np.random.normal(0.6, 0.15), 0.0), 1.0)
    watch_time = watch_fraction * vid_len

    return watch_time, to_watch

In [5]:
def init_driver():
    """Creates a Chrome webdriver and runs selenium-stealth on it.

    Returns the ready-to-use driver.
    """

    # Command line switches handed to Chrome, in the order they are added.
    # "--headless" is deliberately not in this list.
    chrome_flags = (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features",
        "--disable-blink-features=AutomationControlled",
        "--disable-infobars",
        "start-maximized",
    )

    # Experimental options that switch off the automation switch and extension
    experimental_options = (
        ("excludeSwitches", ["enable-automation"]),
        ("useAutomationExtension", False),
    )

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)
    for option_name, option_value in experimental_options:
        chrome_options.add_experimental_option(option_name, option_value)

    # Start the browser
    browser = webdriver.Chrome(options=chrome_options)

    # Browser properties that selenium-stealth should report
    stealth_settings = {
        "languages": ["en-US", "en"],
        "vendor": "Google Inc.",
        "platform": "Win32",
        "webgl_vendor": "Intel Inc.",
        "renderer": "Intel Iris OpenGL Engine",
        "fix_hairline": True,
    }
    stealth(browser, **stealth_settings)

    return browser

In [6]:
def login_google(driver, mail, password):
    """Logs in on Google given a driver, email-address and password.

    The address is typed one character at a time and every step is followed by
    a short random pause. A security prompt shown after the password step is
    clicked away when it is present.

    According to the original author this only works if the email-address was
    made within chromedriver itself.

    Parameters:
        driver: selenium webdriver to log in with.
        mail: email-address of the account.
        password: password of the account.
    """

    # Open the login page
    driver.get("https://accounts.google.com/ServiceLogin")
    time.sleep(np.random.uniform(1, 2.5))

    # Type the address letter by letter
    for letter in mail:
        driver.find_element_by_id("identifierId").send_keys(letter)
        time.sleep(np.random.uniform(0.05, 0.2))

    driver.find_element_by_id("identifierNext").click()

    time.sleep(np.random.uniform(1, 2))

    # Fill in the password and submit
    driver.find_element_by_xpath("//input[@name = 'password']").send_keys(password)
    driver.find_element_by_xpath('//button[contains(@class, "VfPpkd-LgbsSe")]').click()

    time.sleep(np.random.uniform(1, 2))

    # If security-check is asked, skip it. Only ordinary errors (e.g. the prompt
    # not being there) are ignored, so a kernel interrupt still stops the run.
    try:
        driver.find_element_by_xpath("//div[contains(@class, 'U26fgb O0WRkf')]").click()
    except Exception:
        pass

In [7]:
def in_seconds(timestamp):
    """Turns a colon-separated YouTube timestamp (e.g. "1:02:03") into seconds"""
    total = 0

    # Walk the fields from right to left: each step to the left is worth 60x more
    for power, field in enumerate(reversed(timestamp.split(":"))):
        total += int(field) * 60**power

    return total

In [28]:
def prepare_video(driver, max_ads = 2, ad_wait = 7):
    """Does everything necessary to start watching a video.

    Tries to click the skip button of up to `max_ads` ads and then switches
    autoplay off when it is on.

    Parameters:
        driver: selenium webdriver that has a video page open.
        max_ads: number of skip attempts (default 2).
        ad_wait: seconds to wait before every skip attempt (default 7).
    """

    # Skip ad(s)
    for _ in range(max_ads):
        time.sleep(ad_wait)
        try:
            driver.find_element_by_xpath("//div[@class = 'ytp-ad-text ytp-ad-skip-button-text']").click()
        except Exception:
            # No skip button to click; a kernel interrupt is not caught here
            pass

    # Turn autoplay off if the toggle reports that it is on
    time.sleep(1)
    autoplay = driver.find_element_by_xpath("//div[@class = 'ytp-autonav-toggle-button']")

    if autoplay.get_attribute("aria-checked") == "true":
        autoplay.click()

In [25]:
def check_video_running(driver, watch_time):
    """Tells whether the video in `driver` should keep playing.

    Returns False once the displayed timestamp has reached `watch_time`
    (in seconds) or once the player is in ended mode, and True otherwise.
    """
    timestamp = driver.find_element_by_class_name("ytp-time-current").text

    # Sometimes the timestamp cannot be displayed
    if timestamp and not in_seconds(timestamp) < watch_time:
        return False

    # Check if the video is done (can happen when ads play and the video is very
    # short). This is checked even when a timestamp is shown: a video that ends
    # before watch_time is reached would otherwise be reported as running forever.
    try:
        driver.find_element_by_xpath("//div[contains(@class, 'ended-mode')]")
    except Exception:
        # No ended marker found: video is still running
        return True

    return False

# Het experiment

In [29]:
def run_experiment(videos, mails, usertype=1, n_videos=2, n_recs=15, max_users=1):
    """Lets accounts watch videos and records the YouTube homepage afterwards.

    For every processed account a browser is started and logged in. The account
    then watches `n_videos` videos; after each one the homepage is opened and
    its first `n_recs` video/channel links are stored.

    Parameters:
        videos: DataFrame of candidate videos, passed on to vid_to_watch.
        mails: DataFrame whose first two columns are mail and password.
        usertype: passed on to vid_to_watch to choose the kind of video.
        n_videos: number of videos every account watches (default 2).
        n_recs: maximum number of homepage tiles stored per visit (default 15).
        max_users: number of accounts to process. The default of 1 keeps the
            original behaviour of stopping after the first account; pass None
            to process every row of `mails`.

    Returns:
        DataFrame with the columns "user", "vids_watched", "video" and "channel".
    """
    experiment_results = {"user":[], "vids_watched":[], "video":[], "channel":[]}

    for user_nr, user in enumerate(mails.itertuples()):
        if max_users is not None and user_nr >= max_users:
            break

        # Find current user's info (position 0 of the tuple is the index)
        mail, password = user[1], user[2]

        # Initialize driver
        driver = init_driver()

        try:
            # Login to google
            login_google(driver, mail, password)

            # Start watching videos
            for i in range(1, n_videos + 1):
                # Get a random video from the dataset and watch it
                watch_time, to_watch = vid_to_watch(videos, usertype)
                driver.get(f"http://youtube.com/watch?v={to_watch}")

                # Skip ads, disable autoplay
                prepare_video(driver)

                # Watch video
                while check_video_running(driver, watch_time):
                    time.sleep(1)

                if usertype == 3:
                    # NOTE(review): these sidebar links are collected but not used
                    # anywhere yet - TODO confirm what usertype 3 should do with them
                    video_recs = driver.find_elements_by_xpath("//a[contains(@class, 'ytd-compact-video-renderer')]")
                    to_watch_new = [rec.get_attribute("href") for rec in video_recs][:10]

                # Go to the youtube homepage
                driver.get("http://youtube.com")
                time.sleep(1)

                # Get videos on youtube home
                channels = driver.find_elements_by_xpath("//a[@id = 'avatar-link']")
                vids = driver.find_elements_by_xpath("//a[@id = 'video-title-link']")

                # Get the top recommendations; zip stops at the shorter list, so a
                # page with fewer tiles than n_recs no longer raises an IndexError
                for vid, channel in zip(vids[:n_recs], channels[:n_recs]):
                    experiment_results["user"].append(mail)
                    experiment_results["vids_watched"].append(i)

                    experiment_results["video"].append(vid.get_attribute("href"))
                    experiment_results["channel"].append(channel.get_attribute("href"))
        finally:
            # Always close the browser, also when something above fails
            driver.quit()

    return pd.DataFrame(experiment_results)

# NOTE(review): vid_to_watch only selects a video for usertype 1 or 2, so it
# raises for the usertype=3 passed here - TODO confirm the intended user type.
run_experiment(videos, mails, 3)

['https://www.youtube.com/watch?v=bDnA_coA168', 'https://www.youtube.com/watch?v=C7IJ7npTYrU', 'https://www.youtube.com/watch?v=TBuIGBCF9jc', 'https://www.youtube.com/watch?v=tT0ob3cHPmE', 'https://www.youtube.com/watch?v=-jvuhOa9wY4', 'https://www.youtube.com/watch?v=P_6my53IlxY', 'https://www.youtube.com/watch?v=yXoGxDTU9vQ', 'https://www.youtube.com/watch?v=gf9A4Ac_8Jo', 'https://www.youtube.com/watch?v=kd2KEHvK-q8', 'https://www.youtube.com/watch?v=a3bOL8j3ypQ', 'https://www.youtube.com/watch?v=bcMtd50lkyo', 'https://www.youtube.com/watch?v=1D8hxXif5bQ', 'https://www.youtube.com/watch?v=z16vhtjWKL0', 'https://www.youtube.com/watch?v=to5qRLRSS7g', 'https://www.youtube.com/watch?v=Wo6OO992ywI', 'https://www.youtube.com/watch?v=az6c7negl6o', 'https://www.youtube.com/watch?v=y750zcVhOq4', 'https://www.youtube.com/watch?v=uZdv-TtiMkg', 'https://www.youtube.com/watch?v=nq1UTeKKBkY', 'https://www.youtube.com/watch?v=FSdK7VdHkaQ']
['https://www.youtube.com/watch?v=8LbAbDDbzXQ', 'https://ww

Unnamed: 0,user,vids_watched,video,channel
0,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=KFr3bQsZ-UQ,https://www.youtube.com/user/lisab1230
1,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=yI48HDw6tmc,https://www.youtube.com/user/leodehaastv
2,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=zLtErudfv1w,https://www.youtube.com/user/InTouchMinistries
3,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=tQ8PnPF3orY,https://www.youtube.com/c/FashionFeed
4,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=ngjQs_QjSwc,https://www.youtube.com/user/HauensteinCenter
5,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=K8CcSDXffno,https://www.youtube.com/channel/UCvcMkIT_afFIV...
6,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=92Ziq-SvDVI,https://www.youtube.com/user/preppernurse1
7,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=BELNmsxSajM,https://www.youtube.com/c/kootenbie
8,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=bV-P-W4Sak0,https://www.youtube.com/channel/UCyAsKlawRiYQv...
9,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=5KS2b6919oE,https://www.youtube.com/channel/UCeNIWUnD3qBq6...
