In [1]:
import json
import random
import string
import time
import warnings

import numpy as np
import pafy
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium_stealth import stealth
from webdriver_manager.chrome import ChromeDriverManager

# Hide video unavailable warnings
warnings.filterwarnings("ignore")

# Data Inladen

In [2]:
# Bot accounts: the experiment loop reads each row positionally, taking the
# first two columns as (mail, password).
mails = pd.read_csv("../data/mails.csv")

In [3]:
# Video dataset: vid_to_watch() uses the "conspiracy" column as a row mask and
# draws ids from the "video_id" column.
videos = pd.read_csv("../data/uploads.csv")

# Helpers

In [4]:
def vid_to_watch(videos, max_length=7200, max_attempts=100):
    """Pick a random conspiracy video that is accessible and short enough.

    Parameters
    ----------
    videos : pandas.DataFrame
        Must contain a boolean ``"conspiracy"`` column (used as a row mask)
        and a ``"video_id"`` column.
    max_length : int, optional
        Longest acceptable video length in seconds (default 7200, i.e. 2 hours).
    max_attempts : int, optional
        Maximum number of random draws before giving up (default 100).

    Returns
    -------
    (watch_time, to_watch)
        ``watch_time`` is the number of seconds to watch: a normally
        distributed fraction (mean 0.6, sd 0.15, clamped to [0, 1]) of the
        video length. ``to_watch`` is the chosen video id.

    Raises
    ------
    ValueError
        If ``videos`` contains no rows flagged as conspiracy.
    RuntimeError
        If no usable video was found within ``max_attempts`` draws.
    """
    # Build the candidate pool once instead of re-filtering on every retry
    candidates = videos.loc[videos["conspiracy"], "video_id"]
    if candidates.empty:
        raise ValueError("videos contains no rows flagged as conspiracy")

    for _ in range(max_attempts):
        to_watch = np.random.choice(candidates)
        url = f"http://www.youtube.com/watch?v={to_watch}"

        try:
            vid_len = pafy.new(url).length
        except Exception:
            # Video no longer accessible: draw another one. `Exception`
            # (not a bare except) keeps the loop interruptible with Ctrl-C.
            continue

        if vid_len <= max_length:
            break
    else:
        raise RuntimeError(
            f"No accessible video of at most {max_length} seconds "
            f"found in {max_attempts} attempts"
        )

    # The normal draw can fall outside [0, 1]; clamp it so the watch time is
    # never negative and never longer than the video itself.
    watch_fraction = min(max(np.random.normal(0.6, 0.15), 0.0), 1.0)

    return watch_fraction * vid_len, to_watch

In [5]:
def init_driver():
    """Build and return a Chrome WebDriver configured via selenium-stealth.

    The Chrome options are applied first (command-line flags, then the
    experimental options), the driver is started, and finally ``stealth`` is
    called on it with a fixed browser profile.
    """
    # Command-line flags, applied in this order.
    # Headless mode is not enabled; add "--headless" here to run without a window.
    chrome_flags = (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features",
        "--disable-blink-features=AutomationControlled",
        "--disable-infobars",
        "start-maximized",
    )

    # Experimental options that remove the automation switch and extension
    experimental_options = (
        ("excludeSwitches", ["enable-automation"]),
        ("useAutomationExtension", False),
    )

    # Profile passed to selenium-stealth to hide the fact we're using a bot
    stealth_profile = {
        "languages": ["en-US", "en"],
        "vendor": "Google Inc.",
        "platform": "Win32",
        "webgl_vendor": "Intel Inc.",
        "renderer": "Intel Iris OpenGL Engine",
        "fix_hairline": True,
    }

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)
    for option_name, option_value in experimental_options:
        chrome_options.add_experimental_option(option_name, option_value)

    browser = webdriver.Chrome(options=chrome_options)
    stealth(browser, **stealth_profile)

    return browser

In [6]:
def login_google(driver, mail, password):
    """Log in to Google with the given driver, email address and password.

    Note from the original author: this only works if the email address was
    created within chromedriver itself.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser session to log in with.
    mail : str
        Account email address; typed one character at a time with small
        random pauses.
    password : str
        Account password; sent in a single ``send_keys`` call.

    Element lookups use ``find_element(By..., ...)``; the ``find_element_by_*``
    helpers are deprecated in Selenium 4 and removed in later 4.x releases.
    """
    # Open the login page
    driver.get("https://accounts.google.com/ServiceLogin")
    time.sleep(np.random.uniform(1, 2.5))

    # Type the address letter by letter; the field is looked up again for
    # every keystroke, as in the original flow.
    for letter in mail:
        driver.find_element(By.ID, "identifierId").send_keys(letter)
        time.sleep(np.random.uniform(0.05, 0.2))

    driver.find_element(By.ID, "identifierNext").click()

    time.sleep(np.random.uniform(1, 2))

    # Enter the password and submit
    driver.find_element(By.XPATH, "//input[@name = 'password']").send_keys(password)
    driver.find_element(By.XPATH, '//button[contains(@class, "VfPpkd-LgbsSe")]').click()

    time.sleep(np.random.uniform(1, 2))

    # If a security check is shown, skip it. This stays best-effort (the
    # button is usually absent), but catching `Exception` instead of a bare
    # except lets KeyboardInterrupt/SystemExit propagate.
    try:
        driver.find_element(By.XPATH, "//div[contains(@class, 'U26fgb O0WRkf')]").click()
    except Exception:
        pass

In [22]:
def in_seconds(timestamp):
    """Convert a colon-separated YouTube timestamp (e.g. "1:02:03") to seconds.

    The fields are read right to left, each weighted by the next power of 60:
    the last field counts as seconds, the one before as minutes, and so on.
    """
    total = 0
    weight = 1

    # Walk from the least significant field (seconds) to the most significant
    for field in reversed(timestamp.split(":")):
        total += int(field) * weight
        weight *= 60

    return total

# Het experiment

In [25]:
# --- Experiment configuration ------------------------------------------------
MAX_USERS = 1               # the loop previously stopped after the first account; None = all accounts
VIDEOS_PER_USER = 2         # videos watched per account
TOP_N_RECOMMENDATIONS = 10  # homepage recommendations recorded after each video
AD_SKIP_ATTEMPTS = 2        # how many times to try clicking "skip ad"
AD_WAIT_SECONDS = 7         # pause before each skip attempt


def _skip_ads(driver):
    """Best-effort: wait, then try to click the ad skip button, a fixed number of times."""
    for _ in range(AD_SKIP_ATTEMPTS):
        time.sleep(AD_WAIT_SECONDS)
        try:
            driver.find_element(
                By.XPATH, "//div[@class = 'ytp-ad-text ytp-ad-skip-button-text']"
            ).click()
        except Exception:
            # No skip button (or it is not clickable): nothing to skip
            pass


def _disable_autoplay(driver):
    """Switch the autoplay toggle off if it is currently on."""
    time.sleep(2)
    autoplay = driver.find_element(By.XPATH, "//div[@class = 'ytp-autonav-toggle-button']")
    if autoplay.get_attribute("aria-checked") == "true":
        autoplay.click()


def _watch_video(driver, watch_time):
    """Block until the player passes `watch_time` seconds or the video has ended."""
    while True:
        # Sometimes the timestamp cannot be displayed; it is then an empty string
        timestamp = driver.find_element(By.CLASS_NAME, "ytp-time-current").text
        if timestamp and in_seconds(timestamp) > watch_time:
            return

        # The video may end early (can happen when ads play and the video is
        # very short). find_elements returns an empty list when nothing
        # matches, so no exception handling is needed.
        if driver.find_elements(By.XPATH, "//div[contains(@class, 'ended-mode')]"):
            return

        time.sleep(1)


def _collect_recommendations(driver, limit):
    """Open the YouTube homepage and return up to `limit` (video_url, channel_url) pairs."""
    driver.get("http://youtube.com")
    time.sleep(1)

    channels = driver.find_elements(By.XPATH, "//a[@id = 'avatar-link']")
    vids = driver.find_elements(By.XPATH, "//a[@id = 'video-title-link']")

    # Slice + zip instead of indexing 0..limit-1: if fewer items have loaded we
    # record what is there rather than raising IndexError halfway through a row.
    return [
        (vid.get_attribute("href"), channel.get_attribute("href"))
        for vid, channel in zip(vids[:limit], channels[:limit])
    ]


# --- Run the experiment ------------------------------------------------------
experiment_results = {"user": [], "vids_watched": [], "video": [], "channel": []}

accounts = mails if MAX_USERS is None else mails.head(MAX_USERS)

for user in accounts.itertuples():
    # Find current user's info (position 0 of the tuple is the index)
    mail, password = user[1], user[2]

    driver = init_driver()
    try:
        login_google(driver, mail, password)

        for vids_watched in range(1, VIDEOS_PER_USER + 1):
            # Get a random video from the dataset and watch part of it
            watch_time, to_watch = vid_to_watch(videos)
            driver.get(f"http://youtube.com/watch?v={to_watch}")

            _skip_ads(driver)
            _disable_autoplay(driver)
            _watch_video(driver, watch_time)

            # Record the homepage recommendations shown after this video
            for video_url, channel_url in _collect_recommendations(driver, TOP_N_RECOMMENDATIONS):
                experiment_results["user"].append(mail)
                experiment_results["vids_watched"].append(vids_watched)
                experiment_results["video"].append(video_url)
                experiment_results["channel"].append(channel_url)
    finally:
        # Always close the browser, even on errors, so sessions do not pile up
        driver.quit()

pd.DataFrame(experiment_results)

15 64.38009591983749
16 64.38009591983749
17 64.38009591983749
18 64.38009591983749
19 64.38009591983749
20 64.38009591983749
58 64.38009591983749
63 64.38009591983749
64 64.38009591983749
65 64.38009591983749
4 473.63294544184214
5 473.63294544184214
12 473.63294544184214
13 473.63294544184214
14 473.63294544184214
15 473.63294544184214
16 473.63294544184214
17 473.63294544184214
18 473.63294544184214
19 473.63294544184214
20 473.63294544184214
21 473.63294544184214
22 473.63294544184214
23 473.63294544184214
24 473.63294544184214
25 473.63294544184214
26 473.63294544184214
27 473.63294544184214
28 473.63294544184214
447 473.63294544184214
479 473.63294544184214


Unnamed: 0,user,vids_watched,video,channel
0,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=92Ziq-SvDVI,https://www.youtube.com/user/preppernurse1
1,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=tKk_e_-b7z8,https://www.youtube.com/user/BuddyBrownMusic
2,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=bcMtd50lkyo,https://www.youtube.com/c/JohnHageeMinistries
3,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=WTRvcNQdsss,https://www.youtube.com/c/IlRifugioPerfetto
4,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=_hgshQF2JfY,https://www.youtube.com/c/AikidoflowLondon
5,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=PN2F1zxOC8w,https://www.youtube.com/c/rumbleviral
6,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=7Qzsq9nB998,https://www.youtube.com/channel/UCNmnXNeAFnk4w...
7,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=a8jcNBVWJyE,https://www.youtube.com/c/PrioritizeYourLife
8,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=z3U0udLH974,https://www.youtube.com/user/TheCatsPyjaaaamas
9,scriptiebot@gmail.com,1,https://www.youtube.com/watch?v=Arkq3lLuo0U,https://www.youtube.com/channel/UC-l69It3hxAY3...
