In [1]:
# Selenium imports
# (exception types, locators and explicit-wait helpers used for a custom wait)
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# For parsing and saving
import json
import datetime as dt
import csv
import sys
import os
import time

from pprint import pprint

from util.log_videos import adjust_video_log, get_videos, update_video_log
from util.helpers import startWebdriver, get_logging_decorator

from util.custom_values import DATA_DIR
from util.constants import METRICS, TimePeriod, TRAFFIC_SOURCES_IMP, \
    TRAFFIC_SOURCES, TRAFFIC_SOURCES_INV, Dimensions, ADV_URL


In [2]:
def scrape(driver, timeout: float = 10) -> list:
    """Scrape the main-metric series from the YouTube analytics chart.

    Waits until the chart element is present in the DOM, then reads the
    chart data straight from the page's ``yta-explore-deep-dive`` element
    via JavaScript.

    Args:
        driver: Selenium WebDriver already navigated to the analytics page.
        timeout: Maximum number of seconds to wait for the chart element
            to appear (defaults to 10).

    Returns:
        A list whose first item is the totals series, followed by one item
        per entry of the chart's ``series`` collection.

    Raises:
        TimeoutException: Raised by ``WebDriverWait.until`` when the chart
            element does not show up within ``timeout`` seconds.
    """
    # CSS selector of the line-chart element we wait for
    chart_css = 'yta-line-chart-base'

    # JS expression for the first main-metric data object; shared by both
    # reads below so the long property path is spelled out only once.
    metric_js = (
        "document.querySelector('#explore-app > yta-explore-deep-dive')"
        ".fetchedData.data.chartProperties.mainMetricData.data[0]"
    )

    WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, chart_css))
    )

    totals_series = driver.execute_script(
        "return " + metric_js + ".totalsSeries"
    )
    # `series` is iterated through .values(), so it is converted to a plain
    # array before being handed back to Python.
    breakdown_series = driver.execute_script(
        "return Array.from(" + metric_js + ".series.values())"
    )

    # Totals first, then every individual series
    return [totals_series, *breakdown_series]

In [3]:
# Scrape data
# Video whose analytics are scraped in this run
VIDEO_ID = "wRMVMSysvf4"

lines = {}

# First formatting pass: fix video and time period. The literal "{metric}"
# and "{dimension}" placeholders are re-inserted so they can be filled in
# per request by the second .format() call below.
base_url = ADV_URL.format(
    video_id=VIDEO_ID,
    time_period=TimePeriod.first_24h.value,
    metric="{metric}",
    dimension="{dimension}"
)

# Get data for totals
metric_key, metric_code = "views", "VIEWS"
url = base_url.format(metric=metric_code, dimension=Dimensions.traffic_source.value)

# Start the browser only once the URL is fully built, and immediately enter
# the try/finally: a failure while formatting the URL can then no longer
# leave a browser process running without ever reaching driver.quit().
driver = startWebdriver()
try:
    driver.get(url)

    data = scrape(driver)
finally:
    driver.quit()

In [4]:
# Series identifiers, in the order the chart returned them
names = [series['name'] for series in data]


def _simplify_point(raw_point: dict) -> dict:
    """Reduce a raw chart point to its relative label, UTC datetime and value."""
    return {
        'rel': raw_point['hovercardInfo']['relativeDateFormatted'],
        # 'x' is divided by 1000 before conversion, i.e. treated as epoch milliseconds
        'x': dt.datetime.fromtimestamp(raw_point['x'] / 1000, dt.timezone.utc),
        'y': raw_point['y'],
    }


# Series name -> list of simplified points
data_dict = {
    series['name']: [_simplify_point(raw_point) for raw_point in series['data']]
    for series in data
}

pprint(data_dict)

# Series name -> entity title, taken from the first point of each series
category_dict = {
    series['name']: series['data'][0]['hovercardInfo']['entityTitle']
    for series in data
}
pprint(category_dict)

{'END_SCREEN_main': [{'rel': 'Video published',
                      'x': datetime.datetime(2021, 8, 22, 20, 0, 7, tzinfo=datetime.timezone.utc),
                      'y': 0},
                     {'rel': 'First 1 minute',
                      'x': datetime.datetime(2021, 8, 22, 20, 1, 7, tzinfo=datetime.timezone.utc),
                      'y': 0},
                     {'rel': 'First 2 minutes',
                      'x': datetime.datetime(2021, 8, 22, 20, 2, 7, tzinfo=datetime.timezone.utc),
                      'y': 0},
                     {'rel': 'First 3 minutes',
                      'x': datetime.datetime(2021, 8, 22, 20, 3, 7, tzinfo=datetime.timezone.utc),
                      'y': 0},
                     {'rel': 'First 4 minutes',
                      'x': datetime.datetime(2021, 8, 22, 20, 4, 7, tzinfo=datetime.timezone.utc),
                      'y': 0},
                     {'rel': 'First 5 minutes',
                      'x': datetime.datetime(2021, 8, 22, 20, 5