In [None]:
# Authentication method taken from https://www.thepythoncode.com/article/using-youtube-api-in-python

# Notes carried over from the previous version: you will have to use pip to install the GitHub and
# Google SDKs before these import statements will work.
# The YouTube API is part of the family of Google APIs and uses Google's generic SDK
# Use:
# pip install google-api-python-client # Not sure if this is needed in this new version
# pip install google-auth-oauthlib
# pip install PyGithub

import os
import pickle
import json
import requests
from time import sleep
import csv
import io
import datetime
from pathlib import Path

# GitHub SDK
from github import Github

# Google API SDKs:
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

#import urllib.parse as p
#import re

# Control the location of credentials.
# !!! It is unsafe to store credentials in the working directory if the code will be pushed to GitHub !!!
# Any value other than 'home' is treated as 'working' (the current working directory).
cred_directory = 'home' # set to 'home' if the credential is in the home directory, otherwise working directory

# Set configuration details necessary for interacting with the GitHub API

# The access token should be generated for read/write access to public repos:
# see https://developer.github.com/v3/auth/#working-with-two-factor-authentication
# see https://github.com/settings/tokens/new
# and select the public_repo scope.

# reference on PyGithub: https://pygithub.readthedocs.io/en/latest/github_objects/Repository.html
# reference on GitHub API: https://developer.github.com/v3/guides/getting-started/

github_username = ''  # set to empty string if using a token (for 2FA)
organization_name = 'heardlibrary'  # account that owns the repo; also used to build raw.githubusercontent.com URLs
organization_is_user = False  # True: look the repo up under the authenticated user instead of an organization
repo_name = 'dashboard'
path_to_directory = 'disc/youtube/'  # directory within the repo; concatenated directly with file names, so keep the trailing slash

# Set configuration details necessary for interacting with the YouTube Analytics API
# You will need to modify the following line according to how you named your secrets file
client_secrets_filename = 'codegraf_1-1_youtube_credentials.json'
# File in which the Google access/refresh tokens are cached (pickled) between runs
pickle_filename = 'youtube_token.pickle'

# Work out where the Google client secrets file and the pickled token are stored.
if cred_directory != 'home':
    # Any other setting is normalized to 'working': the bare file names are used,
    # so they resolve against the current working directory.
    cred_directory = 'working'
    client_secrets_file_path = client_secrets_filename
    pickle_file_path = pickle_filename
else:
    # Path.home() gets the path to the home directory; supposed to work for Win and Mac
    home = str(Path.home())
    client_secrets_file_path = f'{home}/{client_secrets_filename}'
    pickle_file_path = f'{home}/{pickle_filename}'


In [None]:
# -----------------
# utility functions
# -----------------

def generate_utc_date():
    """Generate today's UTC date as an ISO 8601 formatted string.

    Returns
    -------
    str
        The current date in UTC, form: 2019-12-05
    """
    # datetime.utcnow() is deprecated as of Python 3.12, so ask for a
    # timezone-aware UTC timestamp and keep only its date part.
    return datetime.datetime.now(datetime.timezone.utc).date().isoformat()

# RAW FILE FUNCTIONS

def read_string_from_github_file(organization_name, repo_name, path_to_directory, filename):
    """Read the raw text of a file on the master branch of a GitHub repo.

    Parameters
    ----------
    organization_name : str
        Account (organization or user) that owns the repo.
    repo_name : str
        Name of the repo.
    path_to_directory : str
        Directory path within the repo; joined directly with filename,
        so it must end with a slash (or be an empty string).
    filename : str
        Name of the file to read.

    Returns
    -------
    str
        The body of the HTTP response as text.

    Notes
    -----
    The HTTP status code is not checked, so if the file does not exist
    the text of the error response is what gets returned.
    """
    url = ('https://raw.githubusercontent.com/' + organization_name + '/' + repo_name
           + '/master/' + path_to_directory + filename)
    # Without a timeout requests waits indefinitely on a stalled connection,
    # which would freeze the polling loop. A timeout raises an exception instead.
    r = requests.get(url, timeout=30)
    return r.text

# LIST OF DICTIONARIES FUNCTIONS

def read_dicts_from_csv(filename):
    """Load a CSV file from disk as a table: a list with one dictionary per data row.

    The first line of the file supplies the dictionary keys.
    """
    with open(filename, 'r', newline='', encoding='utf-8') as csv_file:
        return list(csv.DictReader(csv_file))

def write_dicts_to_csv(table, filename, fieldnames):
    """Save a table (list of dictionaries) to disk as a CSV file.

    fieldnames gives the column order and is written as the header row.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        dict_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(table)

def read_dicts_from_github_csv(organization_name, repo_name, path_to_directory, filename):
    """Read a CSV file on the master branch of a GitHub repo into a list of dictionaries.

    Parameters
    ----------
    organization_name : str
        Account (organization or user) that owns the repo.
    repo_name : str
        Name of the repo.
    path_to_directory : str
        Directory path within the repo; joined directly with filename.
    filename : str
        Name of the CSV file.

    Returns
    -------
    list of dict
        One dictionary per data row, keyed by the column headers in the first row.

    Notes
    -----
    The HTTP status code is not checked, so an error response body
    would be parsed as if it were CSV.
    """
    url = ('https://raw.githubusercontent.com/' + organization_name + '/' + repo_name
           + '/master/' + path_to_directory + filename)
    # A timeout keeps a stalled connection from blocking the script forever.
    r = requests.get(url, timeout=30)
    # Let the csv module find the record boundaries itself. Splitting the text
    # on '\n' first would cut a quoted field that contains a line break into
    # separate (broken) rows.
    return list(csv.DictReader(io.StringIO(r.text, newline='')))

def write_dicts_to_string(table, fieldnames):
    """Serialize a table (list of dictionaries) as CSV text and return it as a string.

    fieldnames gives the column order and is written as the header row.
    """
    buffer = io.StringIO()
    dict_writer = csv.DictWriter(buffer, fieldnames=fieldnames)
    dict_writer.writeheader()
    dict_writer.writerows(table)
    return buffer.getvalue()

# LIST OF LISTS FUNCTIONS

def read_lists_from_github_csv(organization_name, repo_name, path_to_directory, filename):
    """Read a CSV file on the master branch of a GitHub repo into a list of lists.

    Parameters
    ----------
    organization_name : str
        Account (organization or user) that owns the repo.
    repo_name : str
        Name of the repo.
    path_to_directory : str
        Directory path within the repo; joined directly with filename.
    filename : str
        Name of the CSV file.

    Returns
    -------
    list of list of str
        One inner list per row, including the header row.

    Notes
    -----
    The HTTP status code is not checked, so an error response body
    would be parsed as if it were CSV.
    """
    url = ('https://raw.githubusercontent.com/' + organization_name + '/' + repo_name
           + '/master/' + path_to_directory + filename)
    # A timeout keeps a stalled connection from blocking the script forever.
    r = requests.get(url, timeout=30)
    # Let the csv module find the record boundaries itself. Splitting the text
    # on '\n' first would cut a quoted field that contains a line break into
    # separate (broken) rows. Reading from a stream also means the final
    # newline of the file does not produce a spurious empty row.
    return list(csv.reader(io.StringIO(r.text, newline='')))

def write_lists_to_csv(file_name, array):
    """Save a table (list of lists) to disk as a CSV file, one inner list per row."""
    with open(file_name, 'w', newline='', encoding='utf-8') as out_file:
        csv.writer(out_file).writerows(array)

def write_lists_to_string(table):
    """Serialize a table (list of lists) as CSV text and return it as a string."""
    buffer = io.StringIO()
    csv.writer(buffer).writerows(table)
    return buffer.getvalue()

# -----------------
# functions for interacting with GitHub
# -----------------

# value of directory should be either 'home' or 'working'
def load_credential(filename, directory):
    """Read a credential (token or password) from a plain text file.

    Parameters
    ----------
    filename : str
        Name of the file holding the credential.
    directory : str
        'home' to look in the user's home directory; any other value is
        treated as 'working' and filename is opened as given (i.e. relative
        to the current working directory).

    Returns
    -------
    str
        The file's text with any trailing line breaks removed.

    Notes
    -----
    If the file cannot be read, a message is printed and exit() is called.
    """
    cred = ''
    if directory == 'home':
        # Path.home() gets the path to the home directory; supposed to work for Win and Mac
        credential_path = str(Path.home()) + '/' + filename
    else:
        directory = 'working'  # normalized so that the error message below reads sensibly
        credential_path = filename
    try:
        with open(credential_path, 'rt', encoding='utf-8') as file_object:
            cred = file_object.read()
    except (OSError, UnicodeError):
        # Only failures to open or decode the file are handled here; a bare
        # except would also have intercepted KeyboardInterrupt.
        print(filename + ' file not found - is it in your ' + directory + ' directory?')
        # NOTE(review): exit() is the interactive helper installed by the site module,
        # and its behavior inside a notebook kernel may differ - confirm if that matters.
        exit()
    # Text editors commonly end the file with a newline. The credential is later
    # sent in an HTTP header, where a line break is not valid, so drop it.
    return cred.rstrip('\r\n')

# pass in an empty string for organization_name to use an individual account
# pass in an empty string for github_username to use a token instead of username login
def login_get_repo(repo_name, github_username, organization_name, organization_is_user, cred_directory):
    """Log in to GitHub through PyGithub and return an object for the named repo.

    Pass an empty string for github_username to authenticate with the token
    stored in linked-data_github_token.txt; otherwise the username is used
    together with the password stored in pwd.txt. Both files are looked up
    in cred_directory ('home' or 'working').

    When organization_is_user is true the repo is looked up under the
    authenticated user and organization_name is not used; otherwise it is
    looked up under the organization called organization_name.
    """
    if github_username != '':
        g = Github(github_username, load_credential('pwd.txt', cred_directory))
    else:
        g = Github(login_or_token=load_credential('linked-data_github_token.txt', cred_directory))

    # The owner is either the logged-in user's own account or an organization
    # to which the token creator has push access.
    owner = g.get_user() if organization_is_user else g.get_organization(organization_name)
    return owner.get_repo(repo_name)

def get_user_list(repo):
    """Return the login names of the collaborators reported by repo.get_collaborators()."""
    return [collaborator.login for collaborator in repo.get_collaborators()]

def get_file_sha(account, repo, file_path):
    """Look up the blob SHA of a file through the GitHub contents API.

    Parameters
    ----------
    account : str
        Account (organization or user) that owns the repo.
    repo : str
        Name of the repo.
    file_path : str
        Path of the file within the repo.

    Returns
    -------
    str
        The value of the 'sha' key in the API response, or an empty string
        when the response has no such key (e.g. the file doesn't already
        exist on GitHub).

    Notes
    -----
    The request is sent without authentication.
    """
    # A timeout keeps a stalled connection from blocking the script forever.
    r = requests.get('https://api.github.com/repos/' + account + '/' + repo + '/contents/' + file_path, timeout=30)
    file_data = r.json()
    try:
        return file_data['sha']
    except (KeyError, TypeError):
        # KeyError: a JSON object without 'sha' (such as an error message).
        # TypeError: the response was not a JSON object at all (such as a list).
        return ''

# use this function to update an existing text file
def update_file(repo, account, repo_name, path_to_directory, filename, content):
    """Commit content to a text file in the repo, creating the file if it is not there yet.

    repo is the repo object whose create_file/update_file methods perform
    the commit; account and repo_name are used to look up the SHA of the
    current version of the file. Returns whatever the repo method returns.
    """
    file_path = path_to_directory + filename
    message = 'Update ' + filename + ' file via API'
    current_sha = get_file_sha(account, repo_name, file_path)
    if current_sha != '':
        # An existing file is replaced by referencing the SHA of its current version.
        return repo.update_file(file_path, message, content, current_sha)
    # An empty SHA means the lookup found no existing file.
    return repo.create_file(file_path, message, content)

# -----------------
# functions for interacting with the YouTube API
# -----------------

def youtube_authenticate():
    """Authenticate with Google and return a YouTube Analytics (v2) service object.

    Notes
    -----
    The credentials are cached locally in pickled form at pickle_file_path.
    If that file doesn't exist, or the cached credentials are not valid and
    cannot be refreshed, the user is sent through an authorization flow
    built from the client secrets file at client_secrets_file_path.
    """
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled when running in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = '1'
    scopes = ['https://www.googleapis.com/auth/youtube.readonly']
    api_service_name = 'youtubeAnalytics'
    api_version = 'v2'

    # The pickle file stores the user's access and refresh tokens, and is
    # written automatically when the authorization flow completes for the first time.
    creds = None
    if os.path.exists(pickle_file_path):
        with open(pickle_file_path, 'rb') as token_file:
            creds = pickle.load(token_file)

    # Cached credentials that are still valid can be used as they are.
    if creds and creds.valid:
        return build(api_service_name, api_version, credentials=creds)

    if creds and creds.expired and creds.refresh_token:
        creds.refresh(Request())
    else:
        # No usable credentials available: let the user log in.
        flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file_path, scopes)
        creds = flow.run_local_server(port=0)

    # Save the new or refreshed credentials for the next run.
    with open(pickle_file_path, 'wb') as token_file:
        pickle.dump(creds, token_file)

    return build(api_service_name, api_version, credentials=creds)

# -----------------
# High level functions
# -----------------

def built_video_filter_string(video_metadata_filename):
    """Load video data and build the filter string from video IDs.

    Parameters
    ----------
    video_metadata_filename : str
        Name of the CSV file (in the configured GitHub repo directory)
        that has an 'id' column holding the video IDs.

    Returns
    -------
    filter_string : str
        'video==' followed by the comma-separated video IDs.
    output_header_list : list of str
        'date' followed by the same video IDs, in the same order.

    Notes
    -----
    Under the v2 API, the video filter can send up to 500 IDs.
    As of 2021-02-02, I have 268 videos I'm tracking, so at some point
    this may have to be broken into two or more API calls.
    """
    max_videos = 500
    metadata = read_dicts_from_github_csv(organization_name, repo_name, path_to_directory, video_metadata_filename)
    video_ids = [video['id'].strip() for video in metadata]
    #video_ids = video_ids[0:5] # enable this line for testing
    if len(video_ids) > max_videos:
        print('Warning: limit of 500 videos exceeded! Modify the script.')
        video_ids = video_ids[:max_videos]
    # Joining (rather than appending commas and trimming the last character)
    # leaves the 'video==' prefix intact when there are no videos at all.
    filter_string = 'video==' + ','.join(video_ids)
    output_header_list = ['date'] + video_ids
    return filter_string, output_header_list

def get_youtube_usage_stats(todays_date_utc, filter_string):
    """Query the YouTube Analytics API for minutes watched and views per video.

    The report runs from 2013-01-01 through todays_date_utc and is limited
    to the videos named in filter_string. Uses the module-level youtube
    service object and returns the response of the executed query.
    """
    print('sending request to YouTube Analytics API')

    query_parameters = {
        'ids': 'channel==MINE',
        'startDate': '2013-01-01',  # don't have any videos dated earlier than that
        'endDate': todays_date_utc,
        'metrics': 'estimatedMinutesWatched,views',
        'filters': filter_string,
        'dimensions': 'video',
    }
    report_request = youtube.reports().query(**query_parameters)
    result = report_request.execute()

    #print(json.dumps(result, indent=2))
    print('done retrieving data from YouTube API')
    return result

# The revised tables are then pushed to GitHub
# The first column must be the date.
def add_data_to_tables():
    """Retrieve data for views and minutes, append a row to each table, push the revised tables to GitHub.

    Notes
    -----
    The first column of each table must be the date.
    Nothing is changed if either table does not have the same number of
    columns as the list built from the video metadata table, or if the
    data cannot be retrieved from the API after 12 tries.
    """
    todays_date_utc = generate_utc_date()
    filter_string, output_header_list = built_video_filter_string('video-metadata.csv')

    minutes_table = read_lists_from_github_csv(organization_name, repo_name, path_to_directory, 'total_minutes_watched.csv')
    views_table = read_lists_from_github_csv(organization_name, repo_name, path_to_directory, 'total_views.csv')

    # Check to make sure that there are the same number of videos in the metadata list and the tables
    # If not, nothing happens
    if len(minutes_table[0]) != len(output_header_list):
        print('minutes table:', len(minutes_table[0]), ' header list:', len(output_header_list))
        print('Warning! Minutes table does not have the same number of videos as the videos metadata table!')
        return

    if len(views_table[0]) != len(output_header_list):
        print('views table:', len(views_table[0]), ' header list:', len(output_header_list))
        print('Warning! Views table does not have the same number of videos as the videos metadata table!')
        return

    # Try to acquire the data for an hour: 12 tries, 5 minutes apart.
    max_tries = 12
    retry_wait_seconds = 300
    new_rows = None
    for attempt in range(1, max_tries + 1):
        try:
            results = get_youtube_usage_stats(todays_date_utc, filter_string)
            candidate_rows = _build_usage_rows(todays_date_utc, output_header_list, results['rows'])
        except Exception as ex:
            # Any failure (including an unexpected response) counts as a failed try.
            # KeyboardInterrupt is not an Exception, so the wait can still be interrupted.
            print('Could not get data from YouTube API, try', attempt, 'of', max_tries)
            print(type(ex).__name__, ex.args)
            sleep(retry_wait_seconds)
        else:
            # The rows are only accepted once they have been built completely,
            # so a partially built row can never be pushed to GitHub.
            new_rows = candidate_rows
            break

    if new_rows is None:
        print('Warning! No data retrieved from YouTube API; tables not updated.')
        return
    minutes_row, views_row = new_rows

    # log into the GitHub API and create a repo instance
    repo = login_get_repo(repo_name, github_username, organization_name, organization_is_user, cred_directory)

    minutes_table.append(minutes_row)
    raw_csv_text = write_lists_to_string(minutes_table)
    response = update_file(repo, organization_name, repo_name, path_to_directory, 'total_minutes_watched.csv', raw_csv_text)
    print('minutes response: ')
    print(response)

    views_table.append(views_row)
    raw_csv_text = write_lists_to_string(views_table)
    response = update_file(repo, organization_name, repo_name, path_to_directory, 'total_views.csv', raw_csv_text)
    print('views response: ')
    print(response)

    # Update the date last run
    update_file(repo, organization_name, repo_name, path_to_directory, 'last_run.txt', generate_utc_date())
    print('done')

    #write_lists_to_csv('total_minutes_watched_test.csv', minutes_table)
    #write_lists_to_csv('total_views_test.csv', views_table)
    return


def _build_usage_rows(todays_date_utc, output_header_list, api_data):
    """Build the new minutes and views rows, ordered like the table column headers.

    Each record in api_data is indexed as [video ID, minutes, views], following
    the order of the dimension and metrics requested from the API.
    Returns the tuple (minutes_row, views_row).
    """
    # Index the API records by video ID; the first record for an ID wins.
    stats_by_video_id = {}
    for record in api_data:
        stats_by_video_id.setdefault(record[0], record)

    # Each new row starts with the date (first column).
    minutes_row = [todays_date_utc]
    views_row = [todays_date_utc]
    for video_id in output_header_list[1:]:  # skip the first item (date)
        record = stats_by_video_id.get(video_id)
        if record is None:
            # A video in the metadata table that is not in the API results is recorded as zero.
            minutes_row.append('0')
            views_row.append('0')
        else:
            minutes_row.append(str(record[1]))
            views_row.append(str(record[2]))
    return minutes_row, views_row


In [None]:
# Authenticate to the YouTube API; the resulting service object is used by get_youtube_usage_stats().
youtube = youtube_authenticate()
# Report completion only after authentication has actually finished.
# (Naive UTC timestamp, built without the deprecated datetime.utcnow().)
print('Authentication done:', datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat())


In [None]:
# Poll forever: check once an hour, or again after 10 minutes if anything goes wrong.
while True: # infinite loop
    try:
        # Naive UTC timestamp, built without the deprecated datetime.utcnow().
        print('Time checked:', datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat())

        date_last_run = read_string_from_github_file(organization_name, repo_name, path_to_directory, 'last_run.txt')
        print('Date last run:', date_last_run)

        date_now_utc = generate_utc_date()
        print('UTC date now is:', date_now_utc)

        # NOTE(review): the once-per-day check below is disabled, so a row is added
        # on every pass through the loop - confirm whether that is intended.
        #if date_now_utc > date_last_run:
        add_data_to_tables()
        print()
        # wait an hour before checking again
        sleep(3600)
    except Exception as ex:
        print('Error occurred, trying again in 10 minutes')
        print(type(ex).__name__, ex.args)
        sleep(600)
