In [None]:
%pip install requests
%pip install youtube-dl


In [None]:
import json
import os
import subprocess
import sys
import time

import requests

In [None]:
# The YouTube Data API key is read from the YOUTUBE_API_KEY environment
# variable so that the secret never lives in the notebook source.
# NOTE(review): a literal key used to be assigned here; if this notebook was
# ever shared or committed, that key should be revoked and replaced.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "")

if not API_KEY:
    # Warn early instead of failing later with an opaque HTTP error.
    print(
        "YOUTUBE_API_KEY is not set; export it before running the cells that call the YouTube API.",
        file=sys.stderr,
    )

In [None]:
# Root of the YouTube Data API v3; resource names are appended directly to it.
base_url = "https://youtube.googleapis.com/youtube/v3/"

# Headers sent with every API request: ask the server for a JSON response.
headers = dict(Accept='application/json')

In [None]:
def read_json_from_file(directory, filename):
    """Load and return the JSON document stored at ``directory/filename``.

    The file is opened as UTF-8 text and parsed with ``json.load``; the
    parsed value is returned unchanged.
    """
    source_path = os.path.join(directory, filename)

    with open(source_path, encoding="utf-8") as handle:
        return json.load(handle)

In [None]:
def write_json_to_file(directory, data, filename=None):
    """Serialize ``data`` as JSON into a file inside ``directory``.

    Parameters
    ----------
    directory : str
        Target directory; it is created (including parents) if missing.
    data : object
        Any value ``json.dump`` can serialize.
    filename : str, optional
        Name of the file to write. When omitted, the current timestamp
        (``time.time()``) followed by ``.json`` is used.

    The file is written as UTF-8 with non-ASCII characters kept verbatim
    (``ensure_ascii=False``). Nothing is returned.
    """
    # exist_ok=True replaces the separate exists() check, so there is no
    # window in which another process can create the directory first.
    os.makedirs(directory, exist_ok=True)

    if filename is None:
        filename = '{0}.json'.format(time.time())

    with open(os.path.join(directory, filename), 'w', encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False)

In [None]:
def write_video_ids_to_url_file(directory, video_ids):
    """Write one YouTube watch URL per video ID to a new file and return its path.

    Parameters
    ----------
    directory : str
        Target directory; it is created (including parents) if missing.
    video_ids : iterable of str
        Video IDs to turn into ``http://www.youtube.com/watch?v=<id>`` URLs.

    Returns
    -------
    str
        Path of the written file, named after the current timestamp with a
        ``.csv`` extension. Lines are terminated with CRLF.
    """
    # exist_ok=True replaces the separate exists() check (no creation race).
    os.makedirs(directory, exist_ok=True)

    filename = '{0}.csv'.format(time.time())
    file_path = os.path.join(directory, filename)

    # newline="\r\n" makes every "\n" come out as CRLF on all platforms.
    # Writing a literal "\r\n" in default text mode would yield "\r\r\n" on
    # Windows because of newline translation.
    with open(file_path, 'w', encoding="utf-8", newline="\r\n") as f:
        for item in video_ids:
            f.write("http://www.youtube.com/watch?v={0}".format(item))
            f.write("\n")

    return file_path

In [None]:
def search(keyword, next_page_token=None):
    """Run one page of a YouTube video search and return the parsed response.

    Parameters
    ----------
    keyword : str
        Search query. It is passed as a request parameter, so spaces and
        reserved characters such as ``&`` or ``#`` are URL-encoded.
    next_page_token : str, optional
        Page token from a previous response; ``None`` requests the first page.

    Returns
    -------
    The decoded JSON body of the response.

    Raises
    ------
    Exception
        If the API answers with any status other than 200. Network errors
        and timeouts raised by ``requests`` propagate unchanged.
    """
    params = {
        "part": "snippet",
        "maxResults": 50,
        "type": "video",
        "q": keyword,
        "key": API_KEY,
    }

    if next_page_token is not None:
        params["pageToken"] = next_page_token

    # Let requests build and escape the query string. The timeout keeps a
    # stalled connection from hanging the notebook indefinitely.
    response = requests.get(
        base_url + "search",
        params=params,
        headers=headers,
        timeout=30,
    )

    if response.status_code == 200:
        return response.json()

    raise Exception("HTTP not OK: {0}".format(response.content))

In [None]:
def get_video_ids(search_data):
    """Return the set of video IDs found in a search response.

    Reads ``["id"]["videoId"]`` from every entry of ``search_data["items"]``;
    duplicates collapse because the result is a set.
    """
    return {entry["id"]["videoId"] for entry in search_data["items"]}

In [None]:
def get_video_details(video_ids):
    """Fetch ``snippet`` and ``contentDetails`` for the given video IDs.

    Parameters
    ----------
    video_ids : iterable of str
        IDs to look up; they are sent as one comma-separated ``id`` filter.

    Returns
    -------
    list
        The ``items`` array of the API response. An empty list is returned
        without contacting the API when ``video_ids`` is empty.

    Raises
    ------
    Exception
        If the API answers with any status other than 200. Network errors
        and timeouts raised by ``requests`` propagate unchanged.
    """
    ids = list(video_ids)

    # Nothing to look up: skip the request instead of sending an empty `id`
    # filter to the API.
    if not ids:
        return []

    params = {
        "part": "snippet,contentDetails",
        "id": ",".join(ids),
        "key": API_KEY,
    }

    # Let requests build and escape the query string. The timeout keeps a
    # stalled connection from hanging the notebook indefinitely.
    response = requests.get(
        base_url + "videos",
        params=params,
        headers=headers,
        timeout=30,
    )

    if response.status_code == 200:
        return response.json()["items"]

    raise Exception("HTTP not OK: {0}".format(response.content))

In [None]:
def youtube_downloader(url_file_path):
    """Run ``youtube-dlc`` on a URL list and store its JSON output under ``traces``.

    Parameters
    ----------
    url_file_path : str
        Path of a text file with one video URL per line; it is handed to
        ``youtube-dlc`` through its ``-a`` (batch file) option.

    The tool's standard output (one JSON document per line, from ``-j``) is
    captured in ``output_video_traces.temp``. It is then parsed into a list
    and saved with ``write_json_to_file("traces", ...)``. Nothing is returned.

    A non-zero exit status is reported but not treated as fatal, so output
    produced before the failure is still stored.
    """
    temp_path = "output_video_traces.temp"

    # Shown to the user only; the process itself is started without a shell.
    bashCommand = "youtube-dlc -a {0} -j > output_video_traces.temp".format(url_file_path)
    print("-->Running youtube-dlc: {0}".format(bashCommand))

    # An argument list avoids shell quoting problems with unusual paths and
    # keeps this function valid Python outside IPython.
    with open(temp_path, "w", encoding="utf-8") as temp_file:
        completed = subprocess.run(
            ["youtube-dlc", "-a", url_file_path, "-j"],
            stdout=temp_file,
            stderr=subprocess.PIPE,
        )

    if completed.returncode != 0:
        print("-->youtube-dlc exited with code {0}: {1}".format(
            completed.returncode,
            completed.stderr.decode("utf-8", errors="replace")))

    # Turn the line-delimited JSON into one array. Blank lines are skipped so
    # that stray newlines in the output do not abort parsing.
    with open(temp_path, encoding="utf-8") as temp_file:
        json_array = [json.loads(line) for line in temp_file if line.strip()]

    # Save json array to disk
    write_json_to_file("traces", json_array)

In [None]:
def do_work(search_term, page_limit, video_ids_cache):
    """Search YouTube for ``search_term`` and collect traces of new 360-degree videos.

    Up to ``page_limit`` result pages are processed. For each page:

    1. the raw search response is saved under ``searches``;
    2. IDs already present in ``video_ids_cache`` are discarded;
    3. the remaining IDs are kept only if the details lookup reports
       ``contentDetails.projection == "360"``;
    4. the surviving IDs are written to a URL file under ``videos``, passed
       to ``youtube_downloader`` and then added to the cache.

    Parameters
    ----------
    search_term : str
        Query passed to ``search``.
    page_limit : int
        Maximum number of result pages to request.
    video_ids_cache : set
        IDs found by earlier runs. It is updated in place.

    Returns
    -------
    set
        The same ``video_ids_cache`` object that was passed in.
    """
    # Count previously found videos
    initial_video_ids_cache_size = len(video_ids_cache)

    next_page_token = None

    for i in range(page_limit):

        # Search YouTube API
        search_result = search(search_term, next_page_token)
        # Save search result
        write_json_to_file("searches", search_result)

        # Parse search result to get video ids
        found_video_ids = get_video_ids(search_result)

        # Find which found IDs are not duplicates
        unique_video_ids = found_video_ids.difference(video_ids_cache)

        # Only ask for details when there is something new. A page made up
        # entirely of cached IDs must not trigger a lookup with no IDs.
        if unique_video_ids:
            video_details = get_video_details(unique_video_ids)

            # Keep only IDs the API confirms as 360 videos. IDs missing from
            # the details response are dropped rather than assumed to be 360.
            confirmed_360_ids = {
                item['id']
                for item in video_details
                if item.get('contentDetails', {}).get('projection') == "360"
            }
            unique_video_ids = unique_video_ids.intersection(confirmed_360_ids)

        # We now have a local set (unique_video_ids) which contains unique 360 video IDs

        # Check if our unique video ids set is not empty
        if unique_video_ids:
            # Save video IDs as URLs to hand to external YouTube Downloader
            url_file_path = write_video_ids_to_url_file("videos", unique_video_ids)

            # Download video traces for the URLs found
            print("->Downloading [{0}] 360-degree video traces...".format(len(unique_video_ids)))
            youtube_downloader(url_file_path)

            # Record the IDs only after the download step has run, so that a
            # failure there leaves them eligible for a retry on the next run.
            video_ids_cache.update(unique_video_ids)

        print("Processed [{0}/{1}] search pages. Found and stored [{2}] video traces so far.".format(
            i + 1,
            page_limit,
            len(video_ids_cache) - initial_video_ids_cache_size))

        # Check if there is a next page available
        if 'nextPageToken' in search_result:
            # Update next_page_token for subsequent searches
            next_page_token = search_result['nextPageToken']
        elif i < page_limit - 1:
            # There are no more search results for us after this
            print("No more search results to explore for this search term: \"{0}\". Exiting...".format(search_term))
            break

    print("Processed and stored a total of {0} new 360-degree video traces.".format(
        len(video_ids_cache) - initial_video_ids_cache_size))

    return video_ids_cache

In [None]:
# Search settings for this run.
SEARCH_TERM = "360 charity"
PAGE_LIMIT = 20

# Keep track of what (unique) video IDs we have found
try:
    video_ids_cache = set(read_json_from_file("cache", "found_video_ids.json"))
except FileNotFoundError:
    # First run: no cache has been written yet, so start from an empty set.
    video_ids_cache = set()

try:

    # Execute the program
    do_work(SEARCH_TERM, PAGE_LIMIT, video_ids_cache)

except Exception as e:
    # Deliberately best-effort: report the failure and carry on so the IDs
    # collected so far remain available in video_ids_cache.
    print("Execution interrupted: {0}".format(e))

In [None]:
# Store video IDs so that subsequent runs will avoid duplicates
# Persist the accumulated IDs so later runs can skip videos already handled.
print("Writing found video IDs to cache...")
cached_ids = list(video_ids_cache)
write_json_to_file("cache", cached_ids, "found_video_ids.json")

total_cached = len(video_ids_cache)
print("Done. Total video traces cached: [{0}]".format(total_cached))