# YouTube Data API v3

In [2]:
import requests
import json

import pandas as pd

# api_key is stored in config.py
import config

### 1 - Youtube Search (query, n)
    Returns list of channel dictionaries
### 2 - Youtube Channel List
    Returns details on a specific channel
#### A - Request channel details (channelId)
    Returns a json dictionary for a specific channel Id
#### B - Run Channel List (list of channelIds)
    Returns a list of dictionaries per channel

Search method iterating using tokens to get more than 50 results. 

Uses requests to grab type=channel, part=snippet, order=?;

Creates an empty list, extends the list with the json's reponse items (50 at a time).

In [43]:
# Search for channels by a specific query
def request_youtube_search_channels(query, n):
    '''Returns a list of n channels that match the query.\n
    Uses /youtube/v3/search'''
    
    # Logic to determine maxResults parameter
    
    # Empty list to store 50 items from each response
    channel_list = list()
    
    nextPageToken = ''

    while len(channel_list)<n:
        # Debugging
        print(len(channel_list), n)
        
        # Logic for determining maxResults
        
        # Create request object
        resp = requests.get(
            'https://www.googleapis.com/youtube/v3/search',
            params=dict(part='snippet', 
                        type='channel',
                        maxResults=20,
                        pageToken=nextPageToken,
                        q=query,
                        key=config.api_key)
        )
        nextPageToken = json.loads(resp.content)['nextPageToken']
        channel_list.extend(json.loads(resp.content)['items'])
        
    return channel_list

CHANNEL_LIST = request_youtube_search_channels('Corridor',20)

# Extract Channel Ids from the list of dictionaries
CHANNELID_LIST = [channel['snippet']['channelId'] for channel in CHANNEL_LIST]

0 20


Function to retrieve channel details by channelId. Returns the entire dictionary from the json response object.

In [23]:
# Get channel details and snippet
def request_channel_list(channelId):
    resp = requests.get(
        'https://www.googleapis.com/youtube/v3/channels',
        params=dict(part='contentDetails, snippet, statistics, brandingSettings, topicDetails, status, id, contentOwnerDetails',
                id=channelId,
                key=config.api_key),
    )
    return json.loads(resp.content)
# Debug
# a = request_channel_list('UCsn6cjffsvyOZCZxvGoJxGg') # spaceX UCtI0Hodo5o5dUb67FeUjDeA # James Hoffman UCMb0O2CdPBNi-QqPk5T3gsQ

In [31]:
# Returns a list of json dictionaries for a given list of channelIds
# Iteratively runs requests for channel details and retrieves detail items

def run_channel_list(channelId_list):
    
    # Instantiate empty channel details list
    channel_details_list = list()
    
    # Loop over each channelId in channelId_list
    for channelId in channelId_list:
        
        # Extend channel details list with each channels json dictionary
        channel_details_list.extend(request_channel_list(channelId)['items'])
        
    return channel_details_list

CHANNEL_DETAILS_LIST = run_channel_list(CHANNELID_LIST)

In [95]:
# Returns a list of dictionaries ready to be loaded into a dataframe
# Transforms a list of json dictionaries into workable dictionaries for dataframe

def extract_channel_details(details_list):
    # Instantite empty channels details list for new dictionary format
    channel_details_list = list()
    
    # Loop over each channel's json dictionary in details_list
    for channel in details_list:
        # Instantiate new details dictionary; in series update the dictionary to include relevant details
        channel_details_dict = {}
        channel_details_dict.update(channel['snippet'])
        channel_details_dict.update(channel['contentDetails'])
        channel_details_dict.update(channel['topicDetails'])
        channel_details_dict.update(channel['status'])
        channel_details_dict.update(channel['statistics'])
        channel_details_dict.update(channel['brandingSettings']['channel'])
        
        # Append the added channel's new dictionary format to channel details list
        channel_details_list.append(channel_details_dict)
    return channel_details_list

extract_channel_details(CHANNEL_DETAILS_LIST)

Need to change this to extract from a list of more than 50 dictionaries rather than a response object

Everything below this is antiquated

In [None]:
# Function to return a dataframe from a response object
def extract_youtube_search():

    # Select keys to use from searchResult object
    keys_list = ['publishedAt','channelId','title','description','channelTitle','publishTime']
    
    # empty channel list
    channel_list = list()
    
    # loop over the list of dictionaries and extract 
    for entry in range(0,len(json.loads(RESP.content)['items'])):
        channel_list.append({key:json.loads(RESP.content)['items'][entry]['snippet'][key] for key in keys_list})
    # create a dataframe of each snippet
    df = pd.DataFrame(channel_list)
    return df

CHANNEL_DF = extract_youtube_search()
CHANNEL_DF