# YouTube Data API

In [None]:
!pip install google-api-python-client

In [2]:
import json
from googleapiclient.discovery import build

In [3]:
# Read the API key from a JSON file stored outside the notebook so the
# credential is never embedded in the notebook itself.
# The context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
with open('../keys/youtube_key.json') as key_file:
    DEVELOPER_KEY = json.load(key_file)['api_key']

YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

# API client object; the following cells issue all their requests through it.
youtube = build(
    YOUTUBE_API_SERVICE_NAME,
    YOUTUBE_API_VERSION,
    developerKey=DEVELOPER_KEY
)


In [4]:
# Get Channel object

# 1. First find the channel ID by channel name.

# Name of the channel to search for.
CHANNEL_NAME = "Barbascura eXtra"

request = youtube.search().list(
    part="snippet",
    maxResults=5,
    q=CHANNEL_NAME,
    type="channel"
)

response = request.execute()

# A search can come back with no results (misspelled name, channel removed, ...).
# Fail with an explicit message rather than a bare IndexError/KeyError.
matches = response.get('items', [])
if not matches:
    raise LookupError(f"No YouTube channel found for query {CHANNEL_NAME!r}")

# Only the first hit is used as the channel.
channelId = matches[0]['id']['channelId']

channelId

'UCHi6Q3Z-5oJUC691WLlSntA'

In [5]:
# 2. Look up the channel itself to locate the playlist holding its uploads.

request = youtube.channels().list(id=channelId, part="snippet,contentDetails")
response = request.execute()

# Only the first returned channel is used; its "uploads" playlist ID is read
# from contentDetails.relatedPlaylists.
channel_details = response['items'][0]['contentDetails']
uploaded_playlist = channel_details['relatedPlaylists']['uploads']
uploaded_playlist

'UUHi6Q3Z-5oJUC691WLlSntA'

In [6]:
# Collect the ID of every item in the uploads playlist, requesting one page
# of up to 50 items at a time until the API stops returning a next-page token.
channel_videos : list = []

page_token = None
has_more_pages = True

while has_more_pages:
    request = youtube.playlistItems().list(
        part="snippet,contentDetails",
        maxResults=50,
        playlistId=uploaded_playlist,
        pageToken=page_token
    )
    response = request.execute()

    for playlist_item in response['items']:
        channel_videos.append(playlist_item['contentDetails']['videoId'])

    # No token in the response means this was the last page.
    next_token = response.get('nextPageToken')
    if next_token is None:
        has_more_pages = False
    else:
        page_token = next_token

len(channel_videos)

381

In [8]:
import time

# Get Comments for Every Video
#
# For each video, page through its comment threads and collect the display
# names of the authors of the top-level comments (replies are not inspected).
# Result: video ID -> set of author display names.
commentators : dict[str, set[str]] = {}

# Videos whose comments could not be fetched, mapped to the exception raised.
# Without this record a failed fetch would be indistinguishable from a video
# that simply has no comments.
failed_videos : dict[str, Exception] = {}

for video_id in channel_videos:

    try:
        page_token = None

        video_commentators : set[str] = set()

        while True:
            request = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                maxResults=100,
                pageToken=page_token
            )

            response = request.execute()

            video_commentators.update(
                ct['snippet']['topLevelComment']['snippet']['authorDisplayName']
                for ct in response['items']
            )

            # No token in the response means this was the last page.
            page_token = response.get('nextPageToken')
            if page_token is None:
                break

        # Short pause between videos to space out the requests.
        time.sleep(0.1)
        commentators[video_id] = video_commentators
    except Exception as exc:
        # Best effort: one failing video must not abort the whole crawl.
        # The video still gets an (empty) entry, and the cause is recorded
        # rather than silently discarded.
        commentators[video_id] = set()
        failed_videos[video_id] = exc

# Surface failures so an incomplete crawl does not go unnoticed.
if failed_videos:
    print(
        f"Could not fetch comments for {len(failed_videos)} of "
        f"{len(channel_videos)} videos; inspect `failed_videos` for details."
    )



In [9]:
# Sanity check: number of videos that have an entry in `commentators`.
len(commentators)

381

In [55]:
# Next, use networkx to build a network:
#   * Nodes are the videos
#   * An edge (a, b) is present if videos a and b share more than 5 commentators;
#     its weight is the number of shared commentators

In [10]:
# Write Weighted Edge List to a File
from itertools import combinations

# Two videos are linked only when they share strictly more than this many
# commentators.
SHARED_COMMENTATORS_THRESHOLD = 5

nodes_list = list(commentators.keys())
edges_list = []

# combinations() yields every unordered pair of videos exactly once, in the
# same order as a nested "for i / for j > i" index loop.
for video1, video2 in combinations(nodes_list, 2):
    shared_count = len(commentators[video1] & commentators[video2])

    if shared_count > SHARED_COMMENTATORS_THRESHOLD:
        edges_list.append((video1, video2, shared_count))

# One "video1 video2 weight" line per edge.
# NOTE: a video with no qualifying edge does not appear in this file at all.
with open('../data/barbascurax_commenters.edges', 'w', encoding='utf-8') as f:
    f.write('\n'.join("%s %s %d" % edge for edge in edges_list))


In [12]:
# Build the Network

import networkx as nx

# Load the graph from the weighted edge list file
# (one "node node weight" entry per line).
G = nx.read_weighted_edgelist('../data/barbascurax_commenters.edges')

# (number of nodes, number of edges)
(G.number_of_nodes(), G.number_of_edges())

(376, 28603)

In [14]:
# Draw the Network
# NOTE: nx.draw renders through matplotlib, which must be installed in the
# kernel's environment.

# spring_layout starts from random positions; fixing the seed makes the
# resulting drawing reproducible from one run to the next.
LAYOUT_SEED = 42

pos = nx.spring_layout(G, seed=LAYOUT_SEED)

nx.draw(
    G,
    pos=pos,
    node_color="lightblue",
    edge_color="gray"
)

ModuleNotFoundError: No module named 'matplotlib'

In [15]:
# Export the graph to a GEXF file, stored in the same directory as the edge list.
nx.write_gexf(G, '../data/barbascurax_commenters.gexf')