In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to them take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import youtubecollector as ytc
import pandas as pd
from tqdm import tqdm_notebook as tqdm

## Youtube client setup

In [None]:
# Create the client object that every collection step below passes to the
# youtubecollector helpers. It is configured from ./api.conf
# (presumably holds the API credentials -- TODO confirm; keep it out of version control).
youtube_client = ytc.youtube_client.create_youtube_client("./api.conf")

## Channel Seed
The pipeline starts from a seed list of channels. For every channel all videos are retrieved, and for every video the comments, recommendations and captions are collected.

In [None]:
# CSV with the seed channels; it is loaded with pandas in the next cell.
channel_seed_filename = "input/seeds_nl_right.csv"
# CSV file that the collected channel records are written to.
channel_outputfile = "output/channels_nl_right.csv"

In [None]:
# Load the seed list into a DataFrame.
channel_seed_df = pd.read_csv(channel_seed_filename)

# Look up the seed channels through the API client; `channels` is iterated
# by the video collection cell further down.
channels = ytc.channels.get_channels(channel_seed_df, youtube_client)

# Store the collected channel records in the channel output file.
ytc.channels.write_channels(channels, channel_outputfile)

## Videos

In [None]:
# CSV file that receives the video records collected in the next cell.
video_output_file = "output/videos.csv"

In [None]:
# Collect the videos of every seed channel, one API page at a time.
# `all_videos` accumulates every page in memory because the comment,
# recommendation and transcript cells below iterate over it.
all_videos = []

for channel in tqdm(channels):
    # Reset the paging state for each channel. `next_page_token` starts as a
    # truthy placeholder only so that the while loop is entered; it is never
    # sent to the API because the first request takes the `first_page` branch.
    next_page_token = True
    first_page = True

    while next_page_token:
        if first_page:
            # First request for this channel: there is no page token yet.
            response = ytc.video.get_videos(channel.channel_uploads, youtube_client)
            first_page = False  # later requests must pass the page token
        else:
            response = ytc.video.get_videos(channel.channel_uploads, youtube_client, next_page_token)

        # .get() yields None when the response carries no 'nextPageToken',
        # which ends the while loop for this channel.
        next_page_token = response.get('nextPageToken')

        videos = ytc.video.convert_to_videos(response, youtube_client)
        all_videos.extend(videos)

        # Written once per page rather than once at the end
        # (assumes write_videos appends to the file -- TODO confirm).
        ytc.video.write_videos(videos, video_output_file)

## Comments

In [None]:
# CSV file that receives the comments collected in the next cell.
comments_output_file = "output/comments.csv"

In [None]:
# Request the comments of every collected video and write them out per video.
# NOTE(review): `next_page_token` is read from the response but never used,
# so only one request is made per video -- confirm whether get_comments
# accepts a page token and paginate if it does.
# NOTE(review): write_comments takes (file, records) here, whereas
# write_videos above is called with (records, file) -- confirm the order.
for video in tqdm(all_videos):
    response = ytc.comments.get_comments(video.video_id, youtube_client)
    next_page_token = response.get('nextPageToken')

    comments = ytc.comments.convert_to_comments(response)

    ytc.comments.write_comments(comments_output_file, comments)
    
    

## Recommendations

In [None]:
# CSV file that receives the recommendations collected in the next cell.
# NOTE(review): this path starts with "../output/", while the channel, video
# and comment files use "output/" -- confirm the directory is intentional.
recommendations_output_file = "../output/recommendations.csv"

In [None]:
# Fetch the recommendations for every collected video. They are kept in
# memory keyed by the source video id and also written out video by video.
video_to_recommendations = {}
for video in tqdm(all_videos):
    response = ytc.recommendations.get_recommendations(video.video_id, youtube_client)

    recommendations = ytc.recommendations.convert_to_recommendations(response, video.video_id)
    video_to_recommendations[video.video_id] = recommendations

    ytc.recommendations.write_recommendations(recommendations_output_file, recommendations)
    

## Transcripts

In [None]:
# CSV file that receives the transcripts extracted in the cells below.
# NOTE(review): this path starts with "../output/", while the channel, video
# and comment files use "output/" -- confirm the directory is intentional.
transcripts_output_file = "../output/transcripts.csv"

In [None]:
# Fetch captions for all collected videos. The return value is not used;
# presumably this saves .vtt files into the working directory, which the
# next cell picks up via "./*.vtt" -- TODO confirm.
ytc.transcripts.get_captions(all_videos)

In [None]:
# Extract transcripts from every .vtt file in the current working directory.
# Judging by the variable name, the result associates video ids with their
# transcripts -- TODO confirm the exact structure.
video_id_transcripts = ytc.transcripts.extract_transcripts("./*.vtt")

# Store the extracted transcripts in the transcript output file.
ytc.transcripts.write_transcripts(transcripts_output_file, video_id_transcripts)