In [None]:
import os
import shutil
import urllib
import urllib.request

import numpy as np
import pandas as pd
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi

In [None]:
# Read the key from the YOUTUBE_API_KEY environment variable so it never has to
# be pasted into the notebook; the placeholder is kept as the fallback value.
api_key = os.environ.get('YOUTUBE_API_KEY', 'enter your api key')  # YouTube API key generated in the Google console
youtube = build('youtube', 'v3', developerKey=api_key)

# CSV listing the channels to analyse.
df = pd.read_csv("tech_youtube_channel.csv")
df

In [None]:
# Convert the single-column frame to a Series. Squeezing only the column axis
# keeps a one-row file as a length-1 Series; a plain squeeze() would collapse it
# to a bare string, which ','.join() would then split character by character.
channel_ids = df.squeeze("columns")
print(channel_ids)
print(type(channel_ids))

## Function to get channel statistics

In [None]:
def get_channel_stats(youtube, channel_ids):
    """Fetch title, headline statistics and uploads-playlist id for each channel.

    Args:
        youtube: YouTube Data API client exposing ``channels().list(...)``.
        channel_ids: iterable of channel-id strings (list, pandas Series, ...).
            A single id passed as a bare string is treated as one channel.

    Returns:
        A list of dicts with the keys ``Channel_name``, ``Subscribers``,
        ``Views``, ``Total_videos`` and ``playlist_id``, one per channel the API
        returned, ordered like ``channel_ids``. Counts are not converted here;
        a statistic missing from the response is reported as ``None``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(channel_ids, str):
        requested = [channel_ids]
    else:
        requested = list(channel_ids)

    found = []  # (channel id reported by the API, row dict)
    # Query in batches of 50 ids, mirroring the batching used by get_video_details.
    for start in range(0, len(requested), 50):
        request = youtube.channels().list(
                    part='snippet,contentDetails,statistics',
                    id=','.join(requested[start:start + 50]))
        response = request.execute()

        # 'items' can be absent when none of the requested ids matched.
        for item in response.get('items', []):
            statistics = item.get('statistics', {})
            data = dict(Channel_name = item['snippet']['title'],
                        Subscribers = statistics.get('subscriberCount'),
                        Views = statistics.get('viewCount'),
                        Total_videos = statistics.get('videoCount'),
                        playlist_id = item['contentDetails']['relatedPlaylists']['uploads'])
            found.append((item.get('id'), data))

    # Return rows in request order so callers that attach the ids by position
    # label the right rows; the response order is not relied upon.
    rank = {}
    for position, channel_id in enumerate(requested):
        rank.setdefault(channel_id, position)
    found.sort(key=lambda pair: rank.get(pair[0], len(requested)))

    return [data for _, data in found]

In [None]:
# Collect the per-channel rows and tabulate them.
channel_statistics = get_channel_stats(youtube, channel_ids)
channel_data = pd.DataFrame(channel_statistics)

# Convert the three count columns to numeric dtypes.
for count_column in ('Subscribers', 'Views', 'Total_videos'):
    channel_data[count_column] = pd.to_numeric(channel_data[count_column])

# NOTE(review): the ids are attached by position/index, so this is only right
# when the rows come back one per id and in the same order as channel_ids — confirm.
channel_data['channel_ID'] = channel_ids
channel_data.dtypes

In [None]:
# Persist the channel summary table to CSV, then show it as the cell output.
channel_details_csv = 'All_Tech_Channel_Details.csv'
channel_data.to_csv(channel_details_csv)
channel_data

## Functions to get video ids and video details

In [None]:
def get_video_ids(youtube, playlist_id):
    """Return the id of every video in a playlist, following all result pages.

    Args:
        youtube: YouTube Data API client exposing ``playlistItems().list(...)``.
        playlist_id: id of the playlist to walk.

    Returns:
        A list of video-id strings in the order the pages return them.
    """
    collected = []
    page_token = None

    while True:
        params = {
            'part': 'contentDetails',
            'playlistId': playlist_id,
            'maxResults': 50,
        }
        # The first request carries no token; later ones resume where the
        # previous page stopped, which lifts the 50-results-per-call limit.
        if page_token is not None:
            params['pageToken'] = page_token

        page = youtube.playlistItems().list(**params).execute()
        collected.extend(entry['contentDetails']['videoId'] for entry in page['items'])

        page_token = page.get('nextPageToken')
        if page_token is None:
            return collected

def get_video_details(youtube, video_ids):
    """Fetch title, publish date and engagement counts for the given videos.

    Args:
        youtube: YouTube Data API client exposing ``videos().list(...)``.
        video_ids: list of video-id strings; queried in batches of 50.

    Returns:
        A list of dicts with the keys ``Title``, ``Published_date``, ``Views``,
        ``Likes`` and ``Comments``, one per video the API returned. A statistic
        missing from the response is reported as ``None`` instead of raising.
    """
    all_video_stats = []

    for start in range(0, len(video_ids), 50):
        request = youtube.videos().list(
                    part='snippet,statistics',
                    id=','.join(video_ids[start:start + 50]))
        response = request.execute()

        for video in response.get('items', []):
            snippet = video['snippet']
            # Some videos come back without likeCount/commentCount (presumably
            # hidden ratings or disabled comments); .get() keeps one such video
            # from aborting the whole channel with a KeyError.
            statistics = video.get('statistics', {})
            video_stats = dict(Title = snippet['title'],
                               Published_date = snippet['publishedAt'],
                               Views = statistics.get('viewCount'),
                               Likes = statistics.get('likeCount'),
                               Comments = statistics.get('commentCount')
                               )
            all_video_stats.append(video_stats)

    return all_video_stats

In [None]:
# Root folder that receives one sub-folder per channel (CSV, thumbnails, transcripts).
BASE_DIR = r"C:\Users\goenk\Desktop\data\tech"
# Characters that Windows rejects in file and folder names.
INVALID_NAME_CHARS = '<>:"/\\|?*'

for k in range(len(channel_data)):
    channel_name = channel_data.loc[k, 'Channel_name']
    # Read the playlist straight from row k; looking it up by channel name picks
    # the wrong row when two channels share a title.
    playlist_id = channel_data.loc[k, 'playlist_id']

    try:
        video_ids = get_video_ids(youtube, playlist_id)
        video_details = get_video_details(youtube, video_ids)
    except Exception as err:
        # Best effort: report why the channel is skipped instead of hiding it.
        print("Skipping channel {!r}: {}".format(channel_name, err))
        continue

    # The ids are attached to the detail rows by position below, which needs a
    # non-empty table with exactly one row per id.
    if not video_details or len(video_details) != len(video_ids):
        print("Skipping channel {!r}: got {} detail rows for {} video ids".format(
            channel_name, len(video_details), len(video_ids)))
        continue

    video_data = pd.DataFrame(video_details)
    video_data['Views'] = pd.to_numeric(video_data['Views'])  # converting to numeric values
    video_data['Likes'] = pd.to_numeric(video_data['Likes'])  # converting to numeric values
    print(video_data)

    video_data['Month'] = pd.to_datetime(video_data['Published_date']).dt.strftime('%b')
    # NOTE(review): assumes the detail rows are in the same order as video_ids — confirm.
    video_data['ID'] = video_ids
    # Select all videos published on or after 1st Jan 2020 (string comparison of
    # the Published_date values).
    video_data = video_data.loc[video_data['Published_date'] >= '2020-01-01']
    print(video_data)

    thumbnail = video_data['ID'].values.tolist()  # ids of the videos whose assets are downloaded

    # One folder per channel. Strip characters that cannot appear in a Windows
    # path; fall back to the row number if nothing usable is left.
    folder_name = ''.join(ch for ch in str(channel_name) if ch not in INVALID_NAME_CHARS).rstrip(' .') or str(k)
    path = os.path.join(BASE_DIR, folder_name)
    os.makedirs(path, exist_ok=True)  # also succeeds on re-runs and creates missing parents

    # Write the per-channel table once, directly into the channel folder, instead
    # of rewriting and moving it for every video.
    video_data.to_csv(os.path.join(path, "All_Video_Details.csv"))

    # Files are named after the video's position in `thumbnail`.
    for position, video_id in enumerate(thumbnail):
        # Thumbnail and transcript are fetched independently so that a failure
        # of one does not discard the other.
        try:
            videoImage = "https://img.youtube.com/vi/{}/sddefault.jpg".format(video_id)  # thumbnail photo
            urllib.request.urlretrieve(videoImage, os.path.join(path, "{}.jpg".format(position)))
        except Exception as err:
            print("PROBLEM ENCOUNTERED", "thumbnail", video_id, err)

        try:
            srt = YouTubeTranscriptApi.get_transcript(video_id)  # video caption/transcript/subtitle
            text = "".join(part["text"] + " " for part in srt)
            # Explicit UTF-8 so non-ASCII captions do not depend on the platform default.
            with open(os.path.join(path, "{}.txt".format(position)), "w", encoding="utf-8") as file:
                file.write(text)
        except Exception as err:
            print("PROBLEM ENCOUNTERED", "transcript", video_id, err)
            continue