In [7]:
# imports
from youtube_transcript_api import YouTubeTranscriptApi
import googleapiclient.discovery
import googleapiclient.errors
import csv

In [1]:
import json
# Read the credentials stored in the local JSON secrets file
with open("client_secrets.json") as file:
    secrets = json.loads(file.read())

# Pull out the API key that the YouTube API client is built with
api_key = secrets["api_key"]

In [2]:
# --- YouTube Data API client settings ---
API_SERVICE_NAME = "youtube"
API_VERSION = "v3"
API_KEY = api_key

# --- Export settings: output file and the IDs of the videos to process ---
VIDEO_CSV_FILE = "youtube_shorts_description.csv"
VIDEO_LIST = [
    "l9_8_pDTmis",
    "QYEfTly0pTE",
    "jYJTPqU66IY",
    "dBsomKKHhtk",
    "dTLYweJ08Tg",
    "k9v_bsZUQRg",
    "Js6ZUBSW6s0",
    "1AY9Sqt7yCg",
    "f8a2tiHatCc",
    "bnem7I5UkaA",
    "aFJ1ThX8XHU",
    "n7x4Jj9pdH8",
    "LdoJnz_ZQyU",
    "m5uJjHV_eVs",
    "xN5OsH0UCmo",
    "KiEErvcX_qo",
    "NLvfrxL3YGA",
    "nK-Hy0TxIik",
    "yWJVX9MKrUM",
    "d2EPEgWPn8Y",
]
# Credentials (this notebook passes API_KEY as the developer key) can be created here:
#   https://console.cloud.google.com/apis/credentials
# The YouTube API must be activated before use here:
#   https://console.developers.google.com/apis/api/youtube.googleapis.com/overview

In [3]:
class YTVideo:
    """Transcript, metadata and top comments for a single YouTube video.

    Constructing an instance performs all lookups eagerly: it builds an API
    client, downloads the transcript, then queries the video, comment-thread
    and category endpoints. Errors raised by those calls propagate to the
    caller.

    Attributes set by ``__init__``:
        youtubeClient, videoId, transcript, duration, title, description,
        thumbnailURL, channelTitle, publishedAt, views, likes, commentCount,
        top10comments, category.

    ``thumbnailURL``, ``views``, ``likes`` and ``commentCount`` are ``None``
    when the API response does not contain the corresponding field.
    """

    # Thumbnail keys in order of preference, largest first. The larger
    # renditions are not present for every video, so the first key found wins.
    _THUMBNAIL_PREFERENCE = ("maxres", "standard", "high", "medium", "default")

    def __init__(self, videoId=""):
        """Fetch everything known about ``videoId``.

        Args:
            videoId: ID of the YouTube video to load.
        """
        self.youtubeClient = self.getYoutubeAPICLient()

        self.videoId = videoId
        self.transcript, self.duration = self.extractTranscript()

        videoInfo = self.getVideoInfo()
        snippet = videoInfo['snippet']
        # Individual counters may be missing from the response (for example
        # when likes are hidden or comments are disabled), so read them
        # defensively instead of raising KeyError.
        statistics = videoInfo.get('statistics', {})

        self.title = snippet['title']
        self.description = snippet['description']
        self.thumbnailURL = self._pickThumbnailURL(snippet.get('thumbnails', {}))
        self.channelTitle = snippet['channelTitle']
        self.publishedAt = snippet['publishedAt']
        self.views = statistics.get('viewCount')
        self.likes = statistics.get('likeCount')
        self.commentCount = statistics.get('commentCount')
        self.top10comments = self.getTopComments()
        self.category = self.getCategoryByID(snippet['categoryId'])

    @staticmethod
    def _pickThumbnailURL(thumbnails):
        """Return the URL of the largest available thumbnail, or None if there is none."""
        for size in YTVideo._THUMBNAIL_PREFERENCE:
            if size in thumbnails:
                return thumbnails[size]['url']
        return None

    def getYoutubeAPICLient(self):
        """Build a YouTube API client authenticated with the module-level API_KEY."""
        return googleapiclient.discovery.build(API_SERVICE_NAME, API_VERSION, developerKey=API_KEY)

    # extract transcript and transform it to string
    def extractTranscript(self):
        """Download the transcript of ``self.videoId``.

        Returns:
            A ``(transcript, duration)`` tuple: the segment texts joined by a
            single space, and the sum of the segment durations (0 when there
            are no segments).
        """
        # NOTE(review): newer youtube-transcript-api releases appear to replace
        # this static call with an instance-based API - confirm against the
        # installed version.
        segments = YouTubeTranscriptApi.get_transcript(self.videoId)
        transcript = " ".join(segment['text'] for segment in segments)
        duration = sum(segment['duration'] for segment in segments)
        return transcript, duration

    # extract video info from YT API
    def getVideoInfo(self):
        """Return the API item (snippet, contentDetails, statistics) for ``self.videoId``.

        Raises:
            IndexError: if the API returns no item for the video ID.
        """
        response = self.youtubeClient.videos().list(part="snippet,contentDetails,statistics", id=self.videoId).execute()
        items = response.get('items') or []
        if not items:
            raise IndexError(f"No video found for id {self.videoId!r}")
        return items[0]

    def getCategoryByID(self, categoryID):
        """Return the title of the video category identified by ``categoryID``.

        Raises:
            IndexError: if the API returns no item for the category ID.
        """
        response = self.youtubeClient.videoCategories().list(part="snippet", id=categoryID).execute()
        items = response.get('items') or []
        if not items:
            raise IndexError(f"No video category found for id {categoryID!r}")
        return items[0]['snippet']['title']

    def getTopComments(self):
        """Return the display text of up to 10 top-level comments, ordered by relevance."""
        response = self.youtubeClient.commentThreads().list(part="snippet", order="relevance", maxResults=10, videoId=self.videoId).execute()
        return [
            thread['snippet']['topLevelComment']['snippet']['textDisplay']
            for thread in response['items']
        ]

In [4]:
def generate_csv(vidList):
    """Write one CSV row of video information per ID to ``VIDEO_CSV_FILE``.

    For every ID a ``YTVideo`` is constructed and its ID, title, channel
    title, transcript, duration, words per second, top comments and category
    are written as one row below a header row. The output file is overwritten.

    Args:
        vidList: iterable of YouTube video IDs.

    Returns:
        None.

    Any exception raised while building a ``YTVideo`` propagates and stops the
    export.
    """
    header = ["Video ID", "Video Title", "Channel Title", "Transcript", "Duration", "Words per Second", "Top10 Comments", "Category"]

    # newline='' leaves line endings to the csv module (avoids blank rows on
    # Windows); an explicit utf-8 encoding keeps non-ASCII titles, transcripts
    # and comments from failing on platforms with a different default.
    with open(VIDEO_CSV_FILE, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(header)

        for vidID in vidList:
            video = YTVideo(vidID)

            # split() without an argument ignores leading/trailing/repeated
            # whitespace, so empty strings are not counted as words.
            wordCount = len(video.transcript.split())
            # The rate is undefined for a zero duration; None is written as an
            # empty cell instead of raising ZeroDivisionError.
            wordsPerSecond = wordCount / video.duration if video.duration else None

            writer.writerow([
                video.videoId,
                video.title,  # written as text; encoding to bytes would store the b'...' repr
                video.channelTitle,
                video.transcript,
                video.duration,
                wordsPerSecond,
                video.top10comments,
                video.category,
            ])

In [132]:
# Build a YTVideo for a single ID (the first entry of VIDEO_LIST)
video = YTVideo(videoId="l9_8_pDTmis")

In [8]:
# Export every video listed in VIDEO_LIST to VIDEO_CSV_FILE
generate_csv(vidList=VIDEO_LIST)