In [28]:
# imports
from youtube_transcript_api import YouTubeTranscriptApi
from youtube3 import YoutubeClient
import csv

In [29]:
# Configuration

# OAuth client-secret file handed to YoutubeClient.
#   - Create OAuth credentials at: https://console.cloud.google.com/apis/credentials
#   - Enable the YouTube API first at:
#     https://console.developers.google.com/apis/api/youtube.googleapis.com/overview
YOUTUBE_AUTH_FILE_LOCATION = "client_secret.json"

# Output path of the generated CSV report.
VIDEO_CSV_FILE = "youtube_shorts_description.csv"

# IDs of the videos to include in the report.
VIDEO_LIST = [
    "l9_8_pDTmis", "QYEfTly0pTE", "jYJTPqU66IY", "dBsomKKHhtk",
    "dTLYweJ08Tg", "k9v_bsZUQRg", "Js6ZUBSW6s0", "1AY9Sqt7yCg",
    "f8a2tiHatCc", "bnem7I5UkaA", "aFJ1ThX8XHU", "n7x4Jj9pdH8",
    "LdoJnz_ZQyU", "m5uJjHV_eVs", "xN5OsH0UCmo", "KiEErvcX_qo",
    "NLvfrxL3YGA", "nK-Hy0TxIik", "yWJVX9MKrUM", "d2EPEgWPn8Y",
]

In [30]:
class YTVideo:
    """Transcript plus snippet metadata for a single YouTube video.

    Constructing an instance immediately calls the transcript API and the
    YouTube client for ``videoId``; errors raised by either library propagate
    to the caller.

    Attributes:
        videoId: The ID passed to the constructor.
        transcript: All caption texts joined by single spaces.
        duration: Seconds covered by at least one caption segment.
        title, description, channelTitle, publishedAt: Copied from the
            video's ``snippet``.
        thumbnailURL: URL of the largest thumbnail listed in the snippet, or
            ``None`` when the snippet lists none.
    """

    # Thumbnail sizes from largest to smallest. Not every video offers every
    # size, so the first one present is used instead of assuming 'maxres'.
    _THUMBNAIL_PREFERENCE = ("maxres", "standard", "high", "medium", "default")

    # API clients keyed by credentials file. Shared by all instances so a
    # client is built once per credentials file rather than once per video.
    _clients = {}

    def __init__(self, videoId=""):
        self.videoId = videoId
        self.transcript, self.duration = self.extractTranscript()

        videoInfo = self.getVideoInfo()
        self.title = videoInfo["title"]
        self.description = videoInfo["description"]
        self.thumbnailURL = self._pickThumbnailURL(videoInfo.get("thumbnails") or {})
        self.channelTitle = videoInfo["channelTitle"]
        self.publishedAt = videoInfo["publishedAt"]

    def extractTranscript(self):
        """Fetch the transcript and return ``(text, duration)``.

        ``text`` is every segment's text joined by single spaces. ``duration``
        is the number of seconds covered by at least one segment: caption
        segments may overlap in time, so simply adding up the per-segment
        durations would count the overlapping seconds more than once.

        Returns:
            tuple: ``(transcript, duration)``; ``("", 0)`` for an empty
            transcript.
        """
        # NOTE(review): get_transcript is the static entry point of
        # youtube_transcript_api; newer releases appear to favour an
        # instance-based fetch() - confirm against the installed version.
        segments = YouTubeTranscriptApi.get_transcript(self.videoId)
        transcript = " ".join(segment["text"] for segment in segments)

        duration = 0
        coveredUntil = float("-inf")  # end time of the span counted so far
        for segment in sorted(segments, key=lambda s: s["start"]):
            length = segment["duration"]
            end = segment["start"] + length
            if segment["start"] >= coveredUntil:
                # No overlap with what was already counted.
                duration += length
                coveredUntil = end
            elif end > coveredUntil:
                # Partial overlap: count only the part not yet covered.
                duration += end - coveredUntil
                coveredUntil = end
        return transcript, duration

    def getVideoInfo(self):
        """Return the ``snippet`` dict of this video from the YouTube client.

        Raises:
            IndexError: If the response contains no items for ``videoId``.
        """
        client = self._clients.get(YOUTUBE_AUTH_FILE_LOCATION)
        if client is None:
            client = YoutubeClient(YOUTUBE_AUTH_FILE_LOCATION)
            # Mutates the class-level dict, so the client is shared.
            self._clients[YOUTUBE_AUTH_FILE_LOCATION] = client

        video = client.get_video(self.videoId)
        items = video['items']
        if not items:
            raise IndexError("No YouTube video found for id {!r}".format(self.videoId))
        return items[0]['snippet']

    def _pickThumbnailURL(self, thumbnails):
        """Return the URL of the largest thumbnail in ``thumbnails``, else None."""
        for size in self._THUMBNAIL_PREFERENCE:
            entry = thumbnails.get(size)
            if entry and entry.get("url"):
                return entry["url"]
        return None

In [31]:
# receives a list of video IDs and generates a csv file with available information about the video
def generate_csv(vidList):
    """Write one CSV row per video ID to ``VIDEO_CSV_FILE``.

    Each row holds the video ID, title, channel title, transcript, duration
    and words per second. The file is overwritten if it already exists.

    Args:
        vidList: Iterable of YouTube video IDs; a ``YTVideo`` is built for
            each, so any error it raises aborts the export.
    """
    header = ["Video ID", "Video Title", "Channel Title", "Transcript", "Duration", "Words per Second"]

    # newline='' leaves line endings to the csv module (otherwise blank rows
    # can appear on platforms that translate '\n'); utf-8 keeps non-ASCII
    # titles and transcripts writable regardless of the locale default.
    with open(VIDEO_CSV_FILE, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(header)

        for vidID in vidList:
            video = YTVideo(vidID)
            # split() without an argument ignores repeated/trailing whitespace
            # and newlines, so empty strings are not counted as words.
            wordCount = len(video.transcript.split())
            # A video with no timed captions has duration 0; report 0.0
            # rather than dividing by zero.
            wordsPerSecond = wordCount / video.duration if video.duration else 0.0
            writer.writerow([
                video.videoId,
                video.title,
                video.channelTitle,
                video.transcript,
                video.duration,
                wordsPerSecond,
            ])

In [32]:
# Run the export for every ID in VIDEO_LIST; the result is written to VIDEO_CSV_FILE.
generate_csv(VIDEO_LIST)