1+ import requests
2+ from bs4 import BeautifulSoup as bs
3+
4+
class _VideoPageError(ValueError, AttributeError):
    """Raised when the page lacks an expected element or a count is unparsable.

    Subclasses both ValueError and AttributeError so that callers which
    caught the exceptions the unchecked lookups / int() used to raise
    still catch this one.
    """


def _require(node, name, attrs=None):
    """Return the first `name` tag under `node` matching `attrs`, or raise."""
    tag = node.find(name, attrs=attrs or {})
    if tag is None:
        raise _VideoPageError(
            f"could not find <{name}> matching {attrs or {}} in the page"
        )
    return tag


def _parse_count(text, label):
    """Convert text such as '1,234 views' or '1,234' to an int."""
    tokens = text.split()
    if not tokens:
        raise _VideoPageError(f"{label} count is empty")
    try:
        # Only the leading token carries the number; anything after it
        # ("views", "view", ...) is a unit label of varying length.
        return int(tokens[0].replace(",", ""))
    except ValueError as err:
        raise _VideoPageError(
            f"cannot parse {label} count from {text!r}"
        ) from err


def get_video_info(url, timeout=10):
    """Download a YouTube watch page and extract basic video metadata.

    NOTE(review): the selectors below target specific class/id/title
    attributes in the returned HTML; confirm the pages being fetched
    still serve that markup.

    Args:
        url: URL of the video page to download.
        timeout: Seconds to wait for the server before giving up
            (passed to ``requests.get``).

    Returns:
        A dict with the keys ``title`` (str), ``views`` (int),
        ``description`` (str), ``date_published`` (str), ``likes`` (int),
        ``dislikes`` (int) and ``channel`` (a dict with ``name``, ``url``
        and ``subscribers``, all str).

    Raises:
        requests.RequestException: If the download fails or the server
            answers with an error status.
        _VideoPageError: If an expected element is missing from the page
            or a count cannot be converted to an integer.
        KeyError: If the channel link has no ``href`` attribute.
    """
    from urllib.parse import urljoin

    # download HTML code; fail fast on HTTP errors instead of parsing an
    # error page
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # create beautiful soup object to parse HTML
    soup = bs(response.content, "html.parser")

    result = {}
    # video title
    result['title'] = _require(soup, "span", {"class": "watch-title"}).text.strip()
    # video views (converted to integer)
    result['views'] = _parse_count(
        _require(soup, "div", {"class": "watch-view-count"}).text, "view"
    )
    # video description
    result['description'] = _require(soup, "p", {"id": "eow-description"}).text
    # date published
    result['date_published'] = _require(
        soup, "strong", {"class": "watch-time-text"}
    ).text
    # number of likes as integer
    result['likes'] = _parse_count(
        _require(soup, "button", {"title": "I like this"}).text, "like"
    )
    # number of dislikes as integer
    result['dislikes'] = _parse_count(
        _require(soup, "button", {"title": "I dislike this"}).text, "dislike"
    )

    # channel details
    channel_tag = _require(
        _require(soup, "div", {"class": "yt-user-info"}), "a"
    )
    # urljoin copes with both site-relative and absolute hrefs and does not
    # leave stray whitespace in the resulting URL
    channel_url = urljoin("https://www.youtube.com", channel_tag['href'])
    # number of subscribers as str
    channel_subscribers = _require(
        soup, "span", {"class": "yt-subscriber-count"}
    ).text.strip()
    result['channel'] = {
        'name': channel_tag.text,
        'url': channel_url,
        'subscribers': channel_subscribers,
    }
    return result
34+
if __name__ == "__main__":
    import argparse

    # Command-line interface: a single positional argument, the video URL.
    cli = argparse.ArgumentParser(description="YouTube Video Data Extractor")
    cli.add_argument("url", help="URL of the YouTube video")
    options = cli.parse_args()

    # Fetch and parse the page named on the command line.
    video = get_video_info(options.url)
    channel = video['channel']

    # One entry per printed line, in output order. The literal padding in
    # the templates is part of the emitted text and is kept as-is.
    report = [
        f"Title: {video['title']} ",
        f"Views: {video['views']} ",
        f"\n Description: {video['description']} \n ",
        video['date_published'],
        f"Likes: {video['likes']} ",
        f"Dislikes: {video['dislikes']} ",
        f"\n Channel Name: {channel['name']} ",
        f"Channel URL: {channel['url']} ",
        f"Channel Subscribers: {channel['subscribers']} ",
    ]
    for entry in report:
        print(entry)
0 commit comments