# scraper.py
from threading import Thread
import youtube_dl
import stream
import os
# Post-processing step handed to youtube_dl: after the download, ffmpeg
# extracts the audio track and converts it to MP3.
# NOTE(review): the youtube_dl README example uses '192' for
# preferredquality; confirm that '196' is intentional.
_MP3_EXTRACTION = {
    "key": "FFmpegExtractAudio",
    "preferredcodec": "mp3",
    "preferredquality": "196",
}

# Options passed to youtube_dl.YoutubeDL by Scraper.__init__.
ydl_opts = {
    # youtube_dl format selector: best audio-only stream, otherwise the best
    # combined format.
    "format": "bestaudio/best",
    "postprocessors": [_MP3_EXTRACTION],
}
class Scraper:
    """Fetch audio for a video URL through youtube_dl.

    One ``YoutubeDL`` instance, configured from the module-level ``ydl_opts``,
    is created at construction time and reused by every call.
    """

    # Name given to the converted audio file in the current working directory.
    _OUTPUT_NAME = "song.mp3"

    def __init__(self):
        self.ydl = youtube_dl.YoutubeDL(ydl_opts)

    def get_metadata(self, url):  # url should be youtube link
        """Download *url* and return the metadata of one of its formats.

        Despite the name, this method has side effects: besides extracting the
        info dict it runs ``self.ydl.download([url])`` and then renames the
        ``.mp3`` files found in the current working directory to ``song.mp3``.

        Args:
            url: Link understood by youtube_dl (a video or a playlist).

        Returns:
            The last entry of the video's ``"formats"`` list whose ``"fps"``
            is ``None`` or absent.

        Raises:
            ValueError: If every format reports a frame rate, so there is no
                entry to return.
        """
        with self.ydl:
            info = self.ydl.extract_info(
                url,
                download=False,  # metadata only; the download is the next call
            )
            self.ydl.download([url])

        # A playlist / multi-video result nests its videos under "entries";
        # only the first one is used. Otherwise the result is the video itself.
        video = info["entries"][0] if "entries" in info else info

        self._collect_mp3()
        return self._select_audio_format(video["formats"])

    @staticmethod
    def _select_audio_format(formats):
        """Return the last entry of *formats* that reports no frame rate."""
        # .get() rather than ["fps"]: format dicts may omit the key entirely,
        # and a missing frame rate is treated the same as an explicit None.
        candidates = [fmt for fmt in formats if fmt.get("fps") is None]
        if not candidates:
            raise ValueError("no format without a frame rate (fps) was found")
        return candidates[-1]

    def _collect_mp3(self):
        """Move every ``.mp3`` in the working directory onto ``song.mp3``."""
        for name in os.listdir():
            # Skip the target itself so it is never renamed onto itself.
            if name.endswith(".mp3") and name != self._OUTPUT_NAME:
                # os.replace overwrites an existing target on every platform,
                # so no remove-then-rename fallback is needed.
                os.replace(name, self._OUTPUT_NAME)

    def download(self, url, filename):
        """Start ``stream.get_stream(url, filename)`` on a background thread.

        *url* is passed through unchanged, so callers that hold a page link
        must resolve it to a direct media URL first, e.g. with
        ``get_metadata(url)["url"]``.

        Args:
            url: URL handed to ``stream.get_stream``.
            filename: Second argument handed to ``stream.get_stream``.

        Returns:
            The started ``threading.Thread``, so the caller can ``join()`` it.
        """
        worker = Thread(target=stream.get_stream, args=(url, filename))
        worker.start()
        return worker
if __name__ == "__main__":
    # Manual smoke run: resolve a sample link to its direct media URL, print
    # it, then start the background download.
    client = Scraper()
    target = "https://youtu.be/cB4dYfFgaME"
    # A googlevideo.com link is already direct; anything else is resolved
    # through get_metadata first.
    if "googlevideo.com" not in target:
        target = client.get_metadata(target)["url"]
    print(target)
    client.download(target, filename="song.m4a")
# Reference sample: audio-only entries, presumably taken from a youtube_dl "formats" list.
# formats = [
# {
# "asr": 48000,
# "filesize": 59570,
# "format_id": "249",
# "tbr": 48.425,
# "ext": "webm",
# "acodec": "opus",
# "abr": 48.425,
# "container": "webm_dash",
# "format": "249 - audio only (tiny)"
# },
# {
# "asr": 48000,
# "filesize": 77899,
# "format_id": "250",
# "tbr": 63.326,
# "ext": "webm",
# "acodec": "opus",
# "abr": 63.326,
# "container": "webm_dash",
# "format": "250 - audio only (tiny)"
# },
# {
# "asr": 48000,
# "filesize": 142292,
# "format_id": "251",
# "tbr": 115.672,
# "ext": "webm",
# "acodec": "opus",
# "abr": 115.672,
# "container": "webm_dash",
# "format": "251 - audio only (tiny)"
# },
# {
# "asr": 44100,
# "filesize": 157753,
# "format_id": "140",
# "tbr": 127.58,
# "ext": "m4a",
# "acodec": "mp4a.40.2",
# "abr": 127.58,
# "container": "m4a_dash",
# "format": "140 - audio only (tiny)"
# }
# ]