Skip to content

Commit 79a66fb

Browse files
Merge pull request avinashkranjan#2906 from jaivsh/master
Added Pixabay Image Scraper
2 parents ec01786 + 9762e15 commit 79a66fb

File tree

2 files changed

+141
-0
lines changed

2 files changed

+141
-0
lines changed

Pixabay Scraper/README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
## Pixabay Scraper
2+
3+
This scraper class fetches photo and video data from Pixabay. The user supplies a search query, and the matching photos or videos are downloaded to local storage.
4+

Pixabay Scraper/main.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import requests
2+
3+
4+
class Pixabay():
    """
    Class - `Pixabay`

    Searches the Pixabay API and downloads the matching media files into
    the current working directory.

    | Methods        | Details                                                  |
    | -------------- | -------------------------------------------------------- |
    | `.get_video()` | Downloads the videos from Pixabay to the local storage.  |
    | `.get_photo()` | Downloads the photos from Pixabay to the local storage.  |
    """

    # Seconds to wait on the server before a request is abandoned; without
    # a timeout a stalled connection would block the caller forever.
    REQUEST_TIMEOUT = 30

    # Video renditions to try, best quality first. Used as a fallback chain
    # when the preferred rendition carries no usable URL.
    VIDEO_SIZES = ('large', 'medium', 'small', 'tiny')

    def __init__(self, verbose=True, api_key=None):
        """
        Args:
            verbose: When true, print a progress line for every download.
            api_key: Pixabay API key. Falls back to the key bundled with
                this module when omitted.
        """
        # NOTE(review): a real API key is committed in source. Prefer
        # passing `api_key` (e.g. read from an environment variable) and
        # consider rotating this one.
        self.bunch = api_key or '38504833-19606430bd8fde504120d1630'
        self.name = 'Pixabay'
        self.verbose = verbose

    def __get_params_video(self, query, num, update_params=None):
        """
        Build the query parameters for a video search.

        Args:
            query: Search term given by the user.
            num: Number of videos to request (`per_page`).
            update_params: Optional dict of extra parameters; its entries
                override the defaults below.

        Returns:
        ```js
        {
            'key': API key,
            'q': query given by user,
            'video_type': type of video (default is film),
            'orientation': orientation of the video,
            'safesearch': prevents adult content,
            'per_page': number of videos to be fetched
        }
        ```
        """
        params = {
            # The API rejects requests that do not carry a key.
            'key': self.bunch,
            'q': query,
            'video_type': 'film',
            'orientation': 'horizontal',
            'safesearch': 'true',
            'per_page': num,
        }
        params.update(update_params or {})
        return params

    def __get_params_photo(self, query, num, update_params=None):
        """
        Build the query parameters for a photo search.

        Args:
            query: Search term given by the user.
            num: Number of photos to request (`per_page`).
            update_params: Optional dict of extra parameters; its entries
                override the defaults below.

        Returns:
        ```js
        {
            'key': API key,
            'q': query given by user,
            'image_type': type of image (default is photo),
            'orientation': orientation of the photo,
            'safesearch': prevents adult content,
            'per_page': number of images to be fetched
        }
        ```
        """
        params = {
            # The API rejects requests that do not carry a key.
            'key': self.bunch,
            'q': query,
            'image_type': 'photo',
            'orientation': 'horizontal',
            'safesearch': 'true',
            'per_page': num,
        }
        params.update(update_params or {})
        return params

    def _search(self, base_url, params):
        """Run one search request and return its list of hits.

        Raises the `requests` HTTP error for a non-2xx response instead of
        trying to parse an error body as JSON.
        """
        response = requests.get(
            base_url, params=params, timeout=self.REQUEST_TIMEOUT
        )
        response.raise_for_status()
        return response.json().get('hits', [])

    def _download(self, url, filename):
        """Save the content at `url` to `filename`.

        Returns True when the file was written, False when the server
        answered with an error status (nothing is written in that case, so
        an error page never ends up on disk as a media file).
        """
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        if not response.ok:
            if self.verbose:
                print(f" Skipping {filename}: HTTP {response.status_code}")
            return False
        with open(filename, 'wb') as f:
            f.write(response.content)
        return True

    @staticmethod
    def _pick_video_url(hit):
        """Return the URL of the best available rendition in `hit`.

        Walks `VIDEO_SIZES` in order and returns the first non-empty URL,
        or None when no rendition has one.
        """
        renditions = hit.get('videos') or {}
        for size in Pixabay.VIDEO_SIZES:
            url = (renditions.get(size) or {}).get('url')
            if url:
                return url
        return None

    def get_video(self, query, num=10, params=None):
        """
        Search Pixabay for videos and download them.

        Example:
        ```
        pixabay = Pixabay()
        pixabay.get_video(query, num)
        ```

        Args:
            query: Search term.
            num: Number of videos to request.
            params: Optional dict of extra API parameters that override the
                defaults.

        Returns: None. Downloads up to `num` videos into local storage as
        `video_pixabay_NN.mp4`.
        """
        BASE_URL = 'https://pixabay.com/api/videos/'
        _params = self.__get_params_video(query, num, params)
        hits = self._search(BASE_URL, _params)

        # Report progress against what the API actually returned, which
        # may be fewer than the number requested.
        total = len(hits)
        for i, hit in enumerate(hits, start=1):
            if self.verbose:
                print(f" Downloading Pixabay videos {i}/{total}")
            video_url = self._pick_video_url(hit)
            if not video_url:
                # No rendition with a usable URL; nothing to fetch.
                continue
            self._download(video_url, f'video_pixabay_{i:02d}.mp4')

    def get_photo(self, query, num=10, params=None):
        """
        Search Pixabay for photos and download them.

        Example:
        ```
        pixabay = Pixabay()
        pixabay.get_photo(query, num)
        ```

        Args:
            query: Search term.
            num: Number of photos to request.
            params: Optional dict of extra API parameters that override the
                defaults.

        Returns: None. Downloads up to `num` photos into local storage as
        `photo_pixabay_NN.jpg`.
        """
        BASE_URL = 'https://pixabay.com/api/'
        _params = self.__get_params_photo(query, num, params)
        hits = self._search(BASE_URL, _params)

        # Report progress against what the API actually returned, which
        # may be fewer than the number requested.
        total = len(hits)
        for i, hit in enumerate(hits, start=1):
            if self.verbose:
                print(f" Downloading Pixabay photos {i}/{total}")
            image_url = hit.get('largeImageURL')
            if not image_url:
                continue
            self._download(image_url, f'photo_pixabay_{i:02d}.jpg')
135+
136+
137+

0 commit comments

Comments
 (0)