Skip to content

Commit e8dce9b

Browse files
authored
Merge pull request #122 from 2hands10fingers/patch-4
Create general_scraper.py
2 parents 1ea1dc9 + ff9004c commit e8dce9b

File tree

1 file changed

+31
-0
lines changed

1 file changed

+31
-0
lines changed

Automation/src/general_scraper.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
from bs4 import BeautifulSoup as bs
2+
from requests import get
3+
4+
class Scraper:
    """Fetch a page over HTTP and pull matching elements out of it.

    The constructor only stores its arguments; the request is made and the
    defaults for ``parser`` and ``headers`` are resolved each time
    :meth:`get_piece` is called.

    Note: the instance attribute ``headers`` set in ``__init__`` shadows the
    static method :meth:`headers` on instances, so the default header dict
    must be obtained through the class (``Scraper.headers()``).
    """

    def __init__(self, url, parser=None, headers=None, timeout=None):
        """Store the scrape configuration.

        Args:
            url: Address passed to ``requests.get``.
            parser: Parser name handed to BeautifulSoup; ``None`` means
                ``'lxml'`` is used at request time.
            headers: Request headers; ``None`` means the default
                browser-like ``User-Agent`` from :meth:`headers` is used.
            timeout: Forwarded to ``requests.get`` as ``timeout``. The
                default ``None`` keeps the previous behaviour (no timeout).
        """
        self.url = url
        self.parser = parser
        self.headers = headers
        self.timeout = timeout

    @staticmethod
    def headers():
        """Return the default request headers (a desktop Chrome User-Agent)."""
        ua_one = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
        ua_two = 'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'
        return {'User-Agent': ua_one + ua_two}

    def get_piece(self, element, attribute, attribute_name=None, operation=None):
        """Download ``self.url`` and find every ``element`` matching an attribute.

        Args:
            element: Tag name passed to ``find_all``.
            attribute: Attribute name used as the key of the ``find_all``
                attribute filter.
            attribute_name: Value the attribute is matched against.
            operation: Optional callable applied to the ``find_all`` result.

        Returns:
            ``operation(result)`` when ``operation`` is given, otherwise the
            ``find_all`` result itself.
        """
        # Go through the class: on instances ``self.headers`` is the stored
        # constructor argument, not the static method.
        header = Scraper.headers() if self.headers is None else self.headers
        parser = 'lxml' if self.parser is None else self.parser

        response = get(self.url, headers=header, timeout=self.timeout)
        soup = bs(response.text, parser)
        result = soup.find_all(element, {attribute: attribute_name})

        return operation(result) if operation is not None else result
24+
25+
26+
# EXAMPLE
27+
def lprint(this):
    """Print the ``img`` attribute of each item in ``this``, one per line."""
    for image in (tag.img for tag in this):
        print(image)
30+
31+
if __name__ == "__main__":
    # Run the demo only when this file is executed as a script, so that
    # importing the module does not fire a network request.
    Scraper('https://www.elfenix.com/').get_piece(
        "div",
        attribute="class",
        attribute_name="coleql_height",
        operation=lprint,
    )

0 commit comments

Comments
 (0)