-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
92 lines (78 loc) · 3.21 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import argparse
import os.path
import time
import pickle
import logging
import yaml
import sys
from linebot import LineBotApi
from linebot.models import TextSendMessage
from linebot.exceptions import LineBotApiError
from fb_scraper.utils.crypto.decrypt import decrypt
from fb_scraper.utils.logger_config import setup_logger
from fb_scraper.scraper import FacebookScraper, Sort
from selenium.common.exceptions import TimeoutException
logger = setup_logger(__name__)
def decrypt_yaml(path):
    """Decrypt the file at *path* and parse the result as YAML.

    Args:
        path: Path to the encrypted YAML configuration file.

    Returns:
        The parsed YAML document (typically a dict of config values).

    Exits the process with status 1 when the decrypted bytes are not
    valid YAML — e.g. a wrong decryption password produced garbage.
    """
    decrypted_data = decrypt(path)
    try:
        return yaml.safe_load(decrypted_data)
    except yaml.YAMLError:
        # yaml.YAMLError is the documented base class covering ReaderError
        # as well as ScannerError/ParserError; the original caught only
        # ReaderError, so decodable-but-malformed YAML crashed unhandled.
        logger.error("YAML failed to load, may be due to incorrect password or invalid YAML format.")
        sys.exit(1)  # non-zero: this is an error path, not a clean exit
if __name__ == '__main__':
    # Poll a Facebook group for new posts and push keyword-matching posts
    # to configured LINE users via the Messaging API.
    parser = argparse.ArgumentParser()
    parser.add_argument('enc_file', help='Encrypted config file')
    parser.add_argument('type', help='Type of post you wish to scrape, "listing" or "post"')
    parser.add_argument('-H', '--headless', action='store_true')
    args = parser.parse_args()

    sort_type = None
    headless = args.headless
    if args.type == 'listing':
        sort_type = Sort.CHRONOLOGICAL_LISTINGS
    elif args.type == 'post':
        sort_type = Sort.CHRONOLOGICAL
    else:
        # Fixed message: 'type' is a positional argument, not an option,
        # so the old text 'Expected --type ...' was misleading.
        print('Expected type "listing" or "post"')
        sys.exit(1)

    enc_file = args.enc_file
    if not os.path.exists(enc_file):
        print('File not found.')
        sys.exit(1)

    config = decrypt_yaml(enc_file)

    interval = 10  # seconds to wait between page refreshes
    scraper = FacebookScraper(headless=headless)
    scraper.login(config['fb_cred']['account'], config['fb_cred']['password'])
    line_bot_api = LineBotApi(config['line_bot']['channel_access_token'])
    keywords = config['keywords']

    # Id of the last post seen; None until the first successful fetch, so
    # the first iteration never triggers a notification.
    prev_post_id = None
    while True:
        try:
            scraper.to_group(config['group_id'], sort_type)
        except TimeoutException:
            # Page load timed out — wait and retry instead of crashing.
            logger.info(f'Waiting {interval} secs...')
            time.sleep(interval)
            logger.info('Refreshing page...')
            continue
        latest_post = scraper.fetch_post()
        logger.info('Latest post:')
        logger.info(latest_post)
        # Notify only when the post id changed since the previous loop.
        if prev_post_id is not None and prev_post_id != latest_post['id']:
            logger.info('NEW POST!')
            # An empty keyword config means "match every post"; otherwise a
            # keyword must occur in the content or the listing text.
            # NOTE(review): assumes config['keywords'] is either '' or an
            # iterable of strings — verify against the config schema.
            if keywords == '' or any(kw in latest_post['content'] for kw in keywords) or any(
                    kw in latest_post['listing_text'] for kw in keywords):
                logger.info('Keyword found!')
                push_message = config['message'].format(url=latest_post['url'], content=latest_post['content'],
                                                        listing_text=latest_post['listing_text'])
                for user in config['receivers']:
                    try:
                        logger.info('Sending message to {}...'.format(user))
                        line_bot_api.push_message(config['receivers'][user], TextSendMessage(text=push_message))
                    except LineBotApiError as e:
                        # Log then re-raise: a push failure aborts the whole
                        # scraper loop. Bare `raise` keeps the original
                        # traceback intact (unlike `raise e`).
                        logger.error(e)
                        raise
        prev_post_id = latest_post['id']
        logger.info(f'Waiting {interval} secs...')
        time.sleep(interval)
        logger.info('Refreshing page...')