-
Notifications
You must be signed in to change notification settings - Fork 0
/
test_parse.py
executable file
·55 lines (48 loc) · 1.31 KB
/
test_parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python3
# just try to parse all ffn meta and see what breaks
import sys
from bs4 import BeautifulSoup # type: ignore
import minerva
from minerva import extractFFNDeathCode, FFNFic
from weaver import Web, RemoteWebScraper
import weaver.enc as enc
from oil import oil
from typing import TYPE_CHECKING
if TYPE_CHECKING:
import psycopg2
def plog(msg: str, fname: str = "./test_parse.log") -> None:
    """Append *msg* to a log file and echo it to stdout.

    Args:
        msg: Text to record; a newline is appended when writing to the file.
        fname: Path of the log file, opened in append mode.
    """
    # Pin the encoding so the log does not depend on the platform locale
    # (a non-UTF-8 default could otherwise fail on non-ASCII text).
    with open(fname, 'a', encoding='utf-8') as f:
        f.write(msg + '\n')
    print(msg)
def testLid(db: 'psycopg2.connection', lid: int) -> None:
    """Scrape one FFN story page and run it through the FFN parser.

    Builds the ``/s/{lid}/1`` URL on www.fanfiction.net, fetches it with
    ``RemoteWebScraper.softScrape``, decodes the response with
    ``enc.decode`` and, unless the page reports a non-zero death code,
    parses it with ``minerva.ffn.parser.FFNParser`` and prints the
    resulting object's ``__dict__``.

    Args:
        db: Database connection handed to the scraper and the parser.
        lid: Story id used to build the URL and passed to the parser.

    Raises:
        SystemExit: With status 1 when ``enc.decode`` returns ``None``.
        AssertionError: When the scraped record has no ``created`` value.
    """
    url = f'https://www.fanfiction.net/s/{lid}/1'
    scraper = RemoteWebScraper(db)
    w = scraper.softScrape(url)
    assert w.created is not None

    dec = enc.decode(w.response, url)
    if dec is None:
        # Needs the f-prefix: without it the literal text "{url}" was logged
        # instead of the address that failed to decode.
        plog(f" {url} has unknown encoding")
        sys.exit(1)
    # Only the second element of the decode result is used here, as markup.
    html = dec[1]

    code = extractFFNDeathCode(html)
    if code != 0:
        # Non-zero death code: log it and skip parsing this story.
        plog(f" {url} is freshly dead: {code}")
        return

    soup = BeautifulSoup(html, 'html5lib')
    parser = minerva.ffn.parser.FFNParser()
    # NOTE(review): `// 1000` presumably converts milliseconds to seconds;
    # confirm against the units of `w.created`.
    fic = parser.get(db, lid, w.created // 1000, soup)
    print(fic.__dict__)
# Story ids that are checked first, before the sweep below.
qlids = [11575324, 13865144]

with oil.open() as db:
    for lid in qlids:
        testLid(db, lid)

    # Sweep ids in windows of 10000, running testLid on every id that
    # FFNFic.getLiveFids yields for the window.
    # `start` used to be assigned 0 and then 286387 immediately before this
    # value; both were overwritten without being read (presumably earlier
    # resume points), so only the effective assignment is kept.
    start = 5939060
    end = 15000000
    while start < end:
        ei = start + 10000
        for fid in FFNFic.getLiveFids(db, start, ei, 1):
            print(fid)
            testLid(db, fid)
        start = ei