forked from bs2kbs2k/calibre-store-standardebooks
-
Notifications
You must be signed in to change notification settings - Fork 0
/
standard_ebooks_description.py
134 lines (112 loc) · 4.51 KB
/
standard_ebooks_description.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# -*- coding: utf-8 -*-
__license__ = 'GPL 3'
__copyright__ = '2020, Anant Ahuja <anant_ahuja@outlook.com>'
__docformat__ = 'restructuredtext en'
from contextlib import closing
from calibre import browser
from calibre.utils.xml_parse import safe_xml_fromstring
from calibre.utils.opensearch.url import URL
class Description(object):
    '''
    A class for representing OpenSearch Description files.

    After a successful load() the instance exposes:

    urls
        List of URL objects (each with ``template`` and ``type`` set), one
        per usable ``Url`` element or ``link rel="search"`` element.
    url
        Text of the first ``Url`` element, filled in only when ``urls`` is
        empty (the version 1.0 layout); otherwise ''.
    format, shortname, longname, description, image, developer, contact,
    attribution, syndicationright, samplesearch
        Text content of the correspondingly named element, or '' if absent.
        ``sameplesearch`` is a misspelt alias of ``samplesearch`` kept so
        existing callers keep working.
    tags
        List of whitespace-separated words from the ``Tags`` element(s).
    adultcontent
        Result of the XPath boolean test for an ``AdultContent`` element
        whose text contains "true".
    '''

    # Result types tried by get_best_template(), most preferred first.
    _PREFERRED_TYPES = (
        'application/atom+xml',
        'application/rss+xml',
        'text/xml',
    )

    def __init__(self, url=""):
        '''
        The constructor which may pass an optional url to load from.

        d = Description("http://www.example.com/description")
        '''
        # Start with empty defaults so get_url_by_type() and
        # get_best_template() are usable (and return None) on an instance
        # that has not loaded anything yet.
        self.urls = []
        self.url = ''
        if url:
            self.load(url)

    def load(self, url):
        '''
        For loading up a description object from a url. Normally
        you'll probably just want to pass a URL into the constructor.

        Fetches the document with a 15 second timeout, parses it and
        replaces every attribute described on the class with the values
        found in the document. Errors raised while opening or parsing the
        document are not caught here.
        '''
        br = browser()
        with closing(br.open(url, timeout=15)) as f:
            doc = safe_xml_fromstring(f.read())
        self._parse(doc)

    @staticmethod
    def _make_url(template, mime_type):
        '''Build a URL object carrying the given template and type.'''
        entry = URL()
        entry.template = template
        entry.type = mime_type
        return entry

    @staticmethod
    def _first_text(doc, name):
        '''
        Return the joined text of the first element called ``name``
        (namespace ignored), or '' when there is no such element.
        '''
        return ''.join(
            doc.xpath('//*[local-name() = "%s"][1]//text()' % name))

    def _parse(self, doc):
        '''Populate this object's attributes from a parsed document.'''
        found = []

        # version 1.1 has repeating Url elements.
        for element in doc.xpath('//*[local-name() = "Url"]'):
            template = element.get('template')
            # The type attribute may be missing altogether; treat that as
            # "no type" instead of failing on None. Anything after ';'
            # (parameters such as a charset) is dropped.
            mime_type = (element.get('type') or '').split(';')[0].strip()
            if template and mime_type:
                found.append(self._make_url(template, mime_type))

        # Stanza catalogs.
        for element in doc.xpath('//*[local-name() = "link"]'):
            if element.get('rel') != 'search':
                continue
            href = element.get('href')
            mime_type = element.get('type')
            if href and mime_type:
                found.append(self._make_url(href, mime_type))

        self.urls = found

        # this is version 1.0 specific: the template is the element text.
        self.url = ''
        if not self.urls:
            self.url = self._first_text(doc, 'Url')

        self.format = self._first_text(doc, 'Format')
        self.shortname = self._first_text(doc, 'ShortName')
        self.longname = self._first_text(doc, 'LongName')
        self.description = self._first_text(doc, 'Description')
        self.image = self._first_text(doc, 'Image')
        self.samplesearch = self._first_text(doc, 'SampleSearch')
        # Historical misspelling, kept for backward compatibility.
        self.sameplesearch = self.samplesearch
        self.developer = self._first_text(doc, 'Developer')
        # Searched anywhere in the document ('//'), like every other field.
        self.contact = self._first_text(doc, 'Contact')
        self.attribution = self._first_text(doc, 'Attribution')
        self.syndicationright = self._first_text(doc, 'SyndicationRight')

        tag_text = ' '.join(doc.xpath('//*[local-name() = "Tags"]//text()'))
        # split() without an argument drops empty strings, so a missing or
        # blank Tags element gives [] and runs of whitespace give no
        # empty tags.
        self.tags = tag_text.split()

        self.adultcontent = doc.xpath(
            'boolean(//*[local-name() = "AdultContent" '
            'and contains(., "true")])')

    def get_url_by_type(self, type):
        '''
        Walks available urls and returns them by type. Only
        appropriate in opensearch v1.1 where there can be multiple
        query targets. Returns none if no such type is found.

        url = description.get_url_by_type('application/rss+xml')
        '''
        for url in self.urls:
            if url.type == type:
                return url
        return None

    def get_best_template(self):
        '''
        OK, best is a value judgement, but so be it. You'll get
        back either the atom, rss or first template available. This
        method handles the main difference between opensearch v1.0 and v1.1

        Order of preference: the version 1.0 ``url`` text, then the atom,
        rss and text/xml urls, then whichever url comes first. Returns
        None when nothing is available.
        '''
        # version 1.0
        if self.url:
            return self.url

        # atom, rss, other possible rss type -- in that order
        for wanted in self._PREFERRED_TYPES:
            match = self.get_url_by_type(wanted)
            if match:
                return match.template

        # otherwise just the first one
        if self.urls:
            return self.urls[0].template

        # out of luck
        return None