# Process.py
from functools import wraps
import anyio
import httpx
import Lib
import os

def initialize(API_DATA_CONFIG):
    Lib.init_import(API_DATA_CONFIG["module_name"])

def Data_parse(data):
    """For MODDERS:
    1. If you want a link verifier that rejects invalid links, there is nothing
       to edit here; if you do not want this feature, modify it to always
       return success.
    2. You can modify the first condition if the target site does not use
       numbers to identify its galleries.
    3. If links are used, be sure the user includes http:// or https:// to
       prevent further problems.
    Lib.CheckLink is implemented in Lib/NHentai.py (by default).
    """
    if data.isdigit():
        data = Lib.CheckLink(data, digit=True)
        return (0, data)
    elif "http://" in data or "https://" in data:
        result, data = Lib.CheckLink(data)
        return (result, data)
    else:
        return (1, "A link should have http:// or https://")
def validatename(func):
    """Info:
    Use this as a wrapper to prevent filename complications such as:
    "/" is normally used to separate directories;
    "\\" has a similar purpose, but on Windows.
    (Note: the double backslash in the source code escapes the backslash
    character. Read about it:
    https://www.w3schools.com/python/gloss_python_escape_characters.asp)
    If either of these is present, the program might treat the name as a
    different directory rather than as a file name.
    This wrapper also strips filenames of the characters that Windows
    reserves:
        < (less than)
        > (greater than)
        : (colon)
        " (double quote)
        / (forward slash)
        \\ (backslash)
        | (vertical bar or pipe)
        ? (question mark)
        * (asterisk)
    """
    @wraps(func)
    def wrapper(self):
        word_orig = func(self)
        word = word_orig
        # Drop reserved characters outright; map path separators to "_"
        forbidden = ['<', '>', ':', '"', "|", "?", "*"]
        for char in forbidden:
            word = word.replace(char, "")
        word = word.replace("\\", "_")
        word = word.replace("/", "_")
        return (word, word_orig)
    return wrapper
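
# Minimal usage sketch for validatename (the _DemoTitle class below is
# hypothetical and not part of the real Api handler; it only demonstrates the
# sanitizer's contract):
class _DemoTitle:
    @validatename
    def Title(self):
        return 'My<Title>: "part/2"'

# _DemoTitle().Title() returns ('MyTitle part_2', 'My<Title>: "part/2"'):
# the sanitized filename first, the original title second.
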
def sorttags(func):
    """Info:
    Use this wrapper to sort your tags.
    """
    @wraps(func)
    def wrapper(self):
        # Start here: by default the tags pass through unchanged
        tags = func(self)
        return tags
    return wrapper
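
# Illustrative sketch of a sorting pass the wrapper body could perform,
# assuming the handler returns a flat list of tag strings (an assumption;
# the default wrapper above deliberately passes tags through unchanged):
def _sorttags_example(tags):
    # Case-insensitive alphabetical sort with duplicates removed
    return sorted(set(tags), key=str.lower)
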
class CommunicateApi:
    """Info:
    Used by __main__ to communicate with NHentai.py (by default).
    This usually does not require modification unless you want to add
    missing features.
    """
    def __init__(self, data):
        self._Handler = Lib.Api(data)
        self.name = self._Handler.name

    def Pages(self):
        data = self._Handler.Pages()
        return data

    @sorttags  # Wrapper that will sort tags
    def Tags(self):
        """If the tags are all over the place, use the 'sorttags' wrapper to
        add a tag-sorting algorithm that keeps the tags cleanly sorted.
        """
        data = self._Handler.Tags()
        return data

    @validatename  # Wrapper that sanitizes an invalid name
    def Title(self):
        data = self._Handler.Title()
        return data

    def Link_Page(self, var):
        """Info:
        If 'var' is 20, this returns the page links for pages 1 through 20.
        'var' controls how many links are returned, e.g. var = 20 returns
        ['https://site.example/page1.jpg', ..., 'https://site.example/page20.jpg'].
        Link_Page is connected to Api.Direct_link in Lib.
        """
        data_list = [self._Handler.Direct_link(x) for x in range(1, int(var) + 1)]
        return data_list
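
# Contract sketch for Link_Page using a stub handler (hypothetical; the real
# handler is constructed from Lib.Api and its URLs depend on the backend):
class _StubHandler:
    name = "demo"

    def Direct_link(self, page):
        return f"https://site.example/page{page}.jpg"

# With self._Handler = _StubHandler(), Link_Page(3) would return:
# ['https://site.example/page1.jpg', 'https://site.example/page2.jpg',
#  'https://site.example/page3.jpg']
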
def File_iter(data):
    try:
        Iter = Lib.Iterdata(data)
        if not Iter.available:
            return False
        return Iter
    except (AttributeError, TypeError):
        return False

def version():
    try:
        return Lib.Current_Version
    except AttributeError:
        return False

def version_host():
    try:
        return Lib.Version_Host
    except AttributeError:
        return False

async def Queue(link, title_value, location, client, loggon, sem, task_status):
    async def __start_process():
        """Info:
        This is the main downloader of the program.
        It uses asynchronous functions to download multiple pages at the same
        time. A page is allowed up to 6 failures before it stops completely,
        freeing resources and bandwidth for the next page queued by the
        anyio.Semaphore().
        link - downloadable link to the file
        title_value - page file number
        location - location to save the page
        client - httpx AsyncClient object
        loggon - logger that records status to a log file (for debugging)
        sem - Semaphore object
        task_status - task-status object supplied by the task group; call
        task_status.started() to let the next task start. Failing to call it
        will cause the program to fail.

        Downloads a file from the given URL and saves it to the specified
        save path. On failure, the download is retried up to MAX_RETRIES
        times. If the download was interrupted, it continues from the last
        downloaded byte.
        """
        # Helper functions
        async def _get_head(client, link: str, return_err: bool = False, attempts: int = 5):
            redo = 0
            recent_exp = None
            while redo < attempts:
                try:
                    resp = await client.head(link)
                    resp.raise_for_status()
                    return resp
                except httpx.HTTPError as e:
                    redo += 1
                    recent_exp = e
                    loggon.exception(f'Error collecting header {redo}/{attempts}')
            else:
                # while-else: every attempt failed without returning
                raise recent_exp if recent_exp else AttributeError('An error occurred but the exception could not be found')

        def _check_if_done(Data):
            "Check early whether the progress dump marks the file as complete on disk. This check is important to reduce strain on the remote server."
            try:
                dl_path = Data.progress_status[title_value]["directory"]
                if os.path.getsize(dl_path) == Data.progress_status[title_value]["Max"]:
                    return True
                os.remove(dl_path)
                return False
            except (FileNotFoundError, KeyError):
                return False

        def _no_data_check(size: int, downloaded: int):
            "Compare the expected size with the bytes actually received to identify the correct action: 0 = done, 1 = resume, 2 = reset."
            if downloaded == size:
                return 0
            elif downloaded < size:
                return 1
            elif downloaded > size:
                return 2

        def _invoke_finish(cond: bool):
            Data.progress_status[title_value]["bool"] = cond
            VolatileData.response_proc.append(cond)
        task_status.started()  # Report this task as started to the task group
        # Before starting, check whether this download has completed before
        if _check_if_done(Data):
            _invoke_finish(True)
            return True
        # Set initial values
        MAX_RETRIES = 6
        retries = 0
        headers = {}
        downloaded_size = 0
        headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
        # Load client (note: this fresh client shadows the 'client' argument passed to Queue)
        async with httpx.AsyncClient() as client:
            # Get headers
            try:
                # If a progress dump is loaded
                download_path = os.path.normpath(Data.progress_status[title_value]["directory"])
                downloaded_ondata_max = Data.progress_status[title_value]['Max']
                downloaded_ondata_current = Data.progress_status[title_value]['Bytes']
                downloaded_size = os.path.getsize(download_path) if os.path.exists(download_path) else 0
                # Synchronize the dump with the actual file size
                if downloaded_size != downloaded_ondata_current:
                    Data.progress_status[title_value]['Bytes'] = downloaded_size
                    downloaded_ondata_current = downloaded_size
                _action_type = _no_data_check(downloaded_ondata_max, downloaded_ondata_current)
            except KeyError:
                # No dump exists: fetch headers and preload the progress entry
                resp = await _get_head(client, link)
                Data.progress_status[title_value] = {"bool": False, "Bytes": 0, "Max": False}
                # Derive the file name from the Content-Type header
                format_file = resp.headers.get('Content-Type').replace("image/", "")
                file_name = "%s.%s" % (title_value, format_file)
                download_path = os.path.join(os.getcwd(), location, file_name)
                Data.progress_status[title_value]["directory"] = download_path
                # Configure loaded data
                save_path = os.path.join(location, file_name)
                downloaded_size = os.path.getsize(save_path) if os.path.exists(save_path) else 0
                _action_type = _no_data_check(int(resp.headers.get('content-length', 0)), downloaded_size)
            # Initialize data
            loggon.info(f'Action required code: {_action_type}')
            if _action_type == 0:
                # Already fully downloaded
                Data.progress_status[title_value]["Max"] = downloaded_size
                Data.progress_status[title_value]["Bytes"] = downloaded_size
                _invoke_finish(True)
                return True
            elif _action_type == 1:
                # Partial download on disk: resume from the last downloaded byte
                headers['Range'] = f"bytes={downloaded_size}-"
            elif _action_type == 2:
                # Local file is larger than expected: reset the download
                os.remove(download_path)
                Data.progress_status[title_value]["Max"] = downloaded_size
                downloaded_size = 0
                headers = {}
            # ====
            while retries <= MAX_RETRIES:
                Data.progress_status[title_value]["Max"] = downloaded_size
                try:
                    async with client.stream('GET', link, headers=headers) as response:
                        response.raise_for_status()
                        response_code = response.status_code
                        # Check for the expected response codes
                        if 200 <= response_code < 300:
                            if response_code not in (200, 206):
                                loggon.warning(f"<{title_value}> Unexpected SUCCESSFUL HTTP response code: {response_code}")
                        else:
                            loggon.error(f"<{title_value}> Unexpected HTTP response code: {response_code}")
                            retries += 1  # count this as a failed attempt so the loop cannot spin forever
                            continue
                        async with await anyio.open_file(download_path, "ab") as asf:
                            # Check whether the file is partially downloaded
                            if response.headers.get("Content-Range"):
                                loggon.info(f"Resuming download from byte {downloaded_size}")
                            # Log all progress
                            max_size = int(response.headers.get("Content-Length", 0))
                            total_size = max_size + downloaded_size
                            Data.progress_status[title_value]["Max"] += max_size
                            Data.progress_status[title_value]["Bytes"] = total_size
                            async for chunk in response.aiter_bytes(chunk_size=8192):
                                downloaded_size += len(chunk)
                                Data.progress_status[title_value]["Bytes"] = downloaded_size
                                await asf.write(chunk)
                            loggon.info(f"<{title_value}> Downloaded {downloaded_size} of {total_size} bytes")
                        _invoke_finish(True)
                        return True
                except httpx.HTTPError as e:
                    retries += 1
                    # Retry up to MAX_RETRIES times before giving up
                    if retries <= MAX_RETRIES:
                        loggon.exception(f"\nP:{title_value}, Error: {e}, full data in logs")
                        loggon.info(f"<{title_value}> Problem occurred, retrying {retries}/{MAX_RETRIES}")
                        headers["Range"] = f"bytes={downloaded_size}-"
                        VolatileData.retry_proc.append(True)
                        await anyio.sleep(1)
                        continue
                    loggon.exception("DLException: ")
                    _invoke_finish(False)
                    return False
            else:
                # while-else: retries exhausted without a successful download
                _invoke_finish(False)
                return False

    async with sem:
        try:
            return await __start_process()
        except Exception:
            loggon.exception('Unknown Error')
            VolatileData.response_proc.append(2)
            return 2
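
# Hedged usage sketch for Queue (illustrative only; the URL, save directory,
# logger name, and semaphore width are assumptions, not values the real
# program uses). tg.start() supplies the task_status argument and waits for
# task_status.started() inside __start_process before returning.
async def _queue_demo():
    import logging
    init_datas()  # create the module-level Data / VolatileData objects
    sem = anyio.Semaphore(4)  # cap the number of concurrent page downloads
    async with httpx.AsyncClient() as client:
        async with anyio.create_task_group() as tg:
            await tg.start(Queue, "https://site.example/page1.jpg", 1,
                           "downloads", client, logging.getLogger("demo"), sem)
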
class Data_raw:
    def __init__(self):
        self.progress_status = {}

    def reset(self):
        self.progress_status = {}
        return self

class VData:
    def __init__(self, data):
        self.response_proc = []
        self.retry_proc = []
        self.data = data

    def total(self):
        Max = 0
        for n in list(self.data.progress_status):
            Max += self.data.progress_status[n]["Max"]
        return Max

    def progress(self):
        Bytes = 0
        for n in list(self.data.progress_status):
            Bytes += self.data.progress_status[n]["Bytes"]
        return Bytes

    def reset(self, data):
        self.response_proc = []
        self.retry_proc = []
        self.data = data
        return self
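
# Small helper sketch showing how the two counters combine into a progress
# percentage (illustrative; not called anywhere in the program):
def _progress_percent(vdata):
    # Fraction of bytes downloaded across all queued pages; 0.0 until any
    # page has reported a size.
    total = vdata.total()
    return (vdata.progress() / total * 100) if total else 0.0
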
def reset_datas():
    global Data
    global VolatileData
    try:
        Data = Data.reset()
        VolatileData = VolatileData.reset(Data)
    except (AttributeError, NameError):
        print("Data objects have not been initialized; call init_datas() first")

def init_datas():
    global Data
    global VolatileData
    Data = Data_raw()
    VolatileData = VData(Data)
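
# Typical lifecycle sketch (illustrative): the main program is expected to
# call init_datas() once before queuing downloads, then reset_datas() between
# galleries to clear the shared progress state.
#   init_datas()
#   ... queue downloads via Queue() ...
#   reset_datas()
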
if __name__ == "__main__":
    pass