This repository has been archived by the owner on Jun 23, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 41
/
spider.py
205 lines (191 loc) · 8.56 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import httpx
import json
import traceback
from hashlib import md5
from loguru import logger
from .captcha import recognize
class Tree:  # task tree
    """A node of the task tree: one task record plus its child nodes.

    Attributes are plain and public:
    ``task``  - the task mapping this node wraps (as passed to the constructor).
    ``child`` - list of child ``Tree`` nodes, initially empty.
    """

    def __init__(self, task):
        """Wrap *task* in a node that starts out with no children."""
        self.task = task
        self.child = []

    def sort(self):
        """Sort this node's children by ``task['displayOrder']``, recursively.

        Sorting happens in place. If the children of this node cannot be
        sorted (for example one of them has no ``'displayOrder'`` key), a
        warning with the traceback is logged and their order is left as it
        was, but the descent into each child still happens so that deeper
        levels are sorted independently of a failure higher up.
        """
        try:
            self.child.sort(key=lambda node: node.task['displayOrder'])
        except Exception:
            logger.warning(f'[构建任务树] | 排序出错:\n{traceback.format_exc()}')
        # Recurse outside the try block: a failure at this level must not
        # prevent the subtrees from being ordered.
        for ch in self.child:
            ch.sort()
class Spider(httpx.AsyncClient):
    """Async HTTP client wrapping the ismartlearning.cn endpoints used here.

    Every public coroutine follows the same error convention: any exception
    raised while talking to the server or decoding its reply is logged as a
    warning (with the traceback) and the coroutine returns ``None``.
    """

    # Value of ``result['code']`` that login() accepts as a successful login.
    # NOTE(review): the meaning of -26 is taken from the original check only.
    _LOGIN_OK_CODE = -26

    _SERVICE_TICKET_URL = 'https://sso.ismartlearning.cn/v1/serviceTicket'
    _COURSE_LIST_URL = 'https://school.ismartlearning.cn/client/course/list-of-student?status=1'
    _CHAPTERS_URL = 'https://school.ismartlearning.cn/client/course/textbook/chapters'

    # Headers sent with the ticket-authenticated API calls.
    _TICKET_API_HEADERS = {
        'Origin': 'https://me.ismartlearning.cn',
        'Referer': 'https://me.ismartlearning.cn/',
        'X-Requested-With': 'XMLHttpRequest',
        'Accept-Encoding': 'gzip, deflate',
    }

    def __init__(self):
        """Create the client with redirect following enabled, logged out."""
        super().__init__(follow_redirects=True)
        self.is_login = False

    async def _post_course_list(self):
        """POST the student course-list request (first page, 100 entries)."""
        return await self.post(
            self._COURSE_LIST_URL,
            data={
                'pager.currentPage': 1,
                'pager.pageSize': 100,
            },
        )

    async def _request_service_ticket(self, service):
        """Request a ticket for the *service* URL and return its value.

        Raises whatever the HTTP call or the JSON/key lookups raise; callers
        handle that in their own ``try`` blocks.
        """
        response = await self.post(
            self._SERVICE_TICKET_URL,
            data={'service': service},
        )
        return response.json()['data']['serverTicket']

    async def login(self, username: str, password: str):
        """Log in with *username* / *password*.

        Returns ``{}`` when already logged in, the server's ``result`` mapping
        on success, and ``None`` (after logging a warning) on any failure,
        including a result code other than the accepted one.
        """
        try:
            if self.is_login:
                return {}
            self.cookies.clear()  # reset cookies
            logger.info('[登录] | 正在获取验证码...')
            result = await self.get('https://sso.ismartlearning.cn/captcha.html')
            code = recognize(result.content)
            # The server expects md5(md5(password) + fixed suffix), hex-encoded.
            inner_digest = md5(password.encode()).hexdigest().encode()
            password = md5(inner_digest + b'fa&s*l%$k!fq$k!ld@fjlk').hexdigest()
            logger.info('[登录] | 正在登录...')
            response = await self.post(
                'https://sso.ismartlearning.cn/v2/tickets-v2',
                data={
                    'username': username,
                    'password': password,
                    'captcha': code,
                },
                headers={
                    'X-Requested-With': 'XMLHttpRequest',
                    'Origin': 'https://me.ismartlearning.cn',
                    'Referer': 'https://me.ismartlearning.cn/',
                },
            )
            info = response.json()['result']
            logger.debug(f"[登录] | {info}")
            # Explicit check instead of `assert`: asserts are removed under
            # `python -O`, which would mark a rejected login as successful.
            if info['code'] != self._LOGIN_OK_CODE:
                raise RuntimeError(f'login rejected, result code {info["code"]!r}')
            self.is_login = True
            logger.success('[登录] | 登录成功')
            return info
        except Exception:
            logger.warning(f'[登录] | 登录出错:\n{traceback.format_exc()}')
            return None

    async def get_courses(self):
        """Fetch the course list; returns ``data.list`` of the reply or ``None``."""
        try:
            logger.info('[获取课程列表] | 正在获取课程列表...')
            courses = (await self._post_course_list()).json()['data']['list']
            logger.debug(f"[获取课程列表] | {courses}")
            logger.success('[获取课程列表] | 获取课程列表成功')
            return courses
        except Exception:
            logger.warning(f'[获取课程列表] | 获取课程列表出错:\n{traceback.format_exc()}')
            return None

    async def get_books(self, course_id):
        """Fetch the textbook list of a course; returns ``data`` or ``None``."""
        try:
            # This request is required; without it the next one reports an error.
            await self._post_course_list()
            response = await self.post(
                'https://school.ismartlearning.cn/client/course/textbook/list-of-student',
                data={'courseId': course_id},
            )
            return response.json()['data']
        except Exception:
            logger.warning(f'[获取书籍列表] | 获取书籍列表出错:\n{traceback.format_exc()}')
            return None

    async def get_tasks(self, book_id, book_type, course_id):
        """Fetch a book's tasks and assemble them into a sorted ``Tree``.

        Tasks carrying a ``parent_id`` are attached to the node with that id
        (a warning is logged when no such node exists); all others hang off a
        synthetic root node named after the book. Returns the root, or
        ``None`` after logging a warning on any failure.
        """
        try:
            logger.info('[获取任务列表] | 正在获取任务列表...')
            # NOTE(review): the parameterless request precedes the real one in
            # the original code; presumably it primes server state - confirm.
            await self.post(self._CHAPTERS_URL)
            response = await self.post(
                self._CHAPTERS_URL,
                data={
                    'bookId': book_id,
                    'bookType': book_type,
                    'courseId': course_id,
                },
            )
            tasks = response.json()['data']
            id_record = {task['id']: Tree(task) for task in tasks}
            book_name = (await self.book_info(book_id))['bookName']
            root = Tree({
                'book_id': tasks[0]['book_id'],
                'unitStudyPercent': 0,
                'name': book_name,
            })
            logger.info('[构建任务树] | 正在构建任务树...')
            for node in id_record.values():
                if 'parent_id' not in node.task:
                    root.child.append(node)
                    continue
                parent = id_record.get(node.task['parent_id'])
                if parent is not None:
                    parent.child.append(node)
                else:
                    node_name = f'{node.task.get("name","")}[id:{node.task["id"]}]'
                    logger.warning(f'[构建任务树] | {node_name} 父节点不存在')
            root.sort()
            logger.success('[构建任务树] | 构建任务树完成')
            logger.success('[获取任务列表] | 获取任务列表完成')
            return root
        except Exception:
            logger.warning(f'[获取任务列表] | 获取任务列表出错:\n{traceback.format_exc()}')
            return None

    async def get_paper(self, paper_id):
        """Fetch a task point's paper info (questions and answers).

        Returns the ``data`` field of the reply, or ``None`` on failure.
        """
        try:
            logger.info('[获取任务点] | 正在获取任务点信息...')
            service = 'https://xot-api.ismartlearning.cn/client/textbook/paperinfo'
            ticket = await self._request_service_ticket(service)
            logger.debug(f'[获取任务点] | {ticket}')
            response = await self.post(
                service,
                params={'ticket': ticket},
                data={'paperId': paper_id},
                headers=self._TICKET_API_HEADERS,
            )
            paper_info = response.json()['data']
            logger.success('[获取任务点] | 获取任务点信息完成')
            return paper_info
        except Exception:
            logger.warning(f'[获取任务点] | 获取任务点出错:\n{traceback.format_exc()}')
            return None

    async def user_info(self):
        """Fetch the student info; returns the decoded JSON reply or ``None``."""
        try:
            logger.info('[获取用户信息] | 正在获取用户信息...')
            response = await self.post('https://school.ismartlearning.cn/client/user/student-info')
            info = response.json()
            logger.success('[获取用户信息] | 获取用户信息完成')
            return info
        except Exception:
            logger.warning(f'[获取用户信息] | 获取用户信息出错:\n{traceback.format_exc()}')
            return None

    async def book_info(self, book_id):
        """Fetch a book's info record; returns ``data`` of the reply or ``None``."""
        try:
            logger.info('[获取书籍信息] | 正在获取书籍信息...')
            service = 'https://book-api.ismartlearning.cn/client/v2/book/info'
            ticket = await self._request_service_ticket(service)
            logger.debug(f'[获取书籍信息] | {ticket}')
            response = await self.post(
                service,
                params={'ticket': ticket},
                data={
                    'bookId': book_id,
                    'bookType': 0,
                },
                headers=self._TICKET_API_HEADERS,
            )
            book_info = response.json()['data']
            logger.debug(f'[获取书籍信息] | {json.dumps(book_info, indent=4)}')
            logger.success('[获取书籍信息] | 获取书籍信息完成')
            return book_info
        except Exception:
            logger.warning(f'[获取书籍信息] | 获取书籍信息出错:\n{traceback.format_exc()}')
            return None