Commit: to b1

Germey committed Jul 6, 2020
1 parent 4cfd5f3 commit e844660
Showing 4 changed files with 50 additions and 69 deletions.
README.md (3 changes: 0 additions & 3 deletions)
@@ -8,9 +8,6 @@
![Docker Pulls](https://img.shields.io/docker/pulls/germey/gerapy)
![PyPI - License](https://img.shields.io/pypi/l/gerapy)


-> Note: starting with Gerapy 2.x the project's positioning changes: Scrapyd is no longer supported in favor of Docker- and Kubernetes-based deployment, and development will also move toward visual Scrapy configuration and intelligent parsing. Stay tuned.
Distributed Crawler Management Framework Based on Scrapy, Scrapyd, Scrapyd-Client, Scrapyd-API, Django and Vue.js.

## Documentation
gerapy/__version__.py (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
-VERSION = (0, 9, '3a3')
+VERSION = (0, 9, '3b1')

__version__ = '.'.join(map(str, VERSION))
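For reference, joining the tuple yields a PEP 440 pre-release string; a quick illustrative check:

```python
VERSION = (0, 9, '3b1')
__version__ = '.'.join(map(str, VERSION))
print(__version__)  # 0.9.3b1 — a PEP 440 beta pre-release, hence the commit title "to b1"
```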

gerapy/server/core/views.py (110 changes: 47 additions & 63 deletions)
@@ -1,3 +1,4 @@
+import re
from pathlib import Path
from urllib.parse import unquote
import base64
@@ -251,6 +252,8 @@ def project_configure(request, project_name):
        configuration = json.dumps(data.get('configuration'), ensure_ascii=False)
        project.update(**{'configuration': configuration})

+        # for safe protection
+        project_name = re.sub('[\!\@\#\$\;\&\*\~\"\'\{\}\]\[\-\+\%\^]+', '', project_name)
        # execute generate cmd
        cmd = ' '.join(['gerapy', 'generate', project_name])
        p = Popen(cmd, shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE)
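The added blacklist regex strips shell metacharacters before `project_name` is spliced into a `shell=True` command. A stricter pattern (a sketch of an alternative, not what this commit ships) avoids the shell entirely by passing an argument list, or quotes the interpolated value with `shlex.quote`:

```python
import shlex
from subprocess import PIPE, Popen

project_name = 'demo; rm -rf /'  # hostile input, for illustration only

# Option 1: argument list + shell=False, so no shell ever parses the name
p = Popen(['gerapy', 'generate', project_name], stdin=PIPE, stdout=PIPE, stderr=PIPE)

# Option 2: if a shell string is unavoidable, quote the value explicitly
cmd = 'gerapy generate %s' % shlex.quote(project_name)
```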
@@ -634,17 +637,15 @@ def job_list(request, client_id, project_name):
    if request.method == 'GET':
        client = Client.objects.get(id=client_id)
        scrapyd = get_scrapyd(client)
-        try:
-            result = scrapyd.list_jobs(project_name)
-            jobs = []
-            statuses = ['pending', 'running', 'finished']
-            for status in statuses:
-                for job in result.get(status):
-                    job['status'] = status
-                    jobs.append(job)
-            return JsonResponse(jobs)
-        except ConnectionError:
-            return JsonResponse({'message': 'Connect Error'}, status=500)
+        result = scrapyd.list_jobs(project_name)
+        jobs = []
+        statuses = ['pending', 'running', 'finished']
+        for status in statuses:
+            for job in result.get(status):
+                job['status'] = status
+                jobs.append(job)
+        return JsonResponse(jobs)
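This hunk, and the similar ones below, drop the per-view try/except around `ConnectionError`. One way such handling can be centralized (a hypothetical sketch; the diff does not show where, or whether, the error is now caught) is a DRF custom exception handler:

```python
# settings.py (hypothetical): REST_FRAMEWORK = {'EXCEPTION_HANDLER': 'core.exceptions.handler'}
from requests import ConnectionError
from rest_framework.response import Response
from rest_framework.views import exception_handler


def handler(exc, context):
    # Let DRF's default handling run first
    response = exception_handler(exc, context)
    if response is None and isinstance(exc, ConnectionError):
        # Map Scrapyd connection failures to a single 500 payload
        return Response({'message': 'Connect Error'}, status=500)
    return response
```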



@api_view(['GET'])
@@ -663,21 +664,18 @@ def job_log(request, client_id, project_name, spider_name, job_id):
        client = Client.objects.get(id=client_id)
        # get log url
        url = log_url(client.ip, client.port, project_name, spider_name, job_id)
-        try:
-            # get last 1000 bytes of log
-            response = requests.get(url, timeout=5, headers={
-                'Range': 'bytes=-1000'
-            }, auth=(client.username, client.password) if client.auth else None)
-            # Get encoding
-            encoding = response.apparent_encoding
-            # log not found
-            if response.status_code == 404:
-                return JsonResponse({'message': 'Log Not Found'}, status=404)
-            # bytes to string
-            text = response.content.decode(encoding, errors='replace')
-            return HttpResponse(text)
-        except requests.ConnectionError:
-            return JsonResponse({'message': 'Load Log Error'}, status=500)
+        # get last 1000 bytes of log
+        response = requests.get(url, timeout=5, headers={
+            'Range': 'bytes=-1000'
+        }, auth=(client.username, client.password) if client.auth else None)
+        # Get encoding
+        encoding = response.apparent_encoding
+        # log not found
+        if response.status_code == 404:
+            return JsonResponse({'message': 'Log Not Found'}, status=404)
+        # bytes to string
+        text = response.content.decode(encoding, errors='replace')
+        return HttpResponse(text)
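The `Range` header here is a suffix byte range (RFC 7233): `bytes=-1000` asks for only the final 1000 bytes of the log. A standalone illustration (the URL is made up):

```python
import requests

# Servers that honor suffix ranges reply 206 Partial Content with just the
# tail of the file; servers that ignore Range reply 200 with the whole body.
resp = requests.get('http://127.0.0.1:6800/logs/demo/quotes/abc123.log',
                    headers={'Range': 'bytes=-1000'}, timeout=5)
print(resp.status_code, len(resp.content))
```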


@api_view(['GET'])
@@ -693,38 +691,29 @@ def job_cancel(request, client_id, project_name, job_id):
"""
if request.method == 'GET':
client = Client.objects.get(id=client_id)
try:
scrapyd = get_scrapyd(client)
result = scrapyd.cancel(project_name, job_id)
return JsonResponse(result)
except ConnectionError:
return JsonResponse({'message': 'Connect Error'})
scrapyd = get_scrapyd(client)
result = scrapyd.cancel(project_name, job_id)
return JsonResponse(result)


@api_view(['GET'])
@permission_classes([IsAuthenticated])
def del_version(request, client_id, project, version):
    if request.method == 'GET':
        client = Client.objects.get(id=client_id)
-        try:
-            scrapyd = get_scrapyd(client)
-            result = scrapyd.delete_version(project=project, version=version)
-            return JsonResponse(result)
-        except ConnectionError:
-            return JsonResponse({'message': 'Connect Error'})
+        scrapyd = get_scrapyd(client)
+        result = scrapyd.delete_version(project=project, version=version)
+        return JsonResponse(result)


@api_view(['GET'])
@permission_classes([IsAuthenticated])
def del_project(request, client_id, project):
    if request.method == 'GET':
        client = Client.objects.get(id=client_id)
-        try:
-            scrapyd = get_scrapyd(client)
-            result = scrapyd.delete_project(project=project)
-            return JsonResponse(result)
-        except ConnectionError:
-            return JsonResponse({'message': 'Connect Error'})
+        scrapyd = get_scrapyd(client)
+        result = scrapyd.delete_project(project=project)
+        return JsonResponse(result)


@api_view(['POST'])
@@ -829,18 +818,16 @@ def task_remove(request, task_id):
    :return:
    """
    if request.method == 'POST':
-        try:
-            # delete job from DjangoJob
-            task = Task.objects.get(id=task_id)
-            clients = clients_of_task(task)
-            for client in clients:
-                job_id = get_job_id(client, task)
-                DjangoJob.objects.filter(name=job_id).delete()
-            # delete task
-            Task.objects.filter(id=task_id).delete()
-            return JsonResponse({'result': '1'})
-        except:
-            return JsonResponse({'result': '0'})
+        # delete job from DjangoJob
+        task = Task.objects.get(id=task_id)
+        clients = clients_of_task(task)
+        for client in clients:
+            job_id = get_job_id(client, task)
+            DjangoJob.objects.filter(name=job_id).delete()
+        # delete task
+        Task.objects.filter(id=task_id).delete()
+        return JsonResponse({'result': '1'})
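The removed bare `except:` converted every failure into `{'result': '0'}`. If narrower handling were wanted instead (a sketch, not part of this commit), only the lookup realistically fails here, and Django raises a model-specific exception for it:

```python
# Hypothetical narrower alternative to the removed bare `except:`
try:
    task = Task.objects.get(id=task_id)
except Task.DoesNotExist:
    return JsonResponse({'result': '0'}, status=404)
```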



@api_view(['GET'])
@@ -915,10 +902,7 @@ def render_html(request):
        url = unquote(base64.b64decode(url).decode('utf-8'))
        js = request.GET.get('js', 0)
        script = request.GET.get('script')
-        try:
-            response = requests.get(url, timeout=5)
-            response.encoding = response.apparent_encoding
-            html = process_html(response.text)
-            return HttpResponse(html)
-        except Exception as e:
-            return JsonResponse({'message': e.args}, status=500)
+        response = requests.get(url, timeout=5)
+        response.encoding = response.apparent_encoding
+        html = process_html(response.text)
+        return HttpResponse(html)
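For context, the `url` parameter decoded at the top of `render_html` is base64 over a percent-encoded URL. A round-trip sketch of how a caller would encode it (the framing is inferred from the decode line above):

```python
import base64
from urllib.parse import quote, unquote

target = 'https://example.com/page?q=gerapy'
# Client side: percent-encode, then base64 — the mirror image of the
# server's base64.b64decode(...) followed by unquote(...)
payload = base64.b64encode(quote(target).encode('utf-8')).decode('utf-8')
# Server side, as in render_html:
assert unquote(base64.b64decode(payload).decode('utf-8')) == target
```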
requirements.txt (4 changes: 2 additions & 2 deletions)
@@ -5,12 +5,12 @@ django-cors-headers==3.2.0
django-apscheduler==0.3.0
furl==2.1.0
jinja2==2.10.1
-scrapy>=1.4.0
+scrapy==1.5.0
scrapy-redis==0.6.8
scrapy-splash==0.7.2
python-scrapyd-api==2.1.2
redis==2.10.5
-requests>=2.20.0
+requests==2.20.0
pymongo==3.9.0
pymysql==0.7.10
pyquery==1.2.17
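Swapping the `>=` ranges for `==` pins makes installs reproducible at the cost of automatic patch upgrades. A quick way to confirm the resolved environment matches the pins (illustrative only):

```python
import requests
import scrapy

# Both packages expose __version__; these asserts mirror the pins above.
assert scrapy.__version__ == '1.5.0'
assert requests.__version__ == '2.20.0'
```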
