Skip to content

Commit 7692ce7

Browse files
committed
获取验证码-然后识别
1 parent 49e85d5 commit 7692ce7

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed
2.02 KB
Loading
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# -*- coding: utf-8 -*-
2+
# @Time : 2017/8/21 13:26
3+
# @Author : play4fun
4+
# @File : 获取验证码-然后识别.py
5+
# @Software: PyCharm
6+
7+
"""
8+
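获取验证码-然后识别.py: download a captcha image over HTTP, preview and save it with OpenCV, then recognize its text with Tesseract (tesserocr).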
9+
"""
10+
11+
from io import BytesIO
12+
from PIL import Image
13+
import requests
14+
from tesserocr import PyTessBaseAPI
15+
import tesserocr
16+
import numpy as np
17+
import cv2
18+
19+
20+
# Send a desktop-browser User-Agent with the captcha request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
headers = {'User-Agent': user_agent}

# Captcha endpoint; the `d` query value is hard-coded in this script.
url = 'http://www.bjsuperpass.com/captcha.svl?d=1503144107405'
rs = requests.get(url, headers=headers, timeout=10)
print('获取公交一卡通网站的验证码', rs.status_code)
# Stop here on an HTTP error (4xx/5xx): otherwise the error page body would
# be handed to the image decoder and fail later with a misleading message.
rs.raise_for_status()
27+
28+
# Decode the downloaded bytes in memory and hand the pixels to OpenCV.
print('用BytesIO导入到Image,Numpy,Opencv')
s1 = BytesIO(rs.content)  # wrap the raw response body as a file-like object
img = Image.open(s1)
img = img.convert("RGB")  # force 3-channel RGB; `img` is also the OCR input later on
# PIL produces RGB data, while cv2.imshow/cv2.imwrite interpret arrays as BGR.
# Swap the channel order so the preview and the saved file keep true colors.
im = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
cv2.imshow('src', im)
cv2.waitKey(0)  # block until a key is pressed in the preview window
cv2.imwrite('captcha.jpg', im)
37+
38+
39+
# Run OCR through the configured engine instance. The module-level helper
# tesserocr.image_to_text() uses its own engine, so a whitelist set on `ocr`
# would never be applied to the recognition. The `with` block also releases
# the native Tesseract handle when recognition is done.
with PyTessBaseAPI() as ocr:
    # Restrict recognition to digits and lowercase ASCII letters.
    ocr.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyz")
    ocr.SetImage(img)  # `img` is the RGB PIL image decoded from the response
    print('验证码是', ocr.GetUTF8Text())

0 commit comments

Comments
 (0)