File tree Expand file tree Collapse file tree 2 files changed +45
-0
lines changed
My06-验证码识别/使用Tessract-OCR识别验证码/北京公交一卡通_验证码 Expand file tree Collapse file tree 2 files changed +45
-0
lines changed Original file line number Diff line number Diff line change
1
+ # -*- coding: utf-8 -*-
2
+ # @Time : 2017/8/21 13:26
3
+ # @Author : play4fun
4
+ # @File : 获取验证码-然后识别.py
5
+ # @Software: PyCharm
6
+
7
+ """
8
+ 获取验证码-然后识别.py:
9
+ """
10
+
11
+ from io import BytesIO
12
+ from PIL import Image
13
+ import requests
14
+ from tesserocr import PyTessBaseAPI
15
+ import tesserocr
16
+ import numpy as np
17
+ import cv2
18
+
19
+
20
# Browser-like User-Agent string sent with the captcha request.
user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'
headers = {'User-Agent': user_agent}

# Captcha endpoint; the `d` query value is hard-coded
# (presumably a millisecond timestamp used as a cache-buster -- TODO confirm).
url = 'http://www.bjsuperpass.com/captcha.svl?d=1503144107405'
rs = requests.get(url, headers=headers, timeout=10)
print('获取公交一卡通网站的验证码', rs.status_code)
# Fail here on a 4xx/5xx answer so an error page is never handed to the
# image decoder further down, where it would fail with a misleading error.
rs.raise_for_status()
27
+
28
print('用BytesIO导入到Image,Numpy,Opencv')
# Wrap the raw response bytes in a file-like object so PIL can decode them
# without going through a temporary file.
s1 = BytesIO(rs.content)
img = Image.open(s1)
img = img.convert("RGB")
# `im` keeps the RGB channel order produced by PIL.
im = np.array(img)
# OpenCV's imshow/imwrite interpret 3-channel arrays as BGR, so convert
# before displaying or saving; otherwise red and blue come out swapped.
im_bgr = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
cv2.imshow('src', im_bgr)
cv2.waitKey(0)  # block until a key is pressed in the preview window
cv2.imwrite('captcha.jpg', im_bgr)
37
+
38
+
39
# Recognise the captcha with the engine instance that carries the whitelist.
# The module-level tesserocr.image_to_text() helper is not given this
# instance, so calling it instead would not use the variable set below.
# The `with` block also releases the native engine when recognition is done.
with PyTessBaseAPI() as ocr:
    # Restrict recognition to digits and lowercase ASCII letters.
    ocr.SetVariable("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyz")
    ocr.SetImage(img)  # `img` is the PIL image decoded earlier in the script
    captcha_text = ocr.GetUTF8Text()

print('验证码是', captcha_text)
You can’t perform that action at this time.
0 commit comments