# test_url_regex.py -- forked from bear/python-twitter
# encoding: utf-8
from __future__ import unicode_literals, print_function
import json
import re
import sys
import unittest
import warnings
import twitter
from twitter import twitter_utils
import responses
from responses import GET, POST
# Install an "ignore" filter for every DeprecationWarning (the filter is
# process-wide, so it applies to everything run after this module is imported).
warnings.filterwarnings('ignore', category=DeprecationWarning)

# Matches an http(s) URL whose host ends in ".twitter.com" and whose path
# starts with "/1.1/".  Every literal dot is escaped so that "." cannot stand
# in for an arbitrary character (e.g. "twitterXcom" must not match).
DEFAULT_URL = re.compile(r'https?://.*\.twitter\.com/1\.1/.*')
# Fixture strings fed to ``twitter_utils.is_url`` by ``TestUrlRegex``:
# entries under "is_url" are asserted to be accepted, entries under
# "is_not_url" are asserted to be rejected.
#
# Every entry carries its own trailing comma.  Adjacent string literals are
# silently concatenated by Python, so a missing comma fuses two cases into a
# single bogus string and neither case is actually exercised.
#
# NOTE(review): "t.co/test", "http://foo.com/blah_blah",
# "http://example.com/2.3.1.3/", "http://a.b-c.de", "http://10.1.1.1" and
# "S.84" are now separate entries -- confirm that twitter_utils.is_url
# classifies each of them as listed here.
URLS = {
    "is_url": [
        "t.co/test",
        "http://foo.com/blah_blah",
        "http://foo.com/blah_blah/",
        "http://foo.com/blah_blah_(wikipedia)",
        "http://foo.com/blah_blah_(wikipedia)_(again)",
        "http://www.example.com/wpstyle/?p=364",
        "https://www.example.com/foo/?bar=baz&inga=42&quux",
        # "http://✪df.ws/123",
        # "https://➡.ws/",
        # "http://➡.ws/䨹",
        # "http://⌘.ws",
        # "http://⌘.ws/",
        "http://foo.com/blah_(wikipedia)#cite-1",
        "http://foo.com/blah_(wikipedia)_blah#cite-1",
        "http://foo.com/(something)?after=parens",
        # "http://☺.damowmow.com/",
        "http://code.google.com/events/#&product=browser",
        "http://j.mp",
        "http://foo.bar/?q=Test%20URL-encoded%20stuff",
        "http://1337.net",
        "http://example.com/2.3.1.3/",
        "http://a.b-c.de",
        "foo.com",
    ],
    "is_not_url": [
        "http://userid:password@example.com:8080",
        "http://userid:password@example.com:8080/",
        "http://userid@example.com",
        "http://userid@example.com/",
        "http://userid@example.com:8080",
        "http://userid@example.com:8080/",
        "http://userid:password@example.com",
        "http://userid:password@example.com/",
        "http://142.42.1.1/",
        "2.3",
        ".hello.com",
        "http://142.42.1.1:8080/",
        "ftp://foo.bar/baz",
        "http://مثال.إختبار",
        "http://例子.测试",
        "http://उदाहरण.परीक्षा",
        "http://",
        "http://.",
        "http://..",
        "http://../",
        "http://?",
        "http://??",
        "http://??/",
        "http://#",
        "http://##",
        "http://##/",
        "//",
        "//a",
        "///a",
        "///",
        "http:///a",
        "rdar://1234",
        "h://test",
        ":// should fail",
        "ftps://foo.bar/",
        "http://-error-.invalid/",
        # "http://a.b--c.de/",
        # "http://-a.b.co",
        # "http://a.b-.co",
        "http://223.255.255.254",
        "http://0.0.0.0",
        "http://10.1.1.0",
        "http://10.1.1.255",
        "http://224.1.1.1",
        "http://1.1.1.1.1",
        "http://123.123.123",
        "http://3628126748",
        "http://.www.foo.bar/",
        "http://.www.foo.bar./",
        "http://10.1.1.1",
        "S.84",
        "http://s.84",
        "L.512+MVG",
        "http://L.512+MVG",
    ],
}
class TestUrlRegex(unittest.TestCase):
def test_yes_urls(self):
for yes_url in URLS['is_url']:
self.assertTrue(twitter_utils.is_url(yes_url), yes_url)
def test_no_urls(self):
for no_url in URLS['is_not_url']:
self.assertFalse(twitter_utils.is_url(no_url), no_url)
def test_regex_finds_unicode(self):
string = "http://www.➡.ws"
string2 = "http://www.example.com"
pattern = re.compile(r'➡', re.U | re.I)
pattern2 = re.compile(r'(?:http?://|www\\.)*(?:[\w+-_][.])', re.I | re.U)
self.assertTrue(re.findall(pattern, string))
self.assertTrue(re.findall(pattern2, string2))
self.assertTrue(re.findall(pattern2, string))