Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files (browse the repository at this point in the history)
  • Loading branch information
JaredLGillespie committed Dec 4, 2018
2 parents addfcb7 + 9ff60ee commit 0ac7baf
Show file tree
Hide file tree
Showing 12 changed files with 71 additions and 38 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Byte-compiled / optimized DLL files
__pycache__/
*.pyc

# Distribution / packaging
.Python
Expand Down
6 changes: 6 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
language: python
matrix:
include:
- python: 2.7
env: TOXENV=py27
- python: 3.4
env: TOXENV=py34
- python: 3.5
env: TOXENV=py35
- python: 3.6
env: TOXENV=py36
install:
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
include README.rst
include CHANGELOG.rst
include LICENSE.txt

include proxyscrape/VERSION
1 change: 1 addition & 0 deletions proxyscrape/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0.1.1
5 changes: 3 additions & 2 deletions proxyscrape/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,9 @@
- get_resources(...) retrieves all defined resources
"""

from __future__ import absolute_import

from proxyscrape.proxyscrape import (
from .proxyscrape import (
add_resource,
add_resource_type,
create_collector,
Expand All @@ -51,4 +52,4 @@
get_resources
)

from proxyscrape.scrapers import Proxy
from .scrapers import Proxy
33 changes: 19 additions & 14 deletions proxyscrape/proxyscrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,20 +71,21 @@ def add_resource(name, func, resource_types=None):
If 'name' is already a defined resource.
"""
if name in RESOURCE_MAP:
raise ResourceAlreadyDefinedError(f'{name} is already defined as a resource')
raise ResourceAlreadyDefinedError('{} is already defined as a resource'.format(name))

if resource_types is not None:
if not _is_iterable(resource_types):
resource_types = {resource_types, }

for resource_type in resource_types:
if resource_type not in RESOURCE_TYPE_MAP:
raise InvalidResourceTypeError(f'{resource_type} is not a defined resource type')
raise InvalidResourceTypeError(
'{} is not a defined resource type'.format(resource_type))

with _resource_lock:
# Ensure not added by the time entered lock
if name in RESOURCE_MAP:
raise ResourceAlreadyDefinedError(f'{name} is already defined as a resource')
raise ResourceAlreadyDefinedError('{} is already defined as a resource'.format(name))

RESOURCE_MAP[name] = func

Expand All @@ -108,12 +109,14 @@ def add_resource_type(name, resources=None):
If 'name' is already a defined resource type.
"""
if name in RESOURCE_TYPE_MAP:
raise ResourceTypeAlreadyDefinedError(f'{name} is already defined as a resource type')
raise ResourceTypeAlreadyDefinedError(
'{} is already defined as a resource type'.format(name))

with _resource_type_lock:
# Ensure not added by the time entered lock
if name in RESOURCE_TYPE_MAP:
raise ResourceTypeAlreadyDefinedError(f'{name} is already defined as a resource type')
raise ResourceTypeAlreadyDefinedError(
'{} is already defined as a resource type'.format(name))

if resources is not None:
if not _is_iterable(resources):
Expand All @@ -122,7 +125,7 @@ def add_resource_type(name, resources=None):

for resource in resources:
if resource not in RESOURCE_MAP:
raise InvalidResourceError(f'{resource} is an invalid resource')
raise InvalidResourceError('{} is an invalid resource'.format(resource))
else:
resources = set()

Expand Down Expand Up @@ -161,12 +164,12 @@ def create_collector(name, resource_types=None, refresh_interval=3600, resources
If 'resource_type' is not a valid resource type.
"""
if name in COLLECTORS:
raise CollectorAlreadyDefinedError(f'{name} is already defined as a collector')
raise CollectorAlreadyDefinedError('{} is already defined as a collector'.format(name))

with _collector_lock:
# Ensure not added by the time entered lock
if name in COLLECTORS:
raise CollectorAlreadyDefinedError(f'{name} is already defined as a collector')
raise CollectorAlreadyDefinedError('{} is already defined as a collector'.format(name))

collector = Collector(resource_types, refresh_interval, resources)
COLLECTORS[name] = collector
Expand All @@ -188,7 +191,7 @@ def get_collector(name):
if name in COLLECTORS:
return COLLECTORS[name]

raise CollectorNotFoundError(f'{name} is not a defined collector')
raise CollectorNotFoundError('{} is not a defined collector'.format(name))


def get_resource_types():
Expand Down Expand Up @@ -305,20 +308,21 @@ def _validate_filter_opts(self, filter_opts):
return

if not isinstance(filter_opts, dict):
raise InvalidFilterOptionError(f'{filter_opts} must be a dictionary')
raise InvalidFilterOptionError('{} must be a dictionary'.format(filter_opts))

for key in filter_opts:
if key not in FILTER_OPTIONS:
raise InvalidFilterOptionError(f'{key} is an invalid filter option')
raise InvalidFilterOptionError('{} is an invalid filter option'.format(key))

def _validate_resource_types(self, resource_types):
if set(resource_types).difference(RESOURCE_TYPE_MAP.keys()):
raise InvalidResourceTypeError(f'{resource_types} defined an invalid resource type')
raise InvalidResourceTypeError(
'{} defined an invalid resource type'.format(resource_types))

def _validate_resources(self, resources):
for resource in resources:
if resource not in RESOURCE_MAP:
raise InvalidResourceError(f'{resource} is an invalid resource')
raise InvalidResourceError('{} is an invalid resource'.format(resource))

def apply_filter(self, filter_opts):
"""Applies a filter to the collector for retrieving proxies matching specific criteria.
Expand Down Expand Up @@ -426,7 +430,8 @@ def remove_proxy(self, proxies):
for proxy in proxies:
resource_type = proxy.source
if resource_type not in self._resource_map:
raise InvalidResourceTypeError(f'{resource_type} is not a valid resource type')
raise InvalidResourceTypeError(
'{} is not a valid resource type'.format(resource_type))

id = self._resource_map[resource_type]['id']
self._store.remove_proxy(id, proxy)
Expand Down
4 changes: 3 additions & 1 deletion proxyscrape/scrapers.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@ def _get_proxy_daily_proxies_parse_inner(element, type, source):
if len(row) == 0:
continue

proxies.add(Proxy(*row.split(':'), None, None, None, type, source))
params = row.split(':')
params.extend([None, None, None, type, source])
proxies.add(Proxy(*params))
return proxies


Expand Down
5 changes: 4 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
[metadata]
license_file = LICENSE.txt
license_file = LICENSE.txt

[wheel]
universal = 1
16 changes: 13 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@

here = path.abspath(path.dirname(__file__))

with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
with open(path.join(here, 'README.rst')) as f:
long_description = f.read()

with open(path.join(here, 'proxyscrape/VERSION'), 'rb') as f:
version = f.read().decode('ascii').strip()

setup(
name='proxyscrape',
version='0.1.0',
version=version,
description='A library for retrieving free proxies (HTTP, HTTPS, SOCKS4, SOCKS5).',
long_description=long_description,
url='https://github.com/jaredlgillespie/proxyscrape',
Expand All @@ -21,6 +24,12 @@
'License :: OSI Approved :: MIT License',
'Natural Language :: English',
'Operating System :: OS Independent',
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: Implementation :: CPython',
Expand All @@ -29,9 +38,10 @@
],
keywords='proxyscrape proxy scrape scraper',
packages=['proxyscrape'],
include_package_data=True,
test_suite='tests',
install_requires=[
'BeautifulSoup4',
'requests'
'requests',
]
)
12 changes: 9 additions & 3 deletions tests/test_proxyscrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,19 @@
import time
from threading import Thread
import unittest
from unittest.mock import Mock
try:
from unittest.mock import Mock
except ImportError:
from mock import Mock

# TODO: Change these to not be *
from proxyscrape import *
from proxyscrape.errors import *
from proxyscrape.proxyscrape import *
import proxyscrape.proxyscrape as ps
from proxyscrape.scrapers import Proxy
from proxyscrape.errors import (
CollectorAlreadyDefinedError, CollectorNotFoundError, InvalidFilterOptionError,
InvalidResourceError, InvalidResourceTypeError, ResourceAlreadyDefinedError,
ResourceTypeAlreadyDefinedError)


RESOURCE_MAP_COPY = ps.RESOURCE_MAP.copy()
Expand Down
17 changes: 4 additions & 13 deletions tests/test_scrapers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@
import os
import time
import unittest
from unittest.mock import Mock, patch
try:
from unittest.mock import Mock, patch
except ImportError:
from mock import Mock, patch
from proxyscrape.scrapers import Proxy, ProxyResource, RESOURCE_MAP


class TestProxyResource(unittest.TestCase):
def test_refreshes_if_expired(self):
expected = [Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')]
call_count = 0

def func():
nonlocal call_count
call_count += 1
return expected

pr = ProxyResource(func, -1)
Expand All @@ -49,11 +49,8 @@ def func():

def test_doesnt_refresh_if_not_expired(self):
expected = [Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')]
call_count = 0

def func():
nonlocal call_count
call_count += 1
return expected

pr = ProxyResource(func, 5)
Expand All @@ -68,11 +65,8 @@ def func():

def test_refreshes_if_forced(self):
expected = [Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')]
call_count = 0

def func():
nonlocal call_count
call_count += 1
return expected

pr = ProxyResource(func, 5)
Expand All @@ -87,11 +81,8 @@ def func():

def test_doesnt_refresh_if_lock_check(self):
expected = [Proxy('host', 'port', 'code', 'country', 'anonymous', 'type', 'source')]
call_count = 0

def func():
nonlocal call_count
call_count += 1
return expected

pr = ProxyResource(func, 5)
Expand Down
7 changes: 6 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
[tox]
envlist = py{36}
envlist = py27,py36

[testenv]
alwayscopy = true
basepython =
py27: python2.7
py34: python3.4
py35: python3.5
py36: python3.6
deps =
check-manifest
readme_renderer
flake8
coverage
mock
commands =
check-manifest --ignore tox.ini,.coveragerc,tests*
python setup.py check -m -s
Expand Down

0 comments on commit 0ac7baf

Please sign in to comment.