Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ services:
container_name: redis4proxypool
ports:
- "6374:6379"
# restart: always
proxypool:
build: .
image: "germey/proxypool:master"
Expand All @@ -16,4 +15,4 @@ services:
# volumes:
# - proxypool/crawlers/private:/app/proxypool/crawlers/private
environment:
REDIS_HOST: redis4proxypool
PROXYPOOL_REDIS_CONNECTION_STRING: redis://@redis4proxypool:6379/0
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ services:
# volumes:
# - proxypool/crawlers/private:/app/proxypool/crawlers/private
environment:
REDIS_HOST: redis4proxypool
PROXYPOOL_REDIS_HOST: redis4proxypool
50 changes: 28 additions & 22 deletions proxypool/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Scheduler():
"""
scheduler
"""

def run_tester(self, cycle=CYCLE_TESTER):
"""
run tester
Expand All @@ -33,7 +33,7 @@ def run_tester(self, cycle=CYCLE_TESTER):
tester.run()
loop += 1
time.sleep(cycle)

def run_getter(self, cycle=CYCLE_GETTER):
"""
run getter
Expand All @@ -48,7 +48,7 @@ def run_getter(self, cycle=CYCLE_GETTER):
getter.run()
loop += 1
time.sleep(cycle)

def run_server(self):
"""
run server for api
Expand All @@ -57,42 +57,48 @@ def run_server(self):
logger.info('server not enabled, exit')
return
app.run(host=API_HOST, port=API_PORT, threaded=API_THREADED)

def run(self):
    """
    start the enabled components (tester, getter, server), each in its own
    process, and block until all of them have exited

    The process handles are published through the module-level globals
    tester_process, getter_process and server_process; a component that is
    disabled keeps the value None. On KeyboardInterrupt every started
    process is terminated. In all cases the started processes are joined
    and their final state is logged before returning.
    """
    global tester_process, getter_process, server_process
    # reset the handles so that a disabled component is None instead of
    # being undefined or left over from an earlier call
    tester_process = getter_process = server_process = None
    try:
        logger.info('starting proxypool...')
        if ENABLE_TESTER:
            tester_process = multiprocessing.Process(
                target=self.run_tester)
            # pid is None until the process has been started, so start
            # first and log afterwards
            tester_process.start()
            logger.info(f'starting tester, pid {tester_process.pid}...')

        if ENABLE_GETTER:
            getter_process = multiprocessing.Process(
                target=self.run_getter)
            getter_process.start()
            logger.info(f'starting getter, pid {getter_process.pid}...')

        if ENABLE_SERVER:
            server_process = multiprocessing.Process(
                target=self.run_server)
            server_process.start()
            logger.info(f'starting server, pid {server_process.pid}...')

        # wait for every component that was actually started
        for process in (tester_process, getter_process, server_process):
            if process is not None:
                process.join()
    except KeyboardInterrupt:
        logger.info('received keyboard interrupt signal')
        for process in (tester_process, getter_process, server_process):
            if process is not None:
                process.terminate()
    finally:
        started = [
            (name, process)
            for name, process in (
                ('tester', tester_process),
                ('getter', getter_process),
                ('server', server_process),
            )
            # disabled components have no process to join or inspect
            if process is not None
        ]
        # must call join method before calling is_alive
        for _, process in started:
            process.join()
        for name, process in started:
            logger.info(
                f'{name} is {"alive" if process.is_alive() else "dead"}')
        logger.info('proxy terminated')


Expand Down
24 changes: 12 additions & 12 deletions proxypool/setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from os.path import dirname, abspath, join
from environs import Env
from loguru import logger
from proxypool.utils.parse import parse_redis_connection_string


env = Env()
Expand All @@ -24,21 +23,23 @@
APP_TEST = IS_TEST = APP_ENV == TEST_MODE

# redis host
REDIS_HOST = env.str('REDIS_HOST', '127.0.0.1')
REDIS_HOST = env.str('PROXYPOOL_REDIS_HOST',
env.str('REDIS_HOST', '127.0.0.1'))
# redis port
REDIS_PORT = env.int('REDIS_PORT', 6379)
REDIS_PORT = env.int('PROXYPOOL_REDIS_PORT', env.int('REDIS_PORT', 6379))
# redis password, if no password, set it to None
REDIS_PASSWORD = env.str('REDIS_PASSWORD', None)
REDIS_PASSWORD = env.str('PROXYPOOL_REDIS_PASSWORD',
env.str('REDIS_PASSWORD', None))
# redis db, if no choice, set it to 0
REDIS_DB = env.int('REDIS_DB', 0)
# redis connection string, like redis://[password]@host:port or rediss://[password]@host:port/0
REDIS_CONNECTION_STRING = env.str('REDIS_CONNECTION_STRING', None)

if REDIS_CONNECTION_STRING:
REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, REDIS_DB = parse_redis_connection_string(REDIS_CONNECTION_STRING)
REDIS_DB = env.int('PROXYPOOL_REDIS_DB', env.int('REDIS_DB', 0))
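# redis connection string in the URL form accepted by redis-py's Redis.from_url, e.g.
# redis://[[username]:[password]@]host:port/0 or rediss://[[username]:[password]@]host:port/0
# (a password-only URL needs the leading colon: redis://:password@host:port/0),
# please refer to https://redis-py.readthedocs.io/en/stable/connections.html#redis.client.Redis.from_url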
REDIS_CONNECTION_STRING = env.str(
'PROXYPOOL_REDIS_CONNECTION_STRING', env.str('REDIS_CONNECTION_STRING', None))

# redis hash table key name
REDIS_KEY = env.str('REDIS_KEY', 'proxies:universal')
REDIS_KEY = env.str('PROXYPOOL_REDIS_KEY', env.str(
'REDIS_KEY', 'proxies:universal'))

# definition of proxy scores
PROXY_SCORE_MAX = 100
Expand Down Expand Up @@ -78,4 +79,3 @@

# logger.add(env.str('LOG_RUNTIME_FILE', join(LOG_DIR, 'runtime.log')), level='DEBUG', rotation='1 week', retention='20 days')
# logger.add(env.str('LOG_ERROR_FILE', join(LOG_DIR, 'error.log')), level='ERROR', rotation='1 week')

20 changes: 14 additions & 6 deletions proxypool/storages/redis.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import redis
from proxypool.exceptions import PoolEmptyException
from proxypool.schemas.proxy import Proxy
from proxypool.setting import REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, REDIS_DB, REDIS_KEY, PROXY_SCORE_MAX, PROXY_SCORE_MIN, \
from proxypool.setting import REDIS_CONNECTION_STRING, REDIS_HOST, REDIS_PORT, REDIS_PASSWORD, REDIS_DB, REDIS_KEY, PROXY_SCORE_MAX, PROXY_SCORE_MIN, \
PROXY_SCORE_INIT
from random import choice
from typing import List
Expand All @@ -18,14 +18,21 @@ class RedisClient(object):
redis connection client of proxypool
"""

def __init__(self, host=REDIS_HOST, port=REDIS_PORT, password=REDIS_PASSWORD, db=REDIS_DB,
             connection_string=REDIS_CONNECTION_STRING, **kwargs):
    """
    init redis client
    :param host: redis host
    :param port: redis port
    :param password: redis password
    :param db: redis db index
    :param connection_string: redis connection_string, if set it takes
        precedence over host, port, password and db
    :param kwargs: extra keyword arguments passed on to the redis client
    """
    # if set connection_string, just use it
    if connection_string:
        # pass decode_responses and kwargs here as well, so that both
        # branches build a client that returns str instead of bytes
        self.db = redis.StrictRedis.from_url(
            connection_string, decode_responses=True, **kwargs)
    else:
        self.db = redis.StrictRedis(
            host=host, port=port, password=password, db=db, decode_responses=True, **kwargs)

def add(self, proxy: Proxy, score=PROXY_SCORE_INIT) -> int:
"""
Expand All @@ -51,11 +58,13 @@ def random(self) -> Proxy:
:return: proxy, like 8.8.8.8:8
"""
# try to get proxy with max score
proxies = self.db.zrangebyscore(REDIS_KEY, PROXY_SCORE_MAX , PROXY_SCORE_MAX)
proxies = self.db.zrangebyscore(
REDIS_KEY, PROXY_SCORE_MAX, PROXY_SCORE_MAX)
if len(proxies):
return convert_proxy_or_proxies(choice(proxies))
# else get proxy by rank
proxies = self.db.zrevrange(REDIS_KEY, PROXY_SCORE_MIN , PROXY_SCORE_MAX)
proxies = self.db.zrevrange(
REDIS_KEY, PROXY_SCORE_MIN, PROXY_SCORE_MAX)
if len(proxies):
return convert_proxy_or_proxies(choice(proxies))
# else raise error
Expand Down Expand Up @@ -125,4 +134,3 @@ def batch(self, cursor, count) -> List[Proxy]:
conn = RedisClient()
result = conn.random()
print(result)

13 changes: 0 additions & 13 deletions proxypool/utils/parse.py

This file was deleted.

6 changes: 6 additions & 0 deletions proxypool/utils/proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@


def is_valid_proxy(data):
"""
check this string is within proxy format
"""
if data.__contains__(':'):
ip = data.split(':')[0]
port = data.split(':')[1]
Expand All @@ -11,6 +14,9 @@ def is_valid_proxy(data):


def is_ip_valid(ip):
"""
check this string is within ip format
"""
a = ip.split('.')
if len(a) != 4:
return False
Expand Down