Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor to use generators and support arbitrarily sized limits
- Loading branch information
Daniel
committed
Mar 2, 2017
1 parent
0b28ec3
commit 11832e5
Showing
5 changed files
with
137 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,2 @@ | ||
from .version import __version__ | ||
from .base import comment_search, comment_fetch, submission_search, submission_activity | ||
from .base import comment_fetch, comment_search, submission_activity, submission_search |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,90 +1,80 @@ | ||
import requests | ||
import praw | ||
|
||
try: | ||
from urllib import urlencode | ||
except ImportError: | ||
from urllib.parse import urlencode | ||
from .endpoints import ENDPOINTS, BASE_ADDRESS, LIMIT_MAX, LIMIT_DEFAULT | ||
|
||
|
||
def limit_chunk(limit, limit_max):
    """Return a list of limits given a maximum that can be requested per API
    request

    :param limit: The total number of items requested
    :param limit_max: The maximum number of items that can be requested at once
    :returns: A list of per-request limits whose sum equals ``limit``; an
        empty list when ``limit`` is zero or negative
    :raises ValueError: If items are requested but ``limit_max`` is not
        greater than zero
    """
    if limit <= 0:
        return []
    if limit_max <= 0:
        # A non-positive step never advances towards ``limit``; without this
        # guard the loop below would run forever.
        raise ValueError('limit_max must be greater than zero')

    limits = []
    requested = 0

    while requested < limit:
        # Every chunk is full-sized except possibly the last one.
        limits.append(min(limit_max, limit - requested))
        requested += limit_max

    return limits
|
||
|
||
def sort_type(value): | ||
directions = ['asc', 'desc'] | ||
|
||
if value in directions: | ||
return value | ||
else: | ||
raise ValueError('Value must be one of: {}'.format(directions)) | ||
|
||
|
||
base_address = 'https://apiv2.pushshift.io/reddit' | ||
endpoints = { | ||
'comment_search': { | ||
'params': { | ||
'q': str, | ||
'subreddit': str, | ||
'limit': int, | ||
'sort': sort_type, | ||
'after': int, | ||
'before': int | ||
}, | ||
'return_type': praw.models.Comment, | ||
'url': '/search/comment/' | ||
}, | ||
'comment_fetch': { | ||
'params': { | ||
'author': str, | ||
'after': int, | ||
'before': int, | ||
'limit': int, | ||
'subreddit': str, | ||
'sort': sort_type | ||
}, | ||
'return_type': praw.models.Comment, | ||
'url': '/comment/fetch/' | ||
}, | ||
'submission_search': { | ||
'params': { | ||
'q': str, | ||
'subreddit': str, | ||
'limit': int, | ||
'sort': sort_type, | ||
'after': int | ||
}, | ||
'return_type': praw.models.Submission, | ||
'url': '/search/submission/' | ||
}, | ||
'submission_activity': { | ||
'params': { | ||
'limit': int, | ||
'before': int, | ||
'after': int | ||
}, | ||
'return_type': praw.models.Submission, | ||
'url': '/submission/activity/' | ||
} | ||
} | ||
def coerce_kwarg_types(kwargs, param_types):
    """Return a dict with its values converted to types specified in param_types

    :param kwargs: The dict of parameters passed to the endpoint function
    :param param_types: The dict of all valid parameters and their types (taken
        from the 'param' key of the endpoint config)
    :returns: A new dict with the same keys as ``kwargs`` and each value passed
        through the matching converter from ``param_types``
    :raises ValueError: If a key in ``kwargs`` is not present in ``param_types``
    """
    coerced = {}

    for key, value in kwargs.items():
        # Only the lookup is guarded: an exception raised by the converter
        # itself (including a KeyError) must not be reported as an unknown
        # parameter.
        try:
            converter = param_types[key]
        except KeyError:
            raise ValueError('{} parameter is not accepted'.format(key))
        coerced[key] = converter(value)

    return coerced
|
||
|
||
def create_endpoint_function(name, config):
    """Dynamically create a function that handles a single API endpoint

    :param name: The name of the endpoint, which will also become the name of
        the function
    :param config: The configuration of the API endpoint; its 'params',
        'url' and 'return_type' keys are read
    :returns: A generator function named ``name`` that yields one
        ``config['return_type']`` object per item returned by the API
    """
    def endpoint_func(r, **kwargs):
        """Placeholder that becomes an endpoint handler through closure

        :param r: A reddit session object that is passed to instantiated
            Comment or Submission objects
        :param **kwargs: Query parameters passed to the API endpoint
        :raises ValueError: If a keyword argument is not accepted by the
            endpoint or fails validation
        """
        coerced_kwargs = coerce_kwarg_types(kwargs, config['params'])

        # Each follow-up request continues from the timestamp of the last
        # item received: 'after' it when sorting ascending, 'before' it
        # otherwise.
        direction = 'before'

        if 'limit' not in coerced_kwargs:
            coerced_kwargs['limit'] = LIMIT_DEFAULT
        if coerced_kwargs.get('sort', None) == 'asc':
            direction = 'after'

        for limit in limit_chunk(coerced_kwargs['limit'], LIMIT_MAX):
            coerced_kwargs['limit'] = limit
            url = '{}{}?{}'.format(BASE_ADDRESS, config['url'], urlencode(coerced_kwargs))
            data = requests.get(url).json()['data']

            for item in data:
                yield config['return_type'](r, _data=item)

            if len(data) < limit:
                # A short page means there is nothing more to fetch. Use a
                # plain return: raising StopIteration inside a generator is
                # turned into RuntimeError by PEP 479 (Python 3.7+).
                return

            coerced_kwargs[direction] = data[-1]['created_utc']

    endpoint_func.__name__ = name
    return endpoint_func
|
||
|
||
# Expose one handler per configured endpoint as a module-level function, so
# that each key of ENDPOINTS is importable by name from this module.
for name, config in ENDPOINTS.items():
    globals()[name] = create_endpoint_function(name, config)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import praw | ||
|
||
|
||
def sort_type(value):
    """Ensures values passed in are one of the valid set

    :param value: The requested sort direction
    :returns: ``value`` unchanged when it is 'asc' or 'desc'
    :raises ValueError: If ``value`` is anything else, including unhashable
        objects
    """
    # A tuple keeps the error message order stable between runs and lets
    # unhashable input fall through to ValueError instead of raising TypeError
    # from a set membership test.
    directions = ('asc', 'desc')

    if value in directions:
        return value

    raise ValueError('Value must be one of: {}'.format(list(directions)))
|
||
|
||
LIMIT_MAX = 500 | ||
LIMIT_DEFAULT = 50 | ||
BASE_ADDRESS = 'https://apiv2.pushshift.io/reddit' | ||
ENDPOINTS = { | ||
'comment_fetch': { | ||
'params': { | ||
'after': int, | ||
'author': str, | ||
'before': int, | ||
'limit': int, | ||
'sort': sort_type, | ||
'subreddit': str | ||
}, | ||
'return_type': praw.models.Comment, | ||
'url': '/comment/fetch/' | ||
}, | ||
'comment_search': { | ||
'params': { | ||
'after': int, | ||
'before': int, | ||
'limit': int, | ||
'q': str, | ||
'sort': sort_type, | ||
'subreddit': str | ||
}, | ||
'return_type': praw.models.Comment, | ||
'url': '/search/comment/' | ||
}, | ||
'submission_activity': { | ||
'params': { | ||
'after': int, | ||
'before': int, | ||
'limit': int | ||
}, | ||
'return_type': praw.models.Submission, | ||
'url': '/submission/activity/' | ||
}, | ||
'submission_search': { | ||
'params': { | ||
'after': int, | ||
'before': int, | ||
'limit': int, | ||
'q': str, | ||
'sort': sort_type, | ||
'subreddit': str | ||
}, | ||
'return_type': praw.models.Submission, | ||
'url': '/search/submission/' | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
__version__ = '0.0.2' | ||
__version__ = '0.1.0' |