Skip to content

Commit

Permalink
Merge pull request #116 from Lukas0907/master
Browse files Browse the repository at this point in the history
Update ubermedien.de spider and reformat
  • Loading branch information
Lukas0907 committed May 19, 2018
2 parents a5ccb4b + 45b84c3 commit 468d197
Show file tree
Hide file tree
Showing 40 changed files with 1,404 additions and 1,105 deletions.
2 changes: 2 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[flake8]
max-line-length = 89
67 changes: 36 additions & 31 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,30 +34,30 @@
extensions = []

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
source_suffix = ".rst"

# The master toctree document.
master_doc = 'index'
master_doc = "index"

# General information about the project.
project = 'feeds'
copyright = '2017, Florian Preinstorfer, Lukas Anzinger'
author = 'Florian Preinstorfer, Lukas Anzinger'
project = "feeds"
copyright = "2017, Florian Preinstorfer, Lukas Anzinger"
author = "Florian Preinstorfer, Lukas Anzinger"

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = '2017.08.14'
version = "2017.08.14"
# The full version, including alpha/beta/rc tags.
release = '2017.08.14'
release = "2017.08.14"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand All @@ -69,10 +69,10 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
pygments_style = "sphinx"

# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = False
Expand All @@ -83,7 +83,7 @@
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
html_theme = "sphinx_rtd_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
Expand All @@ -94,28 +94,28 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_static_path = ["_static"]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# This is required for the alabaster theme
# refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars
html_sidebars = {
'**': [
'about.html',
'navigation.html',
'relations.html', # needs 'show_related': True theme option to display
'searchbox.html',
'donate.html',
"**": [
"about.html",
"navigation.html",
"relations.html", # needs 'show_related': True theme option to display
"searchbox.html",
"donate.html",
]
}


# -- Options for HTMLHelp output ------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'feedsdoc'
htmlhelp_basename = "feedsdoc"


# -- Options for LaTeX output ---------------------------------------------
Expand All @@ -124,15 +124,12 @@
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',

# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',

# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',

# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
Expand All @@ -142,19 +139,21 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'feeds.tex', 'feeds Documentation',
'Florian Preinstorfer, Lukas Anzinger', 'manual'),
(
master_doc,
"feeds.tex",
"feeds Documentation",
"Florian Preinstorfer, Lukas Anzinger",
"manual",
)
]


# -- Options for manual page output ---------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'feeds', 'feeds Documentation',
[author], 1)
]
man_pages = [(master_doc, "feeds", "feeds Documentation", [author], 1)]


# -- Options for Texinfo output -------------------------------------------
Expand All @@ -163,7 +162,13 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'feeds', 'feeds Documentation',
author, 'feeds', 'One line description of project.',
'Miscellaneous'),
(
master_doc,
"feeds",
"feeds Documentation",
author,
"feeds",
"One line description of project.",
"Miscellaneous",
)
]
40 changes: 21 additions & 19 deletions docs/generate_spider_documentation_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,35 +4,37 @@


def render_template(spider_name):
conf = 'Configuration'
conf = "Configuration"

lines = ['.. _spider_{}:'.format(spider_name)]
lines.append('')
lines = [".. _spider_{}:".format(spider_name)]
lines.append("")
lines.append(spider_name)
lines.append('-' * len(spider_name))
lines.append('TODO: A summary about this spider with a link to the '
'`website <https://{}>`_.'.format(spider_name))
lines.append('')
lines.append("-" * len(spider_name))
lines.append(
"TODO: A summary about this spider with a link to the "
"`website <https://{}>`_.".format(spider_name)
)
lines.append("")
lines.append(conf)
lines.append('~' * len(conf))
lines.append('Add ``{}`` to the list of spiders:'.format(spider_name))
lines.append('')
lines.append('.. code-block:: ini')
lines.append('')
lines.append(' # List of spiders to run by default, one per line.')
lines.append(' spiders =')
lines.append(' {}'.format(spider_name))
lines.append('')
return '\n'.join(lines)
lines.append("~" * len(conf))
lines.append("Add ``{}`` to the list of spiders:".format(spider_name))
lines.append("")
lines.append(".. code-block:: ini")
lines.append("")
lines.append(" # List of spiders to run by default, one per line.")
lines.append(" spiders =")
lines.append(" {}".format(spider_name))
lines.append("")
return "\n".join(lines)


@click.command()
@click.argument('spider_name')
@click.argument("spider_name")
def main(spider_name):
print(render_template(spider_name))


if __name__ == '__main__':
if __name__ == "__main__":
main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 smartindent autoindent
10 changes: 10 additions & 0 deletions docs/spiders/uebermedien.com.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,14 @@ Add ``uebermedien.com`` to the list of spiders:
spiders =
uebermedien.com
Übermedien_ has a paywall for certain articles. If you want to crawl paid
articles, please provide your Steady ``username`` and ``password``.

.. code-block:: ini
[uebermedien.de]
username =
password =
.. _Übermedien: http://www.uebermedien.de
7 changes: 7 additions & 0 deletions feeds.cfg.dist
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,10 @@ useragent = feeds (+https://github.com/nblock/feeds)
# wels
# linz
# nachrichten

#[uebermedien.de]
## uebermedien.de has a paywall for certain articles.
## If you want to crawl paid articles, please provide your Steady username
## and password.
# username =
# password =
2 changes: 1 addition & 1 deletion feeds/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2017.08.14'
__version__ = "2017.08.14"
13 changes: 7 additions & 6 deletions feeds/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,23 @@


def _read_meta(root):
with open(os.path.join(root, 'pickled_meta'), 'rb') as f:
with open(os.path.join(root, "pickled_meta"), "rb") as f:
return pickle.load(f)


def cleanup_cache(cache_dir, max_age):
""" Removes cache entries in path that are older than max_age. """

logger.debug('Cleaning cache entries from {} ...'.format(cache_dir))
logger.debug("Cleaning cache entries from {} ...".format(cache_dir))

for root, dirs, files in os.walk(cache_dir, topdown=False):
if 'pickled_meta' in files:
if "pickled_meta" in files:
meta = _read_meta(root)
timestamp = datetime.fromtimestamp(meta['timestamp'])
timestamp = datetime.fromtimestamp(meta["timestamp"])
if timestamp < max_age:
logger.debug('Removing cache entry for URL {}'.format(
meta['response_url']))
logger.debug(
"Removing cache entry for URL {}".format(meta["response_url"])
)
for name in files:
os.remove(os.path.join(root, name))
os.rmdir(root)
Expand Down

0 comments on commit 468d197

Please sign in to comment.