# Settings for the MailmanArchiveScraper script.
#
# Copy MailmanArchiveScraper-example.cfg, rename the copy MailmanArchiveScraper.cfg
# and put it in the same directory as the script itself.
#
# The minimum settings you need to set are:
# 1. domain -- The domain name that your Mailman pages are on.
# 2. list_name -- Name of your mailing list.
# 3. email and password -- Required if your Mailman archive is password protected.
# 4. publish_dir -- The path to the local directory the files should be republished to.
# 5. publish_url -- If you're publishing the messages to a website.
#
# If you want an RSS feed to be created you'll also need to fill in the settings in that section.
###############################################################################
[Mailman]
# Settings associated with the remote Mailman server.
# The domain name that your Mailman pages are on.
# eg, in http://lists.example.com/mailman/listinfo/list-name
# it's the 'lists.example.com' part.
domain =
# Name of your mailing list.
# This can be found in the URL of the list info page.
# eg, in http://lists.example.com/mailman/listinfo/list-name
# it's the 'list-name' part.
list_name =
# The email address and password you use to log in to the private Mailman pages.
# If the email is left blank, we assume the list is public and the archives require no login.
# NOTE: the password is stored here in plain text, so keep this file out of
# version control and readable only by the user that runs the script.
email =
password =
###############################################################################
[Conversion]
# Settings about how to filter the pages before saving copies.
# Should email addresses be removed from the pages?
# Either 1 or 0 (for yes or no).
filter_email_addresses = 0
# All occurrences of the main list info URL
# eg, http://lists.example.com/mailman/listinfo/list-name
# or, http://lists.example.com/pipermail/list-name
# will be replaced with this string. If you don't want it changed, put the main list info URL here.
list_info_url =
# Should quoted text be stripped out of messages?
# If so, how many levels of quoting should be kept?
# 0 - Leave all quoted text as found.
# 1 - Remove all quoted text except the first level of quotes (>)
# 2 - Remove all quoted text except the first two levels (> and >>)
# etc
strip_quotes = 0
# A list of search/replace pairs, one per line, separated by '//'.
# Search strings are case-insensitive.
# This happens before filter_email_addresses is done.
# Lines after the first should be indented with a tab. eg:
# search_replace = Old//New
#     Something Old//Something New
#     Ancient//Modern
search_replace =
# The path to a file containing HTML. The contents will be inserted in the <head> of every page.
# eg, use this to insert links to stylesheets, Google Analytics javascript, etc.
head_html =
###############################################################################
[RSS]
# Settings related to the generated RSS feed.
# You can leave all of these blank if you don't want an RSS feed generated.
# The local path where the RSS file should be generated.
# eg /Users/phil/Sites/lists/html/list-name/rss.xml
rss_file =
# The number of most recent messages to include in the RSS file.
items_for_rss = 7
# The title of the RSS feed.
rss_title =
# The description of the RSS feed (usually one sentence).
rss_description =
###############################################################################
[Local]
# Settings about the local archive.
# The absolute path to a local directory in which all the list's HTML files will be stored.
# If it doesn't exist the script will try to create it.
# eg /Users/phil/Sites/examplesite/html/list-name/
publish_dir =
# The full web address of the directory referred to by publish_dir.
# eg http://www.example.com/list-name/
# If you're not publishing these on the web, leave this blank.
publish_url =
# How many hours back should we look for new messages when the script runs?
# Set it to more than the interval at which the script is run via cron.
# eg, if you run the script every hour, set this to maybe 4ish, to allow for times the script might fail.
# Set to 0 to fetch ALL messages. Every single page in the archive. All of them.
hours_to_go_back = 6
# Should we output extra data while the script is running?
# Probably set it to 1 if you're running on the command line.
# Set to 0 if running via cron - it'll print something only if something goes wrong.
# Either 1 or 0 (for yes or no).
verbose = 1