From d1230cf4265f05dcdb0dd034ee41d9b81b19b95a Mon Sep 17 00:00:00 2001
From: Serdar Tumgoren
Date: Tue, 24 Jan 2012 23:47:47 -0500
Subject: [PATCH] partial work on fec efiling scrape tutorial that
 demonstrates POST request

---
 tutorials/webscraping101/fec_efiles_scrape.py | 82 +++++++++++++++++++++++++
 1 file changed, 82 insertions(+)
 create mode 100644 tutorials/webscraping101/fec_efiles_scrape.py

diff --git a/tutorials/webscraping101/fec_efiles_scrape.py b/tutorials/webscraping101/fec_efiles_scrape.py
new file mode 100644
index 0000000..50ec0c4
--- /dev/null
+++ b/tutorials/webscraping101/fec_efiles_scrape.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+"""
+The third scrape in our series demonstrates how to fetch data from
+a remote server by making a POST request.
+
+For this scrape, we'll request a list of campaign finance filing
+links from the Federal Election Commission. The search form for
+these electronic filings can be found at the link below:
+
+    http://fec.gov/finance/disclosure/efile_search.shtml
+
+"""
+#TODO: add documentation links for language features and libs
+import sys
+
+import requests
+from BeautifulSoup import BeautifulSoup
+
+# Build a dictionary containing the form field names and values
+form_data = {
+    'name': 'Romney',   # committee name field
+    'type': 'P',        # committee type is P for Presidential
+    'frmtype': 'F3P',   # form type
+}
+
+# Make the POST request by passing in our form data. This returns a
+# response object containing the status code for the request and the
+# raw HTML of the page.
+response = requests.post('http://query.nictusa.com/cgi-bin/dcdev/forms/', data=form_data)
+
+# If the response is OK, then process the HTML
+if response.status_code == 200:
+
+    # The raw HTML is stored in the response object's "text" attribute
+    soup = BeautifulSoup(response.text)
+    links = soup.findAll('a')
+
+    # Extract the download links
+    download_links = []
+    for link in links:
+        if link.text == 'Download':
+            download_links.append(link)
+
+    #NOTE: You can tighten up the above code by leveraging BeautifulSoup's
+    # more advanced features, which allow you to filter the results of the
+    # "findAll" method by using regular expressions or lambda functions.
+    #
+    # Below, we use a lambda function to filter for links with "href"
+    # attributes starting with a certain URL path:
+
+    #download_links = soup.findAll('a', href=lambda path: path.startswith('/cgi-bin/dcdev/forms/DL/'))
+
+    # To learn more:
+    # http://www.crummy.com/software/BeautifulSoup/documentation.html#The basic find method: findAll(name, attrs, recursive, text, limit, **kwargs)
+    # http://stackoverflow.com/questions/890128/python-lambda-why
+    # http://docs.python.org/howto/regex.html
+
+else:
+    # Gracefully exit the program if the response code is not 200
+    sys.exit("Response code not OK: %s" % response.status_code)
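+
+#NOTE: The NOTE earlier in the script also mentions regular expressions as
+# a way to filter "findAll" results. A commented-out sketch of that
+# approach, assuming the same URL path as the lambda example:
+#import re
+#download_links = soup.findAll('a', href=re.compile('^/cgi-bin/dcdev/forms/DL/'))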
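+
+#NOTE: The commented-out snippet below is an illustrative next step, not
+# part of the scrape above. It shows how you might turn the relative
+# "href" paths on the download links into full URLs. The host name is an
+# assumption based on the form URL we posted to earlier:
+
+#for link in download_links:
+#    print 'http://query.nictusa.com' + link['href']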
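+
+#NOTE: Finally, a hypothetical sketch of fetching the first filing with a
+# follow-up GET request and saving it locally. The "filing.txt" name is
+# made up, and we assume at least one download link was found:
+
+#filing = requests.get('http://query.nictusa.com' + download_links[0]['href'])
+#with open('filing.txt', 'w') as f:
+#    f.write(filing.content)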