
partial port of fdic bootcamp code

commit 0ce6889aa61af6525ca2282933e64e08398fb982 1 parent fba1138
@zstumgoren authored
2  projects/fdic/.gitignore
@@ -0,0 +1,2 @@
+*.vim
+*.pyc
96 projects/fdic/save_to_csv.py
@@ -3,90 +3,84 @@
This module shows how to use the built-in csv module to
easily write out data to a file.
-
"""
+import csv
+import os
+from datetime import datetime
-# User variables
-savedir = 'C:\\data\\Python\\'
-outputheaders = ['bank', 'city', 'state', 'cert_num', 'acq_inst',
- 'closed', 'updated', 'url']
+# Import our scraper function to get the data
+from scraper import scrape_data
-# Import module created in Part I
-# from scraper import scrape_data
-
-# Import datetime modules
-from datetime import datetime
-import csv
+# Import our dynamically calculated project directory
+# It's a bit of magic that makes this code work on Macs, Windows, and Linux :)
+from settings import PROJECT_DIR
# Function to change date strings to YYYY-MM-DD format
def convertdatestring(datestring):
- # Create variable for our return value
- ret_date = ''
try:
dt = datetime.strptime(datestring, '%B %d, %Y')
ret_date = dt.strftime('%Y-%m-%d')
except ValueError:
print("Can't convert %s to date. Setting to NULL." % datestring)
- pass
-
return ret_date
-# Store the results of the scrape_data function
-# Results are dictionaries that look like below
+# scrape_data() returns a two-item list: the column headers and the data rows
+results = scrape_data()
+headers = results[0]
+data = results[1]
+
+"""
+Each data row is a list of values, as in the example below:
data = [
- {
- 'bank': 'First Alliance',
- 'city': 'Manchester',
- 'state': 'NH',
- 'cert_num': '34264',
- 'acq_inst': 'Southern New Hampshire Bank & Trust',
- 'closed': 'February 15, 2013',
- 'updated': 'February 20, 2013',
- 'url': 'http://www.fdic.gov/bank/individual/failed/firstalliance.html'
- },
- {
- 'bank': 'First Alliance',
- 'city': 'Manchester',
- 'state': 'NH',
- 'cert_num': '34264',
- 'acq_inst': 'Southern New Hampshire Bank & Trust',
- 'closed': 'February 15, 2013',
- 'updated': 'February 20, 2013',
- 'url': 'http://www.fdic.gov/bank/individual/failed/firstalliance.html'
- }
+ [
+ 'First Alliance',
+ 'Manchester',
+ 'NH',
+ '34264',
+ 'Southern New Hampshire Bank & Trust',
+ 'February 15, 2013',
+ 'February 20, 2013',
+ 'http://www.fdic.gov/bank/individual/failed/firstalliance.html'
+ ],
]
-
-# data = scrape_data()
+"""
# Let's mess up one row to demo try/except:
-# data[0]['closed'] = 'Jnauary 15, 2013'
+# data[0][5] = 'Jnauary 15, 2013'
# Iterate through each row of our data and verify the data types are valid
for row in data:
- # First, we'll verify cert_num is an integer
+ # First, we'll convert cert_num to an integer
try:
- row['cert_num'] = int(row['cert_num'])
+ row[3] = int(row[3])
except ValueError:
- print("%s is not a valid integer. Setting to zero." % row['cert_num'])
- row['cert_num'] = 0
+ print("%s is not a valid integer. Setting to zero." % row[3])
+ row[3] = 0
# Now we'll look at the two date fields. This is a little more
# complicated, so we'll create a function that we can use for
# both fields. We need to convert them to YYYY-MM-DD format.
try:
- row['closed'] = convertdatestring(row['closed'])
+ row[5] = convertdatestring(row[5])
except:
- row['closed'] = ''
+ row[5] = ''
try:
- row['updated'] = convertdatestring(row['updated'])
+ row[6] = convertdatestring(row[6])
except:
- row['updated'] = ''
+ row[6] = ''
+
+filename = os.path.join(PROJECT_DIR, 'fdic_output.txt')
+
+# This is a Python idiom you'll see often.
+# You're opening a file so that you can write data to it.
+# Then, you use the csv module's DictWriter to format and write the rows:
+# http://docs.python.org/2/library/csv.html#csv.DictWriter
-with open(savedir + 'fdic_output.txt', 'w') as outputfile:
- wtr = csv.DictWriter(outputfile, delimiter='|', fieldnames=outputheaders,
- lineterminator='\n', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
+with open(filename, 'wb') as outputfile:
+ wtr = csv.DictWriter(outputfile, delimiter='|', fieldnames=headers,
+ lineterminator='\n', quotechar='"', quoting=csv.QUOTE_NONNUMERIC)
# Add headers to output
wtr.writeheader()
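
The hunk ends at wtr.writeheader(), so the code that actually writes the data rows isn't visible in this diff. Because the scraper now returns each row as a plain list while csv.DictWriter expects dicts keyed by fieldname, one way the rest of the with block could look is the sketch below (an assumption about the unshown code, not part of the commit):

    # Sketch only: pair each list row with the headers so DictWriter can use it
    for row in data:
        wtr.writerow(dict(zip(headers, row)))

Alternatively, a plain csv.writer with the same delimiter and quoting options would accept the list rows directly and skip the zip step.
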
70 projects/fdic/save_to_db.py
@@ -1,35 +1,47 @@
-# Load fdic data into sqlite
+"""
+Load fdic data into sqlite
+"""
+import os
+import sqlite3
-# User variables
-csvfile = 'C:\\data\\Python\\fdic_output.txt'
+# Import our dynamically calculated project directory
+# It's a bit of magic that makes this code work on Macs, Windows, and Linux :)
+from settings import PROJECT_DIR
-# Import needed libraries
-import csv
-import sqlite3
+# Create a SQLite database in our project directory
+db_file = os.path.join(PROJECT_DIR, 'bootcamp.sqlite')
+
+# Now we're ready to connect to the database
+# http://docs.python.org/2/library/sqlite3.html
+conn = sqlite3.connect(db_file)
+
+# Once we're connected, we get a database "cursor"
+# (which lets you send SQL statements to the database)
+cur = conn.cursor()
+
+# Here's the SQL to create our database table
+TBL_CREATE_STMT = """
+ CREATE TABLE IF NOT EXISTS failed_banks (
+ bank varchar (54) NOT NULL,
+ city varchar (17) NOT NULL,
+        state varchar (4) NOT NULL,
+ cert_num INTEGER NOT NULL,
+ acq_inst VARCHAR (65) NOT NULL,
+ closed DATE NOT NULL,
+ updated DATE NOT NULL,
+ url VARCHAR (100) NOT NULL
+ )
+"""
+
+# Execute the create table sql
+cur.execute(TBL_CREATE_STMT)
+# Commit our change
+conn.commit()
-# Create the database and the table if don't already exist
-conn = sqlite3.connect('C:\\data\\python\\bootcamp.db')
-cur = conn.cursor() # This creates a cursor
-cur.execute('CREATE TABLE IF NOT EXISTS failed_banks (' \
- 'bank varchar (54) NOT NULL, ' \
- 'city varchar (17) NOT NULL, ' \
- 'state varchar (4) NOT NULL, ' \
- 'cert_num INTEGER NOT NULL, ' \
- 'acq_inst VARCHAR (65) NOT NULL, ' \
- 'closed DATE NOT NULL, ' \
- 'updated DATE NOT NULL, ' \
- 'url VARCHAR (100) NOT NULL' \
- ')')
-conn.commit() # Commit our change
-
-# Now let's add our data
-# Open and parse the file
-with open(csvfile, 'r') as data:
- rdr = csv.DictReader(data, delimiter='|', lineterminator='\n', quotechar='"')
- to_db = [(i['bank'], i['city'], i['state'], i['cert_num'], i['acq_inst'],
- i['closed'], i['updated'], i['url']) for i in rdr]
cur.executemany('INSERT INTO failed_banks (bank, city, state, cert_num, acq_inst, ' \
'closed, updated, url) VALUES (?, ?, ?, ?, ?, ?, ?, ?);', to_db)
-conn.commit() # Commit our inserts
-conn.close() # Close db connection
+# Commit our inserts
+conn.commit()
+# Close db connection
+conn.close()
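
Note that the unchanged cur.executemany(...) lines above still reference to_db, while this commit removes the csv.DictReader block that used to build it, so the script as shown would raise a NameError. A sketch of how the list might be populated instead, assuming the scraper results get reused the way save_to_csv.py reuses them (hypothetical, not part of this commit):

    # Hypothetical: build the insert rows straight from the scraper output
    from scraper import scrape_data

    headers, data = scrape_data()
    to_db = [tuple(row) for row in data]

The cert_num and date fields would still need the same cleanup that save_to_csv.py performs before being inserted.
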
44 projects/fdic/scraper.py
@@ -57,26 +57,34 @@ def scrape_data():
# Extract data points from the table row
data = tr.findAll('td')
- # Pluck out the text of each field and store as a
- # separate key in a dictionary
- # http://docs.python.org/2/tutorial/datastructures.html#dictionaries
- row = {
- 'bank_name': data[0].text,
- 'city': data[1].text,
- 'state': data[2].text,
- 'cert_num': data[3].text,
- 'acq_inst': data[4].text,
- 'closed': data[5].text.strip(),
- 'updated': data[6].text.strip(),
- 'url': 'http://www.fdic.gov/bank/individual/failed/' + data[0].a['href'],
- }
- # Add the dictionary to our final set of results
+ # Pluck out the text of each field, and perform a bit of clean-up
+ row = [
+ data[0].text,
+ data[1].text,
+ data[2].text,
+ data[3].text,
+ data[4].text,
+ data[5].text.strip(),
+ data[6].text.strip(),
+ 'http://www.fdic.gov/bank/individual/failed/' + data[0].a['href'],
+ ]
+ # Add the list of data to our results list (we'll end up with a list of lists)
results.append(row)
- # Return the results
- return results
+ # Let's package up the results with the field names
+ headers = [
+ 'bank_name',
+ 'city',
+ 'state',
+ 'cert_num',
+ 'acq_inst',
+ 'closed',
+ 'updated',
+ 'url'
+ ]
+ return [headers, results]
if __name__ == '__main__':
results = scrape_data()
- for row in results:
- print row['url']
+ for row in results[1]:
+ print row
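
Since scrape_data() now returns a two-item list rather than a list of dicts, callers unpack it the way save_to_csv.py does. A small usage sketch (zipping each row back into a dict is just for illustration):

    from scraper import scrape_data

    results = scrape_data()
    headers = results[0]   # ['bank_name', 'city', 'state', ...]
    rows = results[1]      # one list of values per failed bank
    for row in rows:
        print dict(zip(headers, row))
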
15 projects/fdic/settings.py
@@ -0,0 +1,15 @@
+"""
+This module contains code useful for general project-wide housekeeping.
+"""
+from os.path import abspath, dirname
+
+# Use some Python magic to dynamically determine the project directory.
+# __file__ is a special Python attribute that references the current
+# file. So in this case, we get the full path to "settings.py" (minus the actual file name).
+# We'll use this later to build the path to our output csv.
+PROJECT_DIR = abspath(dirname(__file__))
+
+# Alternatively, you could hard-code the path:
+# WINDOWS_PROJECT_DIR = 'C:\\Documents and Settings\\janedoe\\fdic'
+# MAC_PROJECT_DIR = '/Users/janedoe/fdic'
+# LINUX_PROJECT_DIR = '/home/janedoe/fdic'
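
For reference, the other scripts in this commit build their file paths off PROJECT_DIR with os.path.join, which is what keeps the code portable across operating systems:

    import os
    from settings import PROJECT_DIR

    # Paths used elsewhere in this commit
    csv_path = os.path.join(PROJECT_DIR, 'fdic_output.txt')
    db_path = os.path.join(PROJECT_DIR, 'bootcamp.sqlite')
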