In [10]:
# City-Data.com scraper, only for bigger cities (6000+ residents)
import re
import requests
from bs4 import BeautifulSoup

# Some constants
PARSER = 'lxml'
CITY_DATA_URL = 'http://www.city-data.com'
HOUSE_INC, CAPITA_INC, CONDO_VAL, RENT_VAL, COL_INDEX, LAT, LNG = [0, 1, 2, 3, 4, 5, 6]

# Regular Expressions
regex_household_income = re.compile(r'household.+\$([\d,]+).+\$([\d,]+)')
regex_percapita_income = re.compile(r'capita.+\$([\d,]+).+\$([\d,]+)')
regex_condo_value = re.compile(r'condo.+\$([\d,]+).+\$([\d,]+)')
regex_rent_value = re.compile(r'\$([\d,]+)')
regex_float = re.compile(r'(\d+\.\d+)')

# Dictionary to hold the city data
incomes_col_by_state = {}

# Get the list of states table from the home page
result = requests.get(CITY_DATA_URL)
soup_home = BeautifulSoup(result.content, PARSER)
state_table = soup_home.find('table')

n = 0

# Visit each state page
for state in state_table.find_all('a'):
    state_url = CITY_DATA_URL + state['href']

    # Get the list of cities on this state page
    result = requests.get(state_url)
    soup_state = BeautifulSoup(result.content, PARSER)
    city_table = soup_state.find('table', id='cityTAB')
    
    # Visit each (major) city page
    for city in city_table.find_all('a'):
        if 'javascript' not in city['href']:
            city_url = CITY_DATA_URL + '/city/' + city['href']

            # Get the city page
            result = requests.get(city_url)
            soup_city = BeautifulSoup(result.content, PARSER)
            
            city_name, state_name = soup_city.find('h1', {'class' : 'city'}).text.split(',')
            city_name = city_name.strip()
            state_name = state_name.strip()
            
            if state_name not in incomes_col_by_state:
                incomes_col_by_state[state_name] = {}
            
            incomes_col_by_state[state_name][city_name] = {}
            
            # Find information about cities we're after
            median_income = soup_city.find('section', id='median-income')
            median_rent = soup_city.find('section', id='median-rent')
            col_index = soup_city.find('section', id='cost-of-living-index')
            coordinates = soup_city.find('section', {'class' : 'coordinates'})
            
            if median_income:
                median_income = median_income.text
                print(median_income)
                median_household_income = re.search(regex_household_income,
                                                    median_income).group(1)
                median_percapita_income = re.search(regex_percapita_income,
                                                    median_income).group(1)
                median_condo_value = re.search(regex_condo_value,
                                               median_income).group(1)
                incomes_col_by_state[state_name][city_name][HOUSE_INC] = median_household_income
                incomes_col_by_state[state_name][city_name][CAPITA_INC] = median_percapita_income
                incomes_col_by_state[state_name][city_name][CONDO_VAL] = median_condo_value
            if median_rent:
                median_rent = median_rent.text
                median_rent_value = re.search(regex_rent_value, median_rent).group(1)
                incomes_col_by_state[state_name][city_name][RENT_VAL] = median_rent_value
            if col_index:
                col_index = col_index.text
                col_index = re.search(regex_float, col_index).group(1)
                incomes_col_by_state[state_name][city_name][COL_INDEX] = col_index
            if coordinates:
                coordinates = coordinates.text
                lat, lng = re.findall(regex_float, coordinates)
                incomes_col_by_state[state_name][city_name][LAT] = lat
                incomes_col_by_state[state_name][city_name][LNG] = lng
            
            n += 1
            
            if n%10==0: print(str(n) + ' cities processed...')
    
print('done.')

Estimated median household income in 2013: $79,045 (it was $55,546 in 2000)
Anchorage:$79,045AK:$72,237
Estimated per capita income in 2013: $36,344 (it was $25,287 in 2000)
Anchorage municipality income, earnings, and wages data
Estimated median house or condo value in 2013: $295,500 (it was $152,300 in 2000)
Anchorage:$295,500AK:$254,000
Mean prices in 2013: All housing units: $315,059; Detached houses: $352,041; Townhouses or other attached units: $247,904; In 2-unit structures: $256,236; In 3-to-4-unit structures: $273,367; In 5-or-more-unit structures: $143,112; Mobile homes: $48,245
Estimated median household income in 2015: $72,975
Badger:$72,975AK:$73,355
Estimated per capita income in 2015: $33,553
Badger CDP income, earnings, and wages data
Estimated median house or condo value in 2015: $229,300
Badger:$229,300AK:$259,600
Mean prices in 2015: All housing units: $225,681; Detached houses: $239,271; Townhouses or other attached units: $213,598; In 2-unit structures: $241,931; I

AttributeError: 'NoneType' object has no attribute 'group'