In [3]:
from selenium.webdriver import Chrome
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import pymongo
from selenium.common.exceptions import NoSuchElementException
from cost_of_living import *
from functools import reduce
import matplotlib.pyplot as plt
%matplotlib inline
from hidden import debt
from math import cos, sqrt

## Write a function to produce cost-of-living in multiple cities

In [2]:
# Load the per-city price table from the spreadsheet.
data = pd.read_excel('data/citydf.xlsx')
# Assign the renamed column back to the frame. Calling an in-place ``replace``
# on the ``data['city']`` selection operates on an intermediate Series and does
# not update ``data`` once pandas Copy-on-Write semantics are active.
data['city'] = data['city'].replace('Washington', "Washington D.C.")

In [4]:
# Monthly basket used by the cost_per_city_* generators: each value is
# multiplied by the item's looked-up price, except 'groceries (dollars)',
# which is added to the total as a flat amount.
monthly_constants = {
    # Leisure
    'Cinema, International Release, 1 Seat': 2,
    'Fitness Club, Monthly Fee for 1 Adult': 2,
    # Household bills
    'Basic (Electricity, Heating, Cooling, Water, Garbage) for 915 sq ft Apartment': 1,
    'Internet (60 Mbps or More, Unlimited Data, Cable/ADSL)': 1,
    # Transport
    'Gasoline (1 gallon)': 15,
    # Food and drink
    'Imported Beer (11.2 oz small bottle)': 10,
    'Bottle of Wine (Mid-Range)': 2,
    'Cappuccino (regular)': 30,
    'Meal, Inexpensive Restaurant': 8,
    # Clothing
    '1 Pair of Jeans (Levis 501 Or Similar)': 1,
    # Flat amount, not a quantity
    'groceries (dollars)': 600,
}

# Key in ``monthly_constants`` whose value is a flat dollar amount rather than
# a quantity to multiply by a looked-up price.
_GROCERIES_ITEM = 'groceries (dollars)'

# Column position read as the price of a row.
# NOTE(review): assumes the frame's fourth column is the price (the scraped
# frames shown in this notebook are laid out category, city, item, price) --
# confirm before switching to a column name.
_PRICE_POSITION = 3


def _unit_price(city_rows, city, item):
    """Return the price of ``item`` from the rows of a single city as a float.

    Raises IndexError (the same exception type the original positional lookup
    produced) when the city has no row for ``item``, with a message naming
    the missing pair.
    """
    matches = city_rows[city_rows['item'] == item]
    if matches.empty:
        raise IndexError('no row for item %r in city %r' % (item, city))
    # Only the first matching row is used.
    return float(matches.values[0][_PRICE_POSITION])


def _totals_by_city(citydf, monthly_constants, rent_item, total_key, split_between=1):
    """Yield one ``{'city': ..., total_key: ...}`` dict per unique city.

    The total is the sum of ``price * quantity`` for every entry of
    ``monthly_constants`` (the groceries entry is added as-is), plus the rent
    taken from ``rent_item``. When ``split_between`` is not 1 the rent is
    divided by it and rounded to 2 decimals before being added.
    """
    for city in citydf.city.unique():
        # Filter by city once instead of once per item.
        city_rows = citydf[citydf['city'] == city]
        total = 0
        for item, amount in monthly_constants.items():
            if item == _GROCERIES_ITEM:
                total += amount
            else:
                total += _unit_price(city_rows, city, item) * amount
        rent = _unit_price(city_rows, city, rent_item)
        if split_between != 1:
            rent = round(rent / split_between, 2)
        total += rent
        yield {'city': city, total_key: total}


def cost_per_city_1B_Center(citydf, monthly_constants):
    """Yield per-city totals using the full 1-bedroom city-centre rent.

    Parameters
    ----------
    citydf : pandas.DataFrame
        Price table with at least ``city`` and ``item`` columns.
    monthly_constants : dict
        Item name -> monthly quantity (or flat dollars for groceries).

    Yields
    ------
    dict
        ``{'city': <name>, '1_Bed_Center_Total': <total>}``

    Raises
    ------
    IndexError
        If a city lacks a row for a basket item or for the rent item.
    """
    yield from _totals_by_city(
        citydf, monthly_constants,
        'Apartment (1 bedroom) in City Centre', '1_Bed_Center_Total')


def cost_per_city_3B_Center(citydf, monthly_constants):
    """Yield per-city totals using one third of the 3-bedroom city-centre rent.

    Same inputs and errors as ``cost_per_city_1B_Center``; each yielded dict
    is ``{'city': <name>, '3_Bed_Center_Total': <total>}``.
    """
    yield from _totals_by_city(
        citydf, monthly_constants,
        'Apartment (3 bedrooms) in City Centre', '3_Bed_Center_Total',
        split_between=3)


def cost_per_city_3B_Outside(citydf, monthly_constants):
    """Yield per-city totals using one third of the 3-bedroom outside-centre rent.

    Same inputs and errors as ``cost_per_city_1B_Center``; each yielded dict
    is ``{'city': <name>, '3_Bed_Outside_Total': <total>}``.
    """
    yield from _totals_by_city(
        citydf, monthly_constants,
        'Apartment (3 bedrooms) Outside of Centre', '3_Bed_Outside_Total',
        split_between=3)


def cost_per_city_1B_Outside(citydf, monthly_constants):
    """Yield per-city totals using the full 1-bedroom outside-centre rent.

    Same inputs and errors as ``cost_per_city_1B_Center``; each yielded dict
    is ``{'city': <name>, '1_Bed_Outside_Total': <total>}``.
    """
    yield from _totals_by_city(
        citydf, monthly_constants,
        'Apartment (1 bedroom) Outside of Centre', '1_Bed_Outside_Total')

In [5]:
def find_cost_of_living(data, monthly_constants):
    """Build a city-indexed table of monthly totals for the four housing scenarios.

    Parameters
    ----------
    data : pandas.DataFrame
        Price table passed straight to the ``cost_per_city_*`` generators.
    monthly_constants : dict
        Monthly basket passed straight to the ``cost_per_city_*`` generators.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``city`` with the columns ``1_Bed_Center_Total``,
        ``3_Bed_Center_Total``, ``1_Bed_Outside_Total`` and
        ``3_Bed_Outside_Total``, in that order.
    """
    # Order here fixes the column order of the result.
    scenario_generators = (
        cost_per_city_1B_Center,
        cost_per_city_3B_Center,
        cost_per_city_1B_Outside,
        cost_per_city_3B_Outside,
    )
    per_scenario = [
        pd.DataFrame(list(generate(data, monthly_constants))).set_index('city')
        for generate in scenario_generators
    ]
    # The previous version added a 'city' column copied from the index and
    # dropped it again on the next line; that round trip had no effect.
    return pd.concat(per_scenario, axis=1)

In [6]:
# Display the per-city totals for the four housing scenarios.
find_cost_of_living(data, monthly_constants)

Unnamed: 0_level_0,1_Bed_Center_Total,3_Bed_Center_Total,1_Bed_Outside_Total,3_Bed_Outside_Total
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Palo-Alto,4387.48,3127.55,4094.21,2871.99
San-Jose,3714.08,2500.71,3456.4,2415.23
Portland,2864.56,2296.22,2485.55,1980.12
Austin,2969.98,2289.77,2399.66,1943.74
Denver,2907.57,2211.63,2574.14,1982.24
San-Diego,3210.41,2406.1,2804.0,2184.68
Washington D.C.,3517.34,2701.31,3064.0,2314.06
Boston,3821.77,2853.61,3136.72,2316.74
San-Francisco,4806.91,3329.76,4053.38,2852.41
Seattle,3375.61,2609.09,2866.58,2216.39


In [17]:
# Maps the hyphenated city key (the value handed to ``scrape_cities`` via
# ``cities.keys()``) to a human-readable "City, State" label.
#
# NOTE(review): the original literal listed 'Arlington' twice (Virginia, then
# Texas). Duplicate keys in a dict literal keep only the last value, so the
# Virginia entry was silently lost. The effective Texas mapping is kept here in
# the key's original position; add Arlington, Virginia back under its own key
# (cf. 'Portland-ME') once the slug the scraper expects for it is confirmed.
cities = {
    'New-York': 'New York, New York',
    'San-Francisco': 'San Francisco, California',
    'Honolulu': 'Honolulu, Hawaii',
    'Arlington': 'Arlington, Texas',
    'Anchorage': 'Anchorage, Alaska',
    'Brooklyn': 'Brooklyn, New York',
    'Santa-Rosa': 'Santa Rosa, California',
    'Washington': 'Washington, D.C.',
    'Charleston': 'Charleston, South Carolina',
    'Berkeley': 'Berkeley, California',
    'Seattle': 'Seattle, Washington',
    'Jersey-City': 'Jersey City, New Jersey',
    'Boston': 'Boston, Massachusetts',
    'Oakland': 'Oakland, California',
    'Hartford': 'Hartford, Connecticut',
    'Philadelphia': 'Philadelphia, Pennsylvania',
    'Albany': 'Albany, New York',
    'Long-Beach': 'Long Beach, California',
    'Irvine': 'Irvine, California',
    'Miami': 'Miami, Florida',
    'Los-Angeles': 'Los Angeles, California',
    'Boulder': 'Boulder, Colorado',
    'Sacramento': 'Sacramento, California',
    'Chicago': 'Chicago, Illinois',
    'Greenville': 'Greenville, South Carolina',
    'Burlington': 'Burlington, Vermont',
    'San-Jose': 'San Jose, California',
    'Everett': 'Everett, Washington',
    'Birmingham': 'Birmingham, Alabama',
    'Pittsburgh': 'Pittsburgh, Pennsylvania',
    'Minneapolis': 'Minneapolis, Minnesota',
    'Baltimore': 'Baltimore, Maryland',
    'Portland': 'Portland, Oregon',
    'Rochester': 'Rochester, New York',
    'Charlotte': 'Charlotte, North Carolina',
    'Santa-Barbara': 'Santa Barbara, California',
    'West-Palm-Beach': 'West Palm Beach, Florida',
    'Tacoma': 'Tacoma, Washington',
    'Portland-ME': 'Portland, Maine',
    'Bellingham': 'Bellingham, Washington',
    'San-Diego': 'San Diego, California',
    'Saint-Paul': 'Saint Paul, Minnesota',
    'Atlanta': 'Atlanta, Georgia',
    'Asheville': 'Asheville, North Carolina',
    'Eugene': 'Eugene, Oregon',
    'Saint-Petersburg': 'Saint Petersburg, Florida',
    'Buffalo': 'Buffalo, New York',
    'Peoria': 'Peoria, Illinois',
    'Austin': 'Austin, Texas',
    'Cleveland': 'Cleveland, Ohio',
    'Jacksonville': 'Jacksonville, Florida',
    'Milwaukee': 'Milwaukee, Wisconsin',
    'Denver': 'Denver, Colorado',
    'Indianapolis': 'Indianapolis, Indiana',
    'Ann-Arbor': 'Ann Arbor, Michigan',
    'Raleigh': 'Raleigh, North Carolina',
    'Orlando': 'Orlando, Florida',
    'Tampa': 'Tampa, Florida',
    'Saint-Louis': 'Saint Louis, Missouri',
    'Nashville': 'Nashville, Tennessee',
    'Syracuse': 'Syracuse, New York',
    'Salem': 'Salem, Oregon',
    'Kansas-City': 'Kansas City, Missouri',
    'Spokane': 'Spokane, Washington',
    'Bakersfield': 'Bakersfield, California',
    'Columbus': 'Columbus, Ohio',
    'Tallahassee': 'Tallahassee, Florida',
    'Memphis': 'Memphis, Tennessee',
    'Las-Vegas': 'Las Vegas, Nevada',
    'Columbia': 'Columbia, South Carolina',
    'Dallas': 'Dallas, Texas',
    'Detroit': 'Detroit, Michigan',
    'Phoenix': 'Phoenix, Arizona',
    'Greensboro': 'Greensboro, North Carolina',
    'Richmond': 'Richmond, Virginia',
    'Cincinnati': 'Cincinnati, Ohio',
    # Keys below were written with spaces; hyphenated to match every other
    # multi-word key in this mapping.
    'Fort-Worth': 'Fort Worth, Texas',
    'Fresno': 'Fresno, California',
    'Grand-Rapids': 'Grand Rapids, Michigan',
    'Tucson': 'Tucson, Arizona',
    'Fremont': 'Fremont, California',
    'Riverside': 'Riverside, California',
    'Albuquerque': 'Albuquerque, New Mexico',
    'Tulsa': 'Tulsa, Oklahoma',
    'Chattanooga': 'Chattanooga, Tennessee',
    'Louisville': 'Louisville, Kentucky',
    'Salt-Lake-City': 'Salt Lake City, Utah',
    'Houston': 'Houston, Texas',
    # Key and label had their hyphen/space forms swapped.
    'Oklahoma-City': 'Oklahoma City, Oklahoma',
    'Des-Moines': 'Des Moines, Iowa',
    # NOTE(review): was labelled "Arizona"; Arkansas assumed (same mix-up as
    # Little Rock below) -- confirm it is not Fayetteville, North Carolina.
    'Fayetteville': 'Fayetteville, Arkansas',
    'Madison': 'Madison, Wisconsin',
    'Wichita': 'Wichita, Kansas',
    'San-Antonio': 'San Antonio, Texas',
    'Knoxville': 'Knoxville, Tennessee',
    'Vancouver': 'Vancouver, Washington',
    'Toledo': 'Toledo, Ohio',
    'Boise': 'Boise, Idaho',
    'Reno': 'Reno, Nevada',
    'Little-Rock': 'Little Rock, Arkansas',
    'Huntsville': 'Huntsville, Alabama',
    'El-Paso': 'El Paso, Texas',
    'Lexington': 'Lexington, Kentucky',
}


In [20]:
# Collect everything scrape_cities yields for every key in `cities` into a list.
# NOTE(review): scrape_cities is not defined in this notebook; presumably it
# comes from `from cost_of_living import *` -- confirm.
city_data = list(scrape_cities(cities.keys()))

In [21]:
# Turn the collected records into a single DataFrame.
citydf = pd.DataFrame(city_data)

In [31]:
def get_city_data(city):
    """Return the records produced by ``scrape_cities`` as a DataFrame.

    Parameters
    ----------
    city : str or iterable of str
        One city key, or an iterable of city keys (e.g. ``['Washington']``).
        A single string is wrapped in a list so that it is treated as one
        city rather than being iterated character by character.

    Returns
    -------
    pandas.DataFrame
        One row per record yielded by ``scrape_cities``.
    """
    requested = [city] if isinstance(city, str) else city
    return pd.DataFrame(list(scrape_cities(requested)))

In [37]:
# Spot-check: fetch the table for a single city key.
get_city_data(['Washington'])

Unnamed: 0,category,city,item,price
0,Restaurants,Washington,"Meal, Inexpensive Restaurant",16.0
1,Restaurants,Washington,"Meal for 2 People, Mid-range Restaurant, Three...",70.0
2,Restaurants,Washington,McMeal at McDonalds (or Equivalent Combo Meal),8.0
3,Restaurants,Washington,Domestic Beer (1 pint draught),6.0
4,Restaurants,Washington,Imported Beer (11.2 oz small bottle),8.0
5,Restaurants,Washington,Cappuccino (regular),4.13
6,Restaurants,Washington,Coke/Pepsi (11.2 oz small bottle),1.96
7,Restaurants,Washington,Water (11.2 oz small bottle),1.76
8,Markets,Washington,"Milk (regular), (1 gallon)",3.46
9,Markets,Washington,Loaf of Fresh White Bread (1 lb),3.06


In [38]:
# Spot-check with a city key that is not listed in the `cities` mapping.
get_city_data(['Hoboken'])

Unnamed: 0,category,city,item,price
0,Restaurants,Hoboken,"Meal, Inexpensive Restaurant",13.70
1,Restaurants,Hoboken,"Meal for 2 People, Mid-range Restaurant, Three...",71.75
2,Restaurants,Hoboken,McMeal at McDonalds (or Equivalent Combo Meal),8.00
3,Restaurants,Hoboken,Domestic Beer (1 pint draught),6.00
4,Restaurants,Hoboken,Imported Beer (11.2 oz small bottle),8.00
5,Restaurants,Hoboken,Cappuccino (regular),4.08
6,Restaurants,Hoboken,Coke/Pepsi (11.2 oz small bottle),2.25
7,Restaurants,Hoboken,Water (11.2 oz small bottle),1.67
8,Markets,Hoboken,"Milk (regular), (1 gallon)",3.10
9,Markets,Hoboken,Loaf of Fresh White Bread (1 lb),4.93
