In [18]:
import os

import numpy as np
import pandas as pd
import requests
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Notebook display settings: never truncate columns, cap printed rows at 100.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

In [13]:
def get_waterfront(lat, long, data=None, chunk_size=100):
    '''Estimate the waterfrontness of a location from the houses around it.

    The search window is a box centred on (lat, long). Its half-width on each
    axis is the dataset's full latitude / longitude range divided by
    chunk_size.

    Parameters
    ----------
    lat, long : float
        Coordinates of the house to score.
    data : pandas.DataFrame, optional
        Houses to search; must have `lat`, `long` and `waterfront` columns.
        When omitted, '../data/NEW.csv' is read on every call (the original
        behaviour). Pass an already-loaded frame to avoid re-reading the file.
    chunk_size : int or float, optional
        Divisor applied to each coordinate range to get the window half-width;
        larger values give a smaller window. Must be positive.

    Returns
    -------
    float
        Mean of `waterfront` over the houses strictly inside the window.
        Missing values are skipped by the mean, so the result is NaN when no
        house with a known value falls inside the window.

    Raises
    ------
    ValueError
        If chunk_size is not positive.
    '''
    if chunk_size <= 0:
        raise ValueError('chunk_size must be positive')

    df = pd.read_csv('../data/NEW.csv') if data is None else data
    lat_step = (df.lat.max() - df.lat.min()) / chunk_size
    long_step = (df.long.max() - df.long.min()) / chunk_size

    # Strict inequalities on both axes: houses exactly on the window edge
    # are excluded.
    in_window = (
        (df.lat > lat - lat_step) & (df.lat < lat + lat_step)
        & (df.long > long - long_step) & (df.long < long + long_step)
    )
    return df.loc[in_window, 'waterfront'].mean()

In [14]:
def get_coord_water(address):
    '''Geocode a street address with the MapQuest Geocoding API.

    Despite its name, this function only looks up coordinates; it does not
    compute a waterfrontness value.

    Parameters
    ----------
    address : str
        Free-form address, e.g. '10411 Maplewood Pl SW, Seattle, WA 98146'.

    Returns
    -------
    tuple
        (lat, long) taken from the `displayLatLng` entry of the first location
        of the first result in the response.

    Raises
    ------
    requests.HTTPError
        If the service answers with an error status.
    IndexError
        If the response contains no location for the address.
    '''
    # NOTE(review): this key is committed to source control and should be
    # rotated. Set MAPQUEST_API_KEY in the environment; the literal is kept
    # only as a fallback so existing runs keep working.
    apikey = os.environ.get('MAPQUEST_API_KEY', '7YnZN2qCttPFhZdGTY3Oxj9vTxFjibnt')

    # Passing the query through `params` lets requests URL-encode the address,
    # so characters such as '&' or '#' cannot corrupt the query string.
    response = requests.get(
        'https://www.mapquestapi.com/geocoding/v1/address',
        params={'key': apikey, 'location': address},
        timeout=10,
    )
    response.raise_for_status()

    locations = response.json()['results'][0]['locations']
    if not locations:
        raise IndexError(f'MapQuest returned no locations for address {address!r}')

    lat_lng = locations[0]['displayLatLng']
    return lat_lng['lat'], lat_lng['lng']

In [15]:
def predict_house_value(street, city, state, zipcode, living_sqft = 0, lot_sqft=0, basement=0, yr_renovated=0, 
                        bedrooms=0, bathrooms=0, floors=0): 
    '''Predict the price of a house from a linear regression fitted on '../data/NEW.csv'.

    Parameters
    ----------
    street, city, state, zipcode : str
        Address parts; joined as '{street}, {city}, {state} {zipcode}' and
        geocoded via get_coord_water.
    living_sqft, lot_sqft, basement, yr_renovated, bedrooms, bathrooms, floors : number
        Feature values for the house. They are matched, in this order, to the
        training columns sqft_living, sqft_lot, basement, yr_renovated,
        bedrooms, bathrooms and floors. All default to 0.

    Returns
    -------
    numpy.ndarray
        Output of LinearRegression.predict for the single house. It is 2-D
        because the target is fitted as a one-column frame.
    '''
    feature_cols = ['sqft_living', 'sqft_lot', 'basement', 'yr_renovated', 'bedrooms', 'bathrooms', 'floors']

    address = f'{street}, {city}, {state} {zipcode}'
    lat, long = get_coord_water(address)
    # NOTE(review): waterfrontness is computed but is not a feature of the
    # model below, so it has no effect on the returned prediction. Either add
    # it to the model or drop the geocoding step - confirm the intent.
    waterfrontness = get_waterfront(lat, long)

    df = pd.read_csv('../data/NEW.csv')
    # LinearRegression rejects NaN, so rows with a missing feature or price
    # are dropped before fitting. With complete data this changes nothing.
    train = df[feature_cols + ['price']].dropna()

    linear = LinearRegression()
    linear.fit(train[feature_cols], train[['price']])

    # Predict from a frame with the training column names, so scikit-learn
    # can match features by name instead of warning about a bare list.
    house = pd.DataFrame(
        [[living_sqft, lot_sqft, basement, yr_renovated, bedrooms, bathrooms, floors]],
        columns=feature_cols,
    )
    house_predict = linear.predict(house)
    return house_predict

    
# Example call. Alternative test address: 5104 SW Admiral Way, Seattle, WA 98116
street, city, state, zipcode = '10411 Maplewood Pl SW', 'Seattle', 'WA', '98146'
predict_house_value(street=street, city=city, state=state, zipcode=zipcode)

0.02564102564102564


Unnamed: 0.1,Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,basement,basement_ratio,sqft_living15_ratio,sqft_lot15_ratio
0,0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,0.0,3,7,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650,0,0.0,0.881,1.0
1,1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,0.0,0.0,3,7,2170,400.0,1951,23.0,98125,47.721,-122.319,1690,7639,1,0.156,1.521,0.948
2,2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,0.0,0.0,3,6,770,0.0,1933,,98028,47.7379,-122.233,2720,8062,0,0.0,0.283,1.24
3,3,2487200875,12/9/2014,604000.0,4,3.0,1960,5000,1.0,0.0,0.0,5,7,1050,910.0,1965,0.0,98136,47.5208,-122.393,1360,5000,1,0.464,1.441,1.0
4,4,1954400510,2/18/2015,510000.0,3,2.0,1680,8080,1.0,0.0,0.0,3,8,1680,0.0,1987,0.0,98074,47.6168,-122.045,1800,7503,0,0.0,0.933,1.077


In [None]:
# NOTE(review): unconditional failure - presumably a deliberate stop so that
# "Run All" halts here, before the scratch cells that follow; confirm, and
# consider deleting those cells instead of guarding them.
assert False

In [None]:
# Scratch check of the MapQuest geocoding request for a single address.
# NOTE(review): the zip code '9811' has only four digits - possibly a typo for
# '98116'; left unchanged in case the short zip is the point of the check.
address = '5104 SW Admiral Way, Seattle, WA 9811'
# NOTE(review): this key is committed to source control and should be rotated.
# Set MAPQUEST_API_KEY in the environment; the literal is only a fallback.
apikey = os.environ.get('MAPQUEST_API_KEY', '7YnZN2qCttPFhZdGTY3Oxj9vTxFjibnt')
url = f'https://www.mapquestapi.com/geocoding/v1/address?key={apikey}&location={address}'
# The f-string above is already fully interpolated, so the former
# url.format(address, apikey) call was a no-op. `l` is kept as an alias in
# case other cells still refer to it.
l = url
response = requests.get(l, timeout=10)
response.raise_for_status()
response_json = response.json()

In [None]:
# Print the display coordinates of the first location of the first result,
# followed by the top-level keys of the decoded response.
print(
    response_json['results'][0]['locations'][0]['displayLatLng'],
    response_json.keys(),
)

In [None]:
# Bare expression: the notebook displays the whole decoded response for inspection.
response_json

In [None]:
import pandas as pd
# Rows whose `waterfront` value is missing.
# NOTE(review): `df` is not assigned at notebook scope in any cell shown here -
# presumably left over from an earlier session; confirm and load it explicitly
# so this cell survives Restart & Run All.
unk_wf = df.loc[df['waterfront'].isna()]
def estimate_waterfrontness(index, data=None, chunk_size=100):
    '''Estimate the waterfrontness of one row from the houses around it.

    The search window is a box centred on the row's own (lat, long). Its
    half-width on each axis is the dataset's full latitude / longitude range
    divided by chunk_size.

    Parameters
    ----------
    index : label
        Index label of the row to score; looked up with `.loc`.
    data : pandas.DataFrame, optional
        Houses to search; must have `lat`, `long` and `waterfront` columns.
        When omitted, 'kc_house_data.csv' is read on every call (the original
        behaviour). Pass an already-loaded frame when scoring many rows.
    chunk_size : int or float, optional
        Divisor applied to each coordinate range to get the window half-width;
        larger values give a smaller window. Must be positive.

    Returns
    -------
    float
        Mean of `waterfront` over the rows strictly inside the window. Missing
        values are skipped by the mean, so the result is NaN when no row in
        the window has a known value. The row itself lies inside its own
        window whenever the half-widths are non-zero.

    Raises
    ------
    ValueError
        If chunk_size is not positive.
    '''
    if chunk_size <= 0:
        raise ValueError('chunk_size must be positive')

    df = pd.read_csv('kc_house_data.csv') if data is None else data
    lat_step = (df.lat.max() - df.lat.min()) / chunk_size
    long_step = (df.long.max() - df.long.min()) / chunk_size

    # Look the row's coordinates up once instead of once per comparison.
    row_lat = df.loc[index, 'lat']
    row_long = df.loc[index, 'long']

    # Strict inequalities on both axes: rows exactly on the window edge
    # are excluded.
    in_window = (
        (df.lat > row_lat - lat_step) & (df.lat < row_lat + lat_step)
        & (df.long > row_long - long_step) & (df.long < row_long + long_step)
    )
    return df.loc[in_window, 'waterfront'].mean()