In [1]:
import pandas as pd
import ipywidgets as widgets
from ipywidgets import fixed
import plotly.express as px
import seaborn as sns
import numpy as np
from multiprocessing import Pool
from geopy.geocoders import Nominatim
import defs
from matplotlib import pyplot as plt
from matplotlib import gridspec
np.set_printoptions(suppress=True)
pd.set_option('display.float_format', '{:.2f}'.format)
from IPython.core.display import HTML


In [2]:
def jupyter_settings():
    %matplotlib inline
    %pylab inline
    plt.style.use('bmh')
    plt.rcParams['figure.figsize'] = [20,10]
    plt.rcParams['font.size'] = 24
    display(HTML('<style>.container{width:100% !important; }</style>]'))
    pd.options.display.max_columns = None
    pd.options.display.max_rows = None
    pd.set_option( 'display.expand_frame_repr', False)
    sns.set()
jupyter_settings()

%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [3]:
data = pd.read_csv('archive/kc_house_data.csv')
api = pd.read_csv('archive/lat_long.csv', low_memory=False)

In [4]:
data['price_level'] = data['price'].apply(lambda x : 0 if 0 < x < 321950 else
                                         1 if 321950 < x < 450000 else
                                         2 if 450000 < x < 645000 else 3)
data[['price_level', 'price']].groupby('price_level').mean().reset_index()

Unnamed: 0,price_level,price
0,0,251544.62
1,1,383596.86
2,2,538941.84
3,3,968416.68


In [5]:
data['sqft_living_size'] = data['sqft_living'].apply(lambda x : 0 if 0 < x < 1427 else
                                                    1 if 1427 < x < 1910 else
                                                    2 if 1910 < x < 2550 else
                                                    3)

In [6]:
data[['sqft_living','sqft_living_size']].groupby('sqft_living_size').mean().reset_index()

Unnamed: 0,sqft_living_size,sqft_living
0,0,1123.78
1,1,1661.24
2,2,2207.12
3,3,3297.95


In [7]:
style = {'description_width': 'initial'}
sqft_living_maximum = widgets.IntSlider(
    value = 1600,
    min = 290,
    max = 13540,
    step = 1,
    description = 'Sqft living maximum',
    disable = False,
    style = style
    )
minimun_bathrooms = widgets.FloatSlider(
    value = 1.0,
    min = 0,
    max = 8.0 , 
    step = 1.0,
    description = 'minimun bathrooms',
    disable = False,
    style = style)
price_limit = widgets.IntSlider(
    value = 540000,
    min = 75000,
    max = 77000000,
    step = 1,
    description = 'Maximun Price',
    disable = False,
    style = style
    )
sqft_basement_limit = widgets.IntSlider(
    value = 100,
    min = 0,
    max = 4820,
    step = 1,
    description = 'Maximun SQFT basement',
    disable = False,
    style = style
    )
condition_filter =widgets.Dropdown(
    options = data['condition'].sort_values().unique().tolist(),
    value = 2,
    description = 'Condition',
    disable = False
    )
yr_built = widgets.IntRangeSlider(   
    value=[1950, 2015],
    min=1900,
    max=2015,
    step=1,
    description='Year of Built',
    disabled=False,
    continuous_update=False
)

In [8]:
def update_map(data,living,bathrooms,price,basement,condition,built):
    houses_filter = data[(data['sqft_living'] <= living) & 
                        (data['bathrooms'] >= bathrooms) &
                        (data['price'] <= price) &
                        (data['sqft_basement'] <= basement) & 
                        (data['condition'] == condition) &
                        (data['yr_built'] <= built[1]) &
                        (data['yr_built'] >= built[0])][['id','lat','long','price','condition','yr_built']]
    
    fig = px.scatter_mapbox(houses_filter,
                           lat = 'lat',
                           lon = 'long',
                           color = 'yr_built',
                           size = 'price',
                           color_continuous_scale= px.colors.sequential.thermal,
                           size_max = 15,
                           zoom = 9,
                           hover_name = 'yr_built' 
                           )
    fig.update_layout(mapbox_style = 'open-street-map')
    fig.update_layout(height = 600, margin = {'t':0,'b':0,'r':0,'l':0})
    fig.show()

In [9]:
widgets.interactive(update_map, data=fixed(data),
                   living = sqft_living_maximum,
                   bathrooms = minimun_bathrooms,
                   price = price_limit,
                   basement = sqft_basement_limit,
                   condition = condition_filter,
                   built = yr_built)

interactive(children=(IntSlider(value=1600, description='Sqft living maximum', max=13540, min=290, style=Slide…

In [10]:
# sns.lineplot(x = , y = , data = , ax = )

In [11]:
data['querry'] = data[['lat','long']].apply(lambda x : f'{x["lat"]},{x["long"]}', axis=1)

In [12]:
# def get_data(x):
#     geolocator = Nominatim( user_agent = 'geoapiExercises', timeout=10)
#     time.sleep(1)
#     index, row = x
#     geolocation = row['querry']
#     data = geolocator.reverse(geolocation).raw
#     address = data['address']
#     place_id = data['place_id']
#     osm_type = data['osm_type']
#     country = address['country']
#     country_code = address['country_code']
#     return place_id, osm_type, country, country_code
    

In [13]:
p = Pool(3)
df1 = data[['id','querry']].sample(10)

In [14]:
df1[['place_id', 'osm_type', 'country', 'country_code']] = p.map(defs.get_data, df1.iterrows())
df1

Unnamed: 0,id,querry,place_id,osm_type,country,country_code
4164,8682290100,"47.7225,-122.029",250160271,way,United States,us
17750,50300090,"47.3684,-122.073",230582444,way,United States,us
4552,5014000215,"47.5694,-122.395",158445595,way,United States,us
12188,5350201180,"47.6134,-122.282",156145550,way,United States,us
20391,126039467,"47.7747,-122.366",249974375,way,United States,us
14508,7697870530,"47.3674,-122.182",231806949,way,United States,us
16217,6073500190,"47.697,-122.39",154208032,way,United States,us
21509,148000072,"47.5779,-122.409",237408211,way,United States,us
18027,3300701185,"47.6917,-122.38",155594529,way,United States,us
4542,3124089086,"47.5163,-121.829",210392582,way,United States,us


In [15]:
data['date'] = pd.to_datetime(data['date']).dt.strftime( '%Y-%m-%d' )
data['year'] = pd.to_datetime(data['date']).dt.strftime('%Y')
data['year_week'] = pd.to_datetime(data['date']).dt.strftime('%Y-%U' )
# creating filters
date_limit = widgets.SelectionSlider(options = data['date'].sort_values().unique().tolist(),
                                      value = '2014-12-01',
                                      description =  'Max avaliable date',
                                      disable = False,
                                      continuos_update = False,
                                      style = {'description_width':'initial'},
                                      redout = True)


year_limit = widgets.SelectionSlider(options = data['yr_renovated'].sort_values().unique().tolist(),
                                      value = 2000,
                                      description =  'Max year renovated',
                                      disable = False,
                                      continuos_update = False,
                                      style = {'description_width':'initial'},
                                      redout = True)


waterfront_limit = widgets.Checkbox(
    value = False,
    description = 'Is waterfront',
    disable = False,
    indent = False
)


In [16]:
def update_graph(data, date_limit, year_limit, waterfront_limit):
    #filter data
    data_filtered = data[(data['date']<= date_limit)&
                         (data['yr_renovated'] >= year_limit)&
                         (data['waterfront'] == waterfront_limit)].copy()
    
    fig = plt.figure( figsize = (24,12))
    specs = gridspec.GridSpec(ncols = 2, nrows = 2, figure = fig)
    
    ax1 = fig.add_subplot ( specs [ 0 , : ] )
    ax2 = fig.add_subplot ( specs [ 1 , 0 ] )
    ax3 = fig.add_subplot ( specs [ 1 , 1 ] )
    
    by_year = data_filtered[['id', 'year']].groupby('year').count().reset_index()
    sns.barplot(x ='year' , y = 'id', data = by_year, ax = ax1 )
    
    
    
    by_day = data_filtered[['price','date']].groupby('date').mean().reset_index()
    sns.lineplot(x='date',y='price',data=by_day,ax =ax2)
    plt.xticks(rotation = 60);
   
    by_week = data_filtered[['price','year_week']].groupby('year_week').mean().reset_index()
    sns.lineplot(x='year_week',y='price', data=by_week, ax =ax3)

In [17]:
widgets.interactive(update_graph,
                   data = fixed(data),
                   date_limit = date_limit,
                   year_limit = year_limit,
                   waterfront_limit = waterfront_limit)

interactive(children=(SelectionSlider(description='Max avaliable date', index=212, options=('2014-05-02', '201…

In [18]:
num_attributes = data.select_dtypes( include='float' )
print(num_attributes.head())
data.head()

      price  bathrooms  floors   lat    long
0 221900.00       1.00    1.00 47.51 -122.26
1 538000.00       2.25    2.00 47.72 -122.32
2 180000.00       1.00    1.00 47.74 -122.23
3 604000.00       3.00    1.00 47.52 -122.39
4 510000.00       2.00    1.00 47.62 -122.05


Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,price_level,sqft_living_size,querry,year,year_week
0,7129300520,2014-10-13,221900.0,3,1.0,1180,5650,1.0,0,0,3,7,1180,0,1955,0,98178,47.51,-122.26,1340,5650,0,0,"47.5112,-122.257",2014,2014-41
1,6414100192,2014-12-09,538000.0,3,2.25,2570,7242,2.0,0,0,3,7,2170,400,1951,1991,98125,47.72,-122.32,1690,7639,2,3,"47.721,-122.319",2014,2014-49
2,5631500400,2015-02-25,180000.0,2,1.0,770,10000,1.0,0,0,3,6,770,0,1933,0,98028,47.74,-122.23,2720,8062,0,0,"47.7379,-122.233",2015,2015-08
3,2487200875,2014-12-09,604000.0,4,3.0,1960,5000,1.0,0,0,5,7,1050,910,1965,0,98136,47.52,-122.39,1360,5000,2,2,"47.5208,-122.393",2014,2014-49
4,1954400510,2015-02-18,510000.0,3,2.0,1680,8080,1.0,0,0,3,8,1680,0,1987,0,98074,47.62,-122.05,1800,7503,2,1,"47.6168,-122.045",2015,2015-07
