In [31]:
# necessary imports
import numpy as np
import os
import seaborn as sns
import scipy.stats as stat
import pandas as pd
import matplotlib.pyplot as plt
from pydataset import data
import warnings
warnings.filterwarnings("ignore")
import wrangle as wra
import env
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler

In [13]:
# Pull seven columns from properties_2017, keeping only the rows whose
# propertylandusetypeid equals '261'.
# NOTE(review): 261 presumably denotes single-family residential in the
# Zillow schema -- confirm against the data dictionary.
zillow_query = """
        select bedroomcnt, bathroomcnt, calculatedfinishedsquarefeet, taxvaluedollarcnt, yearbuilt,
        taxamount, fips
        from properties_2017
        where propertylandusetypeid = '261';
        """

# The query text is handed to the project-local wrangle helper. How it
# connects, caches, or reads the data is defined in the wrangle module and is
# not visible from this notebook.
# NOTE(review): the preview below contains 'Unnamed: 0' and 'Unnamed: 0.1'
# columns, which looks like a saved index being read back as data -- check
# the helper's CSV round-trip.
zillow_df = wra.get_zillow_data(zillow_query)

# Preview the first rows of the raw pull.
zillow_df.head()

Unnamed: 0.1,Unnamed: 0,bedroomcnt,bathroomcnt,calculatedfinishedsquarefeet,taxvaluedollarcnt,yearbuilt,taxamount,fips
0,0,0.0,0.0,,27516.0,,,6037.0
1,1,0.0,0.0,,10.0,,,6037.0
2,2,0.0,0.0,,10.0,,,6037.0
3,3,0.0,0.0,,2108.0,,174.21,6037.0
4,4,4.0,2.0,3633.0,296425.0,2005.0,6941.39,6037.0


In [14]:
# Bind a second name to the raw frame. This is an alias, not a copy: `df` and
# `zillow_df` refer to the same object until `df` is reassigned.
df = zillow_df

In [15]:
# Clean the raw pull with the project-local wrangle helper.
# The input is always a fresh copy of `zillow_df` rather than the current
# value of `df`, so that:
#   * re-running this cell gives the same result (the old `df = f(df)` form
#     fed already-wrangled data back into the helper on a second run), and
#   * the raw frame stays intact even if the helper works in place.
# NOTE(review): what wrangle_zillow drops/renames lives in the wrangle module,
# which is not visible here.
df = wra.wrangle_zillow(zillow_df.copy())

In [27]:
# Preview the first rows of the wrangled frame.
df.head()

Unnamed: 0,bedroomcnt,bathroomcnt,squarefeet,taxvalue,yearbuilt,taxamount,county
4,4,2.0,3633,296425,2005,6941.39,LA
6,3,4.0,1620,847770,2011,10244.94,LA
7,3,2.0,2077,646760,1926,7924.68,LA
11,0,0.0,1200,5328,1972,91.6,LA
14,0,0.0,171,6920,1973,255.17,LA


In [17]:
# Partition the wrangled frame into three splits using the wrangle helper.
# NOTE(review): split proportions and any random seed are set inside
# wra.split_data (not visible here) -- confirm a fixed seed is used so the
# split is reproducible across kernel restarts.
train, validate, test = wra.split_data(df)

In [18]:
# Preview the first rows of the training split.
train.head()

Unnamed: 0,bedroomcnt,bathroomcnt,squarefeet,taxvalue,yearbuilt,taxamount,county
1571798,4,2.5,2987,885000,2006,13617.38,Orange
1723553,3,2.0,3013,489511,1977,5768.57,LA
718174,4,3.0,2572,390915,1986,4944.11,LA
1663205,3,1.0,1589,321758,1959,4031.78,LA
1087891,3,2.0,1727,231737,1962,2708.8,Ventura


In [19]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric
# columns of the training split.
train.describe()

Unnamed: 0,bedroomcnt,bathroomcnt,squarefeet,taxvalue,yearbuilt,taxamount
count,1219902.0,1219902.0,1219902.0,1219902.0,1219902.0,1219902.0
mean,3.258273,2.141407,1751.959,363532.1,1960.259,4509.706
std,0.8974253,0.8354628,726.0309,243295.8,21.55127,2845.417
min,0.0,0.0,1.0,22.0,1801.0,13.36
25%,3.0,2.0,1242.0,181850.0,1949.0,2458.92
50%,3.0,2.0,1587.0,313004.0,1958.0,3941.86
75%,4.0,3.0,2108.0,492316.8,1974.0,5943.167
max,16.0,20.0,22800.0,1202999.0,2016.0,100379.3


In [20]:
# Separate predictors from the target for the train and validate splits.
# The same five predictor columns are taken from both frames; `taxvalue`
# is the target.
predictor_columns = ['bedroomcnt', 'bathroomcnt', 'squarefeet', 'yearbuilt', 'taxamount']

X_train = train[predictor_columns]
X_validate = validate[predictor_columns]

y_train = train['taxvalue']
y_validate = validate['taxvalue']

In [32]:
# Min-max scaling: fit on the training split only and store the result in
# new `<column>_minmax` columns next to the unscaled ones.
scaler = MinMaxScaler()
columns_to_scale = ['bedroomcnt', 'bathroomcnt', 'squarefeet', 'yearbuilt', 'taxamount']
minmax_names = [f'{name}_minmax' for name in columns_to_scale]
train[minmax_names] = scaler.fit_transform(train[columns_to_scale])


In [33]:
# Standard scaling: fit on the training split only and store the result in
# new `<column>_standard` columns next to the unscaled ones.
# Note that `scaler` is rebound here, replacing whatever scaler it held before.
scaler = StandardScaler()
columns_to_scale = ['bedroomcnt', 'bathroomcnt', 'squarefeet', 'yearbuilt', 'taxamount']
standard_names = [f'{name}_standard' for name in columns_to_scale]
train[standard_names] = scaler.fit_transform(train[columns_to_scale])


In [34]:
# Robust scaling: fit on the training split only and store the result in
# new `<column>_robust` columns next to the unscaled ones.
# Note that `scaler` is rebound here, replacing whatever scaler it held before.
scaler = RobustScaler()
columns_to_scale = ['bedroomcnt', 'bathroomcnt', 'squarefeet', 'yearbuilt', 'taxamount']
robust_names = [f'{name}_robust' for name in columns_to_scale]
train[robust_names] = scaler.fit_transform(train[columns_to_scale])


In [37]:
# Display the training split with its columns in alphabetical order, which
# places each feature directly beside its scaled variants.
train.loc[:, sorted(train.columns)]

Unnamed: 0,bathroomcnt,bathroomcnt_minmax,bathroomcnt_robust,bathroomcnt_standard,bedroomcnt,bedroomcnt_minmax,bedroomcnt_robust,bedroomcnt_standard,county,squarefeet,...,squarefeet_standard,taxamount,taxamount_minmax,taxamount_robust,taxamount_standard,taxvalue,yearbuilt,yearbuilt_minmax,yearbuilt_robust,yearbuilt_standard
1571798,0.125,0.125,0.5,0.429214,0.2500,0.2500,1.0,0.826506,Orange,0.130971,...,1.701087,0.135544,0.135544,2.776932,3.200824,885000,0.953488,0.953488,1.92,2.122405
1723553,0.100,0.100,0.0,-0.169257,0.1875,0.1875,0.0,-0.287794,LA,0.132111,...,1.736898,0.057342,0.057342,0.524277,0.442418,489511,0.818605,0.818605,0.76,0.776777
718174,0.150,0.150,1.0,1.027685,0.2500,0.2500,1.0,0.826506,LA,0.112768,...,1.129485,0.049128,0.049128,0.287652,0.152668,390915,0.860465,0.860465,1.12,1.194385
1663205,0.050,0.050,-1.0,-1.366198,0.1875,0.1875,0.0,-0.287794,LA,0.069652,...,-0.224452,0.040038,0.040038,0.025808,-0.167964,321758,0.734884,0.734884,0.04,-0.058441
1087891,0.100,0.100,0.0,-0.169257,0.1875,0.1875,0.0,-0.287794,Ventura,0.075705,...,-0.034377,0.026856,0.026856,-0.353896,-0.632915,231737,0.748837,0.748837,0.16,0.080762
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
531544,0.100,0.100,0.0,-0.169257,0.1875,0.1875,0.0,-0.287794,LA,0.070573,...,-0.195528,0.029777,0.029777,-0.269757,-0.529886,226464,0.851163,0.851163,1.04,1.101584
424806,0.050,0.050,-1.0,-1.366198,0.1250,0.1250,-1.0,-1.402093,LA,0.039432,...,-1.173448,0.032985,0.032985,-0.177352,-0.416736,227271,0.637209,0.637209,-0.80,-1.032862
518081,0.150,0.150,1.0,1.027685,0.1875,0.1875,0.0,-0.287794,LA,0.080705,...,0.122641,0.017115,0.017115,-0.634487,-0.976503,125533,0.716279,0.716279,-0.12,-0.244045
669341,0.100,0.100,0.0,-0.169257,0.1875,0.1875,0.0,-0.287794,LA,0.061626,...,-0.476508,0.045299,0.045299,0.177370,0.017626,364412,0.720930,0.720930,-0.08,-0.197644


In [None]:
# Bar chart of tax value against bathroom count.
# The previous call passed the raw columns straight to plt.bar, which draws
# one bar per training row (over a million) with all bars for a given
# bathroom count stacked on top of each other. That is extremely slow and the
# visible bar heights do not correspond to any meaningful statistic.
# Instead, reduce to one value per distinct bathroom count (the mean tax
# value) and plot that.
mean_taxvalue_by_bath = train.groupby('bathroomcnt')['taxvalue'].mean()

fig, ax = plt.subplots()
# Series.plot.bar treats the index as categories, so bars stay evenly spaced
# and non-overlapping whatever the spacing of the bathroom-count values.
mean_taxvalue_by_bath.plot.bar(ax=ax)
ax.set(
    xlabel='bathroomcnt',
    ylabel='mean taxvalue',
    title='Mean tax value by bathroom count (train)',
);