In [1]:
import os

# TF_CPP_MIN_LOG_LEVEL must be in the environment *before* TensorFlow's native
# runtime is loaded, otherwise the setting is ignored and INFO-level messages
# are still printed. One of the imports below pulls TensorFlow in (presumably
# BHU -- confirm), so the variable is set first.
# '1' filters INFO messages while keeping warnings and errors.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

import BHU
import keys as k

# Project-level key/value settings used by later cells (e.g. keys['SampleHouse']).
keys = k.getKeys()

2023-02-28 18:21:37.219761: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
# Resolve the sample house from the project keys (surrounding single quotes
# are stripped from the stored value) and fetch its full property detail.
sample_house = keys['SampleHouse'].strip("\'")
user_home = BHU.get_UserHome(sample_house)
user_home_details = BHU.get_PropertyDetail(
    str(user_home['property_id'])
)

# Collect the houses of interest around the user's home, then build the
# geo helper from the 'geo' part of that result.
# NOTE(review): n / listed_to_sold_ratio semantics live in BHU -- confirm there.
hoi = BHU.get_HousesOfInterest(
    user_home,
    n=2000,
    listed_to_sold_ratio=0.3,
    verbose=True,
)
gd = BHU.GeoData(hoi['geo'])

Shortfall in listed houses detected, appending 114 of current listing to results.


In [5]:
# Build features/targets from the houses of interest, the geo data and the
# user's own property detail.
# TODO: open question -- could feature generation become the first step of
# the sklearn pipeline instead of a separate stage?
fg = BHU.FeatureGenerator(houses=hoi['houses'], gd=gd, user_home=user_home_details)

In [6]:
# The whole data set is used for training here (no hold-out split).
# Targets are reshaped into a single column and standardised; the fitted
# scaler is kept so it can be handed to the model later.
target_transformer = StandardScaler()
targets_column = np.array(fg.targets).reshape(-1, 1)
train_targets = target_transformer.fit_transform(targets_column)

train_features = fg.features

In [7]:
# Column groups, one per preprocessing strategy.
# Not wired in yet: a min-max 'scale' step (candidate columns:
# 'distance_to_home', 'Days_listed') and a 'list' step for the 'tags' column.
normalize_cols = ['lot_sqft', 'sqft']
bucketize_cols = ['year_built', 'distance_to_home', 'lat', 'long']
dummy_cols = ['baths_full', 'baths_3qtr', 'baths_half', 'baths_1qtr', 'garage', 'stories', 'beds']

# (name, transformer, columns) triples applied side by side by ColumnTransformer.
column_steps = [
    ('normalize', StandardScaler(), normalize_cols),
    ('bucketize', BHU.KerasTransformers.preprocess_bucketize_col, bucketize_cols),
    # handle_unknown='ignore': categories unseen during fit encode as all zeros.
    ('dummy', OneHotEncoder(sparse_output=False, handle_unknown='ignore'), dummy_cols),
]
preprocess_data = ColumnTransformer(transformers=column_steps)

# Full pipeline: raw features -> DataFrame -> column preprocessing -> Keras model.
keras_pipeline = Pipeline(
    steps=[
        ('to_data_frame', BHU.KerasTransformers.ToDataFrame()),
        ('preprocess', preprocess_data),
        ('keras_model', BHU.KerasModel(user_home, target_transformer)),
    ]
)

# Model persistence flags, routed to the 'keras_model' step.
# NOTE(review): presumably this reuses a previously saved model without
# retraining or re-saving -- confirm against BHU.KerasModel.
keras_pipeline.set_params(
    keras_model__load_model_if_available=True,
    keras_model__update_model=False,
    keras_model__save_model=False,
)

keras_pipeline.fit(train_features, train_targets)

In [10]:
# Baseline: model prediction for the user's own home, displayed next to
# fg.user_target for comparison.
user_pred = keras_pipeline.predict(fg.user_features)
(user_pred[0][0], fg.user_target)



(955466.5, 972500)

In [11]:
# Display the formatted address of the user's home.
# NOTE(review): the rendered output is a real street address -- consider
# clearing it before sharing or committing the notebook.
fg.user_home_formatted.address

'3416 19th Ave S'

In [12]:
# What-if scenario: the same home with one additional half bath.
# A copy is modified so fg.user_features itself stays untouched.
# NOTE(review): 'baths_half' goes through the one-hot 'dummy' step with
# handle_unknown='ignore'; if the new count never occurred in the training
# data it would encode as all zeros -- confirm the value was seen during fit.
xx = fg.user_features.copy()
xx['baths_half'] = xx['baths_half'] + 1

new_worth = keras_pipeline.predict(xx)
new_worth



array([[981661.1]], dtype=float32)

In [13]:
# Inspect the raw feature mapping for the user's home (the input passed to
# keras_pipeline.predict for the baseline prediction).
fg.user_features

{'Property_ID': 2949757771,
 'Address': None,
 'Status': 'sold',
 'Days_listed': 0,
 'Days_updated': 0,
 'baths_full': 2,
 'baths_3qtr': 0,
 'baths_half': 0,
 'baths_1qtr': 0,
 'year_built': 1915,
 'lot_sqft': 4414,
 'sqft': 2500,
 'garage': 1,
 'stories': 2,
 'beds': 3,
 'tags': ['city_view',
  'community_security_features',
  'dining_room',
  'dishwasher',
  'fireplace',
  'hardwood_floors',
  'hill_or_mountain_view',
  'lake_view',
  'ocean_view',
  'spa_or_hot_tub',
  'view',
  'washer_dryer',
  'water_view',
  'basement',
  'garage_1_or_more',
  'fruit_trees',
  'tennis_court',
  'tennis',
  'groundscare',
  'garage_1'],
 'new_construction': False,
 'distance_to_home': 0,
 'lat': 47.572613,
 'long': -122.306358}

In [14]:
# Relative change predicted by the model: what-if prediction divided by the
# baseline prediction for the unmodified home.
worth_change_ratio = new_worth / user_pred
worth_change_ratio

array([[1.0274155]], dtype=float32)

In [15]:
# Apply the model's relative change to fg.user_target instead of using the
# raw what-if prediction directly.
# NOTE(review): presumably fg.user_target is the home's known price -- confirm.
new_worth_to_user = worth_change_ratio * fg.user_target
new_worth_to_user

array([[999161.58735752]])

In [16]:
# Absolute difference between the rescaled what-if value and fg.user_target.
worth_delta = new_worth_to_user - fg.user_target
worth_delta

array([[26661.58735752]])

In [17]:
# Sanity check: the user's feature mapping carries a 'year_built' entry.
'year_built' in fg.user_features

True