In [1]:
import pandas as pd 
import numpy as np 

from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error



In [2]:
boston_dataset = load_boston()
data = pd.DataFrame(data=boston_dataset.data, columns=boston_dataset.feature_names)

features = data.drop(['INDUS', 'AGE'], axis=1)
log_prices = np.log(boston_dataset.target)
target = pd.DataFrame(log_prices, columns=['PRICE'])



In [3]:
CRIME_IDX = 0
ZN_IDX = 1
CHAS_IDX = 2
RM_IDX = 4
PT_RATIO = 8

property_stats = np.ndarray(shape= (1, 11))
property_stats[0][CRIME_IDX] = features['CRIM'].mean()
property_stats[0][ZN_IDX] = features['ZN'].mean()
property_stats[0][CHAS_IDX] = features['CHAS'].mean()
property_stats = features.mean().values.reshape(1,11)


In [4]:
features.mean().values.reshape(1,11)

array([[3.61352356e+00, 1.13636364e+01, 6.91699605e-02, 5.54695059e-01,
        6.28463439e+00, 3.79504269e+00, 9.54940711e+00, 4.08237154e+02,
        1.84555336e+01, 3.56674032e+02, 1.26530632e+01]])

In [5]:
regr = LinearRegression().fit(features,target)
fitted_vals = regr.predict(features)

MSE = mean_squared_error(target, fitted_vals)
RSME = np.sqrt(MSE)


In [6]:
def get_log_price(nr_rooms, 
                  students_per_class,
                 next_to_river=False, 
                 high_confidence=True):
    
    # Configure property
    property_stats[0][RM_IDX] = nr_rooms
    property_stats[0][PT_RATIO] = students_per_class
    
    # make a prediction 
    log_estimate = regr.predict(property_stats)[0][0]
    
    if next_to_river:
        property_stats[0][CHAS_IDX] = 1
    else:
        property_stats[0][CHAS_IDX] = 0
        
    # calc range
    if high_confidence:
        lower_bound = log_estimate - 2*RSME
        upper_bound = log_estimate + 2*RSME
        interval = 95
    else: 
        lower_bound = log_estimate - RSME
        upper_bound = log_estimate + RSME
        interval = 68
        
    
    return log_estimate, upper_bound, lower_bound, interval

In [7]:
get_log_price(10, 4)

(3.9124068278430126, 4.287431098237273, 3.537382557448752, 95)

In [41]:
np.median(boston_dataset.target)
zillow_median_price = 583.3
scale_factor = zillow_median_price / np.median(boston_dataset.target)

log_est, upper, lower, conf = get_log_price(9, 
                                           students_per_class=15,
                                           next_to_river=False, 
                                           high_confidence=False)



dollar_estimate = np.e**log_est*1000*scale_factor
dollar_hi = np.e**upper*1000*scale_factor
dollar_lo = np.e**lower*1000*scale_factor

# round the estimate to the next thousands

rounded_dollar_value = round(dollar_estimate, -3)
rounded_upper_value = round(dollar_hi, -3)
rounded_lower_value = round(dollar_lo, -3)

# rounded_value
# rounded_lower_value
# rounded_upper_value

print('The estimate dollar price:', rounded_dollar_value)
print(f'with a confidence of {conf}: the range is as below')
print(f'{rounded_lower_value}, to {rounded_upper_value}')



The estimate dollar price: 827000.0
with a confidence of 68: the range is as below
685000.0, to 997000.0


In [64]:
def get_dollar_estimate(rm, ptratio, chas=False, large_range = False):
    """
    Estimate price for a property in Boston
    
    Keywords and Arguments:
    rm -- number of rooms in the property
    ptratio -- student/ Teacher ration
    chas -- True is the property is close to chas river, unless False 
    large_range = True for 95% prediction interval, False for a 68 prediction interval


    """
    
    log_est, upper, lower, conf = get_log_price(rm, 
                                           students_per_class=ptratio,
                                           next_to_river=chas, 
                                           high_confidence=large_range)
    if rm <1 or ptratio <1:
        print('Thats Unrealstic try again!')
        return



    dollar_estimate = np.e**log_est*1000*scale_factor
    dollar_hi = np.e**upper*1000*scale_factor
    dollar_lo = np.e**lower*1000*scale_factor

    # round the estimate to the next thousands

    rounded_dollar_value = round(dollar_estimate, -3)
    rounded_upper_value = round(dollar_hi, -3)
    rounded_lower_value = round(dollar_lo, -3)

    # rounded_value
    # rounded_lower_value
    # rounded_upper_value

    print('The estimate dollar price:', rounded_dollar_value)
    print(f'with a confidence of {conf}: the range is as below')
    print(f'{rounded_lower_value}, to {rounded_upper_value}')

    
    
   

    

In [61]:
 get_dollar_estimate(rm=1110, ptratio=0, chas=True)

Thats Unrealstic try again!


In [65]:
import boston_valueation as val

val.get_log_price(5, 12, )

The estimate dollar price: 833000.0
with a confidence of 68: the range is as below
690000.0, to 1005000.0


(3.15236254102975, 3.5273868114240106, 2.7773382706354894, 95)