In [1]:
import pandas as pd 
import numpy as np

from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
# Load the Boston housing data and split it into model inputs and target.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn -- confirm the pinned version.
boston_data = load_boston()
data = pd.DataFrame(boston_data.data, columns=boston_data.feature_names)

# INDUS and AGE are left out of the model inputs.
features = data.drop(columns=["INDUS", "AGE"])

# The regression target is the natural log of the raw target values.
log_prices = np.log(boston_data.target)
target = pd.DataFrame({"PRICE": log_prices})
features.head()

Unnamed: 0,CRIM,ZN,CHAS,NOX,RM,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,0.0,0.538,6.575,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,0.0,0.469,6.421,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,0.0,0.469,7.185,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,0.0,0.458,6.998,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,0.0,0.458,7.147,6.0622,3.0,222.0,18.7,396.9,5.33


In [3]:
# Fit a linear regression of log price on the selected features.
reg = LinearRegression()
reg.fit(features, target)
fitted_values = reg.predict(features)

# In-sample error on the log-price scale: MSE and its square root (RMSE).
MSE = mean_squared_error(y_true=target, y_pred=fitted_values)
RMSE = np.sqrt(MSE)
(MSE, RMSE)


(0.03516080084618688, 0.18751213519713034)

In [4]:
# Show the first two rows so the column order can be read off; the index
# constants defined in the next cell refer to these column positions.
features.head(2)

Unnamed: 0,CRIM,ZN,CHAS,NOX,RM,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,0.0,0.538,6.575,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,0.0,0.469,6.421,4.9671,2.0,242.0,17.8,396.9,9.14


In [5]:
# Positions of selected feature columns, looked up by name so they stay
# correct if the set of columns in `features` ever changes.
crime_idx = features.columns.get_loc("CRIM")
zn_idx = features.columns.get_loc("ZN")
chas_idx = features.columns.get_loc("CHAS")
room_idx = features.columns.get_loc("RM")
ptratio_idx = features.columns.get_loc("PTRATIO")


# Template property: a single row holding the mean of every feature column.
# reshape(1, -1) infers the width from the data instead of hard-coding 11.
proparty_state = features.mean().values.reshape(1, -1)
proparty_state

array([[3.61352356e+00, 1.13636364e+01, 6.91699605e-02, 5.54695059e-01,
        6.28463439e+00, 3.79504269e+00, 9.54940711e+00, 4.08237154e+02,
        1.84555336e+01, 3.56674032e+02, 1.26530632e+01]])

In [6]:
def get_log_estimate(nr_rooms, student_per_classroom, next_to_river = False, high_confidance = True):
    """Predict the log price of a property and a range around it.

    The property starts from the module-level template row ``proparty_state``
    and has its room count, pupil-teacher ratio and river flag overridden.

    Parameters
    ----------
    nr_rooms : number
        Value written to the column at ``room_idx``.
    student_per_classroom : number
        Value written to the column at ``ptratio_idx``.
    next_to_river : bool, optional
        Any truthy value sets the column at ``chas_idx`` to 1, otherwise 0.
    high_confidance : bool, optional
        If truthy the range is +/- 2 * RMSE (labelled 95), otherwise
        +/- 1 * RMSE (labelled 68).

    Returns
    -------
    tuple
        ``(log_estimate, upper_bound, lower_bound, interval)``.
    """
    # Work on a copy so the shared template row is never modified by a call.
    property_features = proparty_state.copy()
    property_features[0, room_idx] = nr_rooms
    property_features[0, ptratio_idx] = student_per_classroom
    # Truthiness rather than `is True`, so 1 or numpy booleans are honoured.
    property_features[0, chas_idx] = 1 if next_to_river else 0

    # predict returns a 2-D result here; take the single scalar out of it.
    log_estimate = reg.predict(property_features)[0][0]

    if high_confidance:
        spread = 2*RMSE
        interval = 95
    else:
        spread = RMSE
        interval = 68

    upper_bound = log_estimate + spread
    lower_bound = log_estimate - spread

    return log_estimate , upper_bound, lower_bound , interval
    

In [7]:
# Example: 5 rooms, 35 students per classroom, next to the river,
# with the wider (+/- 2 * RMSE, labelled 95) range.
get_log_estimate(5,35,True,True)

(2.396715247065015, 2.7717395174592756, 2.0216909766707545, 95)

In [30]:
# Convert the model's log-price output to today's prices.
# SCALE_FACTOR is the ratio of a present-day median price to the median of the
# dataset's raw target values.
# NOTE(review): 583.3 is presumably today's median price in thousands of
# dollars (the name suggests a Zillow figure) -- confirm the source and units.

ZELLO_MEDIAN_PRICE = 583.3
SCALE_FACTOR = ZELLO_MEDIAN_PRICE/ np.median(boston_data.target)
SCALE_FACTOR


def get_doller_estimate(rm, ptratio, river = False, high_diff = True):
    """Print an estimate of the current price of a property in Boston.

    Keyword arguments:

    rm : number of rooms in the house.
    ptratio : number of students per classroom.
    river : True if the house is next to the Charles River.
    high_diff : True for the wider price range (+/- 2 * RMSE on the log
        scale), False for the narrower one (+/- 1 * RMSE).

    Prints the estimate with its upper and lower bounds, each rounded to the
    nearest thousand, and returns None. If rm or ptratio is below 1 a
    message is printed instead and nothing is estimated.
    """

    def _to_rounded_dollars(log_value):
        # Undo the log, rescale to today's prices, convert thousands to
        # dollars, then round to the nearest thousand.
        return np.around(np.e**log_value * SCALE_FACTOR * 1000, -3)

    if rm < 1 or ptratio < 1:
        print("INPUT is Unrealistic . Please Try Again.")
        return

    # The fourth element (the interval label) is not used here.
    log_est, log_upper, log_lower, _ = get_log_estimate(
        rm, ptratio, next_to_river=river, high_confidance=high_diff
    )

    round_est = _to_rounded_dollars(log_est)
    round_high = _to_rounded_dollars(log_upper)
    round_low = _to_rounded_dollars(log_lower)

    print(f"House Price at Current Market Value is {round_est}")
    print(f"Higher Bound at Current Market Value is {round_high}")
    print(f"Lower Bound at Current Market Value is {round_low}")
    
    

In [35]:
# Example with inputs far from the feature means shown earlier (RM ~6.3,
# PTRATIO ~18.5): 10 rooms, 100 students per classroom, not next to the river.
get_doller_estimate(10,100,False,True)

House Price at Current Market Value is 38000.0
Higher Bound at Current Market Value is 55000.0
Lower Bound at Current Market Value is 26000.0


In [19]:
# Display the price scale factor computed earlier in the notebook.
SCALE_FACTOR

27.514150943396224

In [37]:
# Call the estimator through the external boston_valuation module.
# NOTE(review): that module is not defined in this notebook -- presumably it
# packages the functions above; confirm it matches them.
import boston_valuation as val
val.get_doller_estimate(6,12,True,True)

House Price at Current Market Value is 783000.0
Higher Bound at Current Market Value is 1139000.0
Lower Bound at Current Market Value is 538000.0
