# Imports

In [2]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import pandas as pd
import numpy as np

In [21]:
# Data
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn to run.

boston_dataset = load_boston()
data = pd.DataFrame(
    data=boston_dataset.data,
    columns=boston_dataset.feature_names,
)
# INDUS and AGE are left out of the feature set used by the model.
features = data.drop(columns=['INDUS', 'AGE'])

# The model is trained on the natural log of the target values.
log_prices = np.log(boston_dataset.target)
target = pd.DataFrame(data=log_prices, columns=['PRICE'])
features.head()

Unnamed: 0,CRIM,ZN,CHAS,NOX,RM,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,0.0,0.538,6.575,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,0.0,0.469,6.421,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,0.0,0.469,7.185,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,0.0,0.458,6.998,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,0.0,0.458,7.147,6.0622,3.0,222.0,18.7,396.9,5.33


In [45]:
# Column positions inside `features` (and therefore inside `property_stats`)
# of the values that can be overridden for an individual property.
CRIM_IDX = 0
ZN_IDX = 1
CHAS_IDX = 2
RM_IDX = 4
PTRATIO_IDX = 8
#etc

# Template property: a single row holding the mean of every feature column.
# reshape(1, -1) keeps it 2-D (one sample) without hard-coding the column count.
# The earlier np.ndarray(shape=(1,11)) placeholder was uninitialised memory that
# was overwritten immediately, so it has been dropped.
property_stats = features.mean().values.reshape(1, -1)
property_stats

array([[3.61352356e+00, 1.13636364e+01, 6.91699605e-02, 5.54695059e-01,
        6.28463439e+00, 3.79504269e+00, 9.54940711e+00, 4.08237154e+02,
        1.84555336e+01, 3.56674032e+02, 1.26530632e+01]])

In [22]:
# Scratch check (start; the matching "test end" cell is further down):
# inspect the type returned by features.mean().

type(features.mean())

pandas.core.series.Series

In [23]:
# Scratch check: inspect the type of the .values attribute of the means.
type(features.mean().values)

numpy.ndarray

In [24]:
# Scratch check: shape of the array of feature means before reshaping.
features.mean().values.shape

(11,)

In [25]:
# Scratch check (end): reshape the feature means into a single-row 2-D array.
features.mean().values.reshape(1,11)

array([[3.61352356e+00, 1.13636364e+01, 6.91699605e-02, 5.54695059e-01,
        6.28463439e+00, 3.79504269e+00, 9.54940711e+00, 4.08237154e+02,
        1.84555336e+01, 3.56674032e+02, 1.26530632e+01]])

In [30]:
# Fit a linear regression of the log prices on the selected features.
regr = LinearRegression()
regr.fit(features, target)

# In-sample predictions and their root-mean-squared error; rmse is later used
# as the standard-deviation estimate for the prediction intervals.
fitted_vals = regr.predict(features)
mse = mean_squared_error(target, fitted_vals)
rmse = np.sqrt(mse)


In [51]:
def get_log_estimate(nr_rooms,
                     students_per_classroom,
                     next_to_river=False,
                     high_confidence=True):
    """
    Predict the log price of a property and a range around the prediction.

    Every feature other than the three set here keeps the value stored in
    the module-level `property_stats` template.

    Keyword args:
    nr_rooms -- value written to the RM feature
    students_per_classroom -- value written to the PTRATIO feature
    next_to_river -- truthy sets the CHAS feature to 1, otherwise 0
    high_confidence -- True for +/- 2*rmse (95), False for +/- 1*rmse (68)

    Returns a tuple (log_estimate, upper_bound, lower_bound, interval).
    """
    # Work on a copy so the shared template is not altered by a call.
    stats = property_stats.copy()

    # Config props
    stats[0][RM_IDX] = nr_rooms
    stats[0][PTRATIO_IDX] = students_per_classroom
    stats[0][CHAS_IDX] = 1 if next_to_river else 0

    log_estimate = regr.predict(stats)[0][0]

    # High confidence means 95% (2 sd), low means 68% (1 sd).
    if high_confidence:
        spread, interval = 2 * rmse, 95
    else:
        spread, interval = rmse, 68

    upper_bound = log_estimate + spread
    lower_bound = log_estimate - spread

    return log_estimate, upper_bound, lower_bound, interval

In [52]:
# Example: 3 rooms, 10 students per classroom, not next to the river.
get_log_estimate(nr_rooms=3, students_per_classroom=10, next_to_river=False)

(3.045868761493372, 3.4208930318876325, 2.6708444910991114, 95)

In [54]:
# Median of the raw (non-log) target values in the dataset.
np.median(boston_dataset.target)

21.2

In [84]:
# Scale factor applied to model prices: 583.322 divided by the dataset's
# median target value.
# NOTE(review): 583.322 is presumably a present-day median Boston price in
# thousands of dollars — confirm the source and date.
inflation = 583.322 / np.median(boston_dataset.target)
def get_todays_price(nr_rooms,
                     pt_ratio,
                     next_to_river=False,
                     high_confidence=True):
    """
    Estimate the price of a property in Boston in 2017.

    Keyword args:
    nr_rooms -- number of rooms in the property (must be at least 1)
    pt_ratio -- students-to-teacher ratio (must be at least 1)
    next_to_river -- True if the property is next to the Charles River
    high_confidence -- True for a 95% prediction interval, False for 68%

    Returns a tuple (price, upper_bound, lower_bound, interval). The three
    amounts are rounded to the nearest thousand; interval is 95 or 68.

    Raises ValueError if nr_rooms or pt_ratio is below 1.
    """
    # The original check referenced an undefined name and returned the
    # exception class instead of raising it.
    if nr_rooms < 1 or pt_ratio < 1:
        raise ValueError('nr_rooms and pt_ratio must both be at least 1')

    # Work on a copy so the shared template is not altered by a call.
    stats = property_stats.copy()

    # Config props
    stats[0][RM_IDX] = nr_rooms
    stats[0][PTRATIO_IDX] = pt_ratio
    stats[0][CHAS_IDX] = 1 if next_to_river else 0

    log_estimate = regr.predict(stats)[0][0]

    # High confidence means 95% (2 sd), low means 68% (1 sd).
    if high_confidence:
        spread, interval = 2 * rmse, 95
    else:
        spread, interval = rmse, 68

    # Undo the log transform, then scale: the factor 1000 matches the
    # original conversion and `inflation` adjusts to present-day prices.
    scale = 1000 * inflation
    actual_price = scale * np.exp(log_estimate)
    upper_bound = scale * np.exp(log_estimate + spread)
    lower_bound = scale * np.exp(log_estimate - spread)

    return (round(actual_price, -3),
            round(upper_bound, -3),
            round(lower_bound, -3),
            interval)

In [82]:
# Example: smallest accepted inputs, property not next to the river.
get_todays_price(nr_rooms=1, pt_ratio=1, next_to_river=False)

(676000.0, 984000.0, 465000.0, 95)