In [61]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import pandas as pd
import numpy as np
import boston_valuation as vals

In [4]:
# Gather data.
# NOTE: load_boston was removed in scikit-learn 1.2, so this cell needs an older release.
boston_dataset = load_boston()

# Wrap the raw feature matrix in a DataFrame labelled with the dataset's feature names.
data = pd.DataFrame(
    data=boston_dataset.data,
    columns=boston_dataset.feature_names,
)

# Drop the features that were previously judged not useful for the model.
features = data.drop(columns=['INDUS', 'AGE'])

# Use the log of the prices as the target, which was found to improve the model.
log_prices = np.log(boston_dataset.target)

# Hold the target in a one-column DataFrame so it is 2-D like the feature data.
target = pd.DataFrame({'PRICE': log_prices})


In [5]:
# Preview the first rows of the reduced feature set (INDUS and AGE already dropped).
features.head()

Unnamed: 0,CRIM,ZN,CHAS,NOX,RM,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,0.0,0.538,6.575,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,0.0,0.469,6.421,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,0.0,0.469,7.185,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,0.0,0.458,6.998,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,0.0,0.458,7.147,6.0622,3.0,222.0,18.7,396.9,5.33


In [6]:
# Build a template row holding one value per model feature, used when making estimates.
# The row must have the same number of columns, in the same order, as `features`.
# Every entry starts at the mean of its feature, so any feature the caller does not
# override falls back to an "average property" default.

# Column positions are looked up by name instead of being hard-coded, so they stay
# correct if the set of dropped features ever changes.
CRIME_IDX = features.columns.get_loc('CRIM')
ZN_IDX = features.columns.get_loc('ZN')
CHAS_IDX = features.columns.get_loc('CHAS')
RM_IDX = features.columns.get_loc('RM')
PTRATIO_IDX = features.columns.get_loc('PTRATIO')

# reshape(1, -1) yields a single row whatever the number of features is.
property_stats = features.mean().values.reshape(1, -1)

property_stats

array([[3.61352356e+00, 1.13636364e+01, 6.91699605e-02, 5.54695059e-01,
        6.28463439e+00, 3.79504269e+00, 9.54940711e+00, 4.08237154e+02,
        1.84555336e+01, 3.56674032e+02, 1.26530632e+01]])

In [7]:
# Fit the linear regression with sklearn; the estimated theta (coefficient) values
# are calculated by this fit.
regr = LinearRegression().fit(features, target)

# Predictions for every row of the training features, using the fitted thetas.
fitted_vals = regr.predict(features)

# Mean squared error between the log-price target and the fitted values.
MSE = mean_squared_error(target, fitted_vals)

# Root mean squared error, derived from MSE directly: the `squared=False` flag of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6, while
# np.sqrt(MSE) gives the same value on every version.
RMSE = np.sqrt(MSE)

In [28]:
def get_log_estimates(nr_rooms,
                      student_per_class,
                      next_to_river=False,
                      high_confidence=True):
    """
    Predict the log price of a property together with a range around it.

    Keyword arguments:
    nr_rooms -- value placed in the RM column of the feature row
    student_per_class -- value placed in the PTRATIO column of the feature row
    next_to_river -- True sets the CHAS column to 1, False sets it to 0
    high_confidence -- True for a range of +/- 2 RMSE (labelled 95),
                       False for a range of +/- 1 RMSE (labelled 68)

    Returns a tuple (log_estimate, upper_bound, lower_bound, interval).
    """
    # Work on a copy of the module-level template so that the shared
    # property_stats array keeps its mean values between calls.
    stats = property_stats.copy()
    stats[0, RM_IDX] = nr_rooms
    stats[0, PTRATIO_IDX] = student_per_class
    stats[0, CHAS_IDX] = 1 if next_to_river else 0

    # The model returns a 2-D array with one row and one column; unwrap the scalar.
    log_estimate = regr.predict(stats)[0][0]

    # Width of the range: two RMSE for the wider interval, one RMSE otherwise.
    if high_confidence:
        upper_bound = log_estimate + 2 * RMSE
        lower_bound = log_estimate - 2 * RMSE
        interval = 95
    else:
        upper_bound = log_estimate + RMSE
        lower_bound = log_estimate - RMSE
        interval = 68

    return log_estimate, upper_bound, lower_bound, interval

In [29]:
# Try the function: 3 rooms, PTRATIO of 20, next to the river, wider (95) range.
get_log_estimates(3, 20, next_to_river=True, high_confidence=True)

(2.7767581914803987, 3.1517824618746593, 2.401733921086138, 95)

In [30]:
# Median of the original (non-log) target values; used below to build SCALE_FACTOR.
np.median(boston_dataset.target)

21.2

In [35]:
# Ratio between a reference median price and the dataset's median target value.
ZILLOW_MEDIAN_PRICE = 583.8
SCALE_FACTOR = ZILLOW_MEDIAN_PRICE / np.median(boston_dataset.target)

log_est, upper, lower, conf = get_log_estimates(
    9,
    student_per_class=15,
    next_to_river=False,
    high_confidence=False,
)

# Undo the log transform, then multiply by 1000 and by SCALE_FACTOR to express
# the values as current dollar prices.
dollar_est, dollar_hi, dollar_low = (
    np.e**log_value * 1000 * SCALE_FACTOR
    for log_value in (log_est, upper, lower)
)

# Round each price to the nearest thousand.
rounded_est, rounded_hi, rounded_low = (
    np.around(amount, -3)
    for amount in (dollar_est, dollar_hi, dollar_low)
)

print(f'The esimate at the {conf}% prediction interval.')
print(f'The estimate price is ${rounded_est}.')
print(f'The lower range is ${rounded_low} and the upper range is ${rounded_hi}')


The esimate at the 68% prediction interval.
The estimate price is $827000.0.
The lower range is $686000.0 and the upper range is $998000.0


In [59]:
def get_estimate(rm, ptratio, chas=False, pred_inter=True):
    """
    Print a price estimate, with its range, for a Boston real estate property.

    Keyword arguments:
    rm -- number of rooms in the property
    ptratio -- number of children per teacher in the classroom
    chas -- True if the property is next to the river, False otherwise
    pred_inter -- True for the 95% prediction interval, False for the 68% one

    The result is printed, not returned. If rm or ptratio is below 1, a
    message is printed and no estimate is made.
    """
    if rm < 1 or ptratio < 1:
        print('This is unrealistic, Try again')
        return

    log_price, log_high, log_low, level = get_log_estimates(
        rm,
        student_per_class=ptratio,
        next_to_river=chas,
        high_confidence=pred_inter,
    )

    def to_rounded_dollars(log_value):
        # Undo the log transform, scale to current dollars, round to the nearest thousand.
        return np.around(np.e**log_value * 1000 * SCALE_FACTOR, -3)

    price = to_rounded_dollars(log_price)
    price_high = to_rounded_dollars(log_high)
    price_low = to_rounded_dollars(log_low)

    print(f'The esimate at the {level}% prediction interval.')
    print(f'The estimate price is ${price}.')
    print(f'The lower range is ${price_low} and the upper range is ${price_high}')

In [60]:
# Try the notebook version: 3 rooms, PTRATIO of 15, next to the river, default 95% range.
get_estimate(rm=3, ptratio=15, chas=True)

The esimate at the 95% prediction interval.
The estimate price is $533000.0.
The lower range is $367000.0 and the upper range is $776000.0


In [62]:
# Same call through the imported boston_valuation module, to compare with the notebook version.
vals.get_estimate(rm=3, ptratio=15, chas=True)

The esimate at the 95% prediction interval.
The estimate price is $533000.0.
The lower range is $367000.0 and the upper range is $776000.0


In [2]:
import calendar

# Text calendar whose weeks start on Friday.
c = calendar.TextCalendar(firstweekday=calendar.FRIDAY)

# Print the month of December 2021.
c.prmonth(theyear=2021, themonth=12)

   December 2021
Fr Sa Su Mo Tu We Th
                1  2
 3  4  5  6  7  8  9
10 11 12 13 14 15 16
17 18 19 20 21 22 23
24 25 26 27 28 29 30
31
