In [1]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

In [4]:
# Gather Data: load the Boston housing set and put the raw features in a DataFrame.
boston_dataset = load_boston()
data = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)
# INDUS and AGE are left out of the model inputs.
features = data.drop(columns=["INDUS", "AGE"])

# The model is trained on the natural log of the prices.
log_prices = np.log(boston_dataset.target)
# Convert to a 2D, single-column structure.
target = pd.DataFrame(log_prices, columns=["PRICE"])

In [11]:
# Column positions (within `features`) of the values that can be overridden
# when building an estimate. They are looked up by name rather than
# hard-coded so they stay correct if the set of dropped columns changes.
CRIME_IDX = features.columns.get_loc("CRIM")
ZN_IDX = features.columns.get_loc("ZN")
CHAS_IDX = features.columns.get_loc("CHAS")
RM_IDX = features.columns.get_loc("RM")
PTRATIO_IDX = features.columns.get_loc("PTRATIO")

# Create the property_stats variable: a default property whose every feature
# is the column mean. The pandas Series of means is reshaped into a single-row
# 2D ndarray (1 x number of features); -1 lets numpy infer the column count.
property_stats = features.mean().values.reshape(1, -1)

In [26]:
# Fit a linear regression of the log prices on the selected features.
regr = LinearRegression()
regr.fit(features, target)

# In-sample predictions, used to measure how far the fit is from the targets.
fitted_values = regr.predict(features)

# Mean squared error of the fit and its square root (same units as the target).
MSE = mean_squared_error(target, fitted_values)
RMSE = np.sqrt(MSE)

In [33]:
def get_log_estimate(room_number, pupil_teacher_ratio, beside_river = False, high_confidence = True):
    """Estimate the log price of a property using the fitted regression.

    All features other than the ones passed in keep the default values held
    in ``property_stats``.

    Parameters
    ----------
    room_number
        Value written into the RM feature.
    pupil_teacher_ratio
        Value written into the PTRATIO feature.
    beside_river : bool, optional
        If true the CHAS feature is set to 1, otherwise to 0.
    high_confidence : bool, optional
        If true the range is +/- 2 * RMSE and is labelled 95; otherwise it is
        +/- 1 * RMSE and is labelled 68.

    Returns
    -------
    tuple
        ``(log_estimate, upper_bound, lower_bound, interval)``.
    """
    # Work on a copy so the shared default row is not altered between calls.
    stats = property_stats.copy()

    # Configure property values
    stats[0][RM_IDX] = room_number
    stats[0][PTRATIO_IDX] = pupil_teacher_ratio
    stats[0][CHAS_IDX] = 1 if beside_river else 0

    # Predict; the result is indexed as a 2D array, so take its single cell.
    log_estimate = regr.predict(stats)[0][0]

    # Calculate the range: 2 sigma for the wide band, 1 sigma for the narrow one.
    if high_confidence:
        n_sigma, interval = 2, 95
    else:
        n_sigma, interval = 1, 68
    upper_bound = log_estimate + n_sigma * RMSE
    lower_bound = log_estimate - n_sigma * RMSE

    return log_estimate, upper_bound, lower_bound, interval

In [35]:
# Example: 3 rooms, pupil-teacher ratio of 20, beside the river, 1-sigma range.
get_log_estimate(room_number=3, pupil_teacher_ratio=20, beside_river=True, high_confidence=False)

(2.776758191480399, 2.9642703266775294, 2.589246056283269, 68)