In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import ShuffleSplit

import visuals as vs

%matplotlib inline

# Read the housing table, keep the 'MEDV' column as `prices`, and use every
# remaining column as `features`.
data = pd.read_csv('housing.csv')
prices = data['MEDV']
features = data.drop(columns='MEDV')

# data.shape is (rows, columns); the column count reported here still includes 'MEDV'.
n_points, n_variables = data.shape
print("Boston housing dataset has {} data points with {} variables each.".format(n_points, n_variables))

In [None]:
# Quick look at the smallest value in `prices` (shown as the cell output).
np.min(prices)

In [None]:
# Descriptive statistics of `prices`.
minimum_price = np.min(prices)
maximum_price = np.max(prices)
mean_price = np.mean(prices)
median_price = np.median(prices)
# np.std uses ddof=0 by default, i.e. the population standard deviation.
std_price = np.std(prices)

# Report every value with the same currency format (thousands separator,
# two decimals) and the same "label: value" layout.
print("Statistics for Boston housing dataset:\n")
print("Minimum price: ${:,.2f}".format(minimum_price))
print("Maximum price: ${:,.2f}".format(maximum_price))
print("Mean price: ${:,.2f}".format(mean_price))
print("Median price: ${:,.2f}".format(median_price))
print("Standard deviation of prices: ${:,.2f}".format(std_price))

**Developing a Model**

In [None]:
from sklearn.metrics import r2_score

def performance_metric(y_true, y_predict):
    """Score predictions against the true values.

    The chosen metric is `r2_score`; its result for (y_true, y_predict)
    is returned unchanged.
    """
    return r2_score(y_true, y_predict)

In [None]:
from sklearn.model_selection import train_test_split


# Hold out 20% of the rows for testing; the fixed random_state keeps the
# shuffle identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    prices,
    test_size=0.2,
    random_state=42,
)

print("Training and testing split was successful.")

In [None]:
# `vs` is the project-local `visuals` module. This call is given the full
# dataset (features, prices), not the train split.
# NOTE(review): presumably draws learning curves — confirm against visuals.py.
vs.ModelLearning(features, prices)

In [None]:
# `vs` is the project-local `visuals` module. Only the training split is
# passed here.
# NOTE(review): presumably draws a model-complexity curve — confirm against visuals.py.
vs.ModelComplexity(X_train, y_train)

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import make_scorer
from sklearn.model_selection import GridSearchCV

def fit_model(X, y):
    """Grid-search the 'max_depth' of a decision tree regressor on [X, y].

    Parameters
    ----------
    X : features handed to ``GridSearchCV.fit``.
    y : target values handed to ``GridSearchCV.fit``.

    Returns
    -------
    The ``best_estimator_`` of the fitted grid search, i.e. the
    DecisionTreeRegressor with the best-scoring 'max_depth'.
    """
    # Ten shuffled splits, each holding out 20% for validation; seeded so the
    # same splits are produced on every run.
    cv_sets = ShuffleSplit(n_splits=10, test_size=0.20, random_state=0)

    # Seed the tree as well: scikit-learn permutes candidate features at each
    # split, so an unseeded tree can yield a different "best" depth per run.
    regressor = DecisionTreeRegressor(random_state=0)

    # Candidate depths 1 through 10.
    params = {'max_depth': list(range(1, 11))}

    # Wrap the notebook's own metric so the grid search ranks models with it.
    scoring_fnc = make_scorer(performance_metric)

    grid = GridSearchCV(estimator=regressor, param_grid=params, scoring=scoring_fnc, cv=cv_sets)
    grid.fit(X, y)

    return grid.best_estimator_

In [None]:
# Tune the tree on the training split and keep the winning estimator.
reg = fit_model(X_train, y_train)

# Read the selected depth back from the estimator's parameters.
optimal_depth = reg.get_params()['max_depth']
print("Parameter 'max_depth' is {} for the optimal model.".format(optimal_depth))

In [None]:
# One row per client. `client_data` stays a plain list of lists because it is
# also passed to vs.PredictTrials in a later cell.
# NOTE(review): values are assumed to follow the column order of the training
# features — confirm against housing.csv.
client_data = [[5, 17, 15],
               [4, 32, 22],
               [8, 3, 12]]

# `reg` was fitted on a DataFrame, so label the rows with the training column
# names before predicting. This keeps the feature mapping explicit and avoids
# scikit-learn's "X does not have valid feature names" warning.
client_frame = pd.DataFrame(client_data, columns=X_train.columns)

for i, price in enumerate(reg.predict(client_frame)):
    print("Predicted selling price for Client {}'s home: ${:,.2f}".format(i+1, price))

In [None]:
# `vs` is the project-local `visuals` module. The call receives the full
# dataset, the `fit_model` function itself (not a fitted model) and the
# client rows.
# NOTE(review): presumably refits on several splits and reports the spread of
# predictions — confirm against visuals.py.
vs.PredictTrials(features, prices, fit_model, client_data)