In [None]:
# Clear the kernel namespace before anything else runs; -f skips the
# interactive confirmation prompt so "Run All" does not block here.
%reset -f 

In [None]:
# Bagging Regressor Class

In [None]:
# Importing necessary libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor
from sklearn.tree import export_graphviz
import pydot
from IPython.display import Image, display
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Definition of Bagging Regressor Class with functions

class baggingreg:
    """Convenience wrapper around scikit-learn's ``BaggingRegressor``.

    The instance keeps references to the train/test split and to whatever
    model / inputs were last passed to its methods. The fitted model itself is
    returned by :meth:`buildregmodel` and must be handed back to the other
    methods explicitly.
    """

    # Dunder method init
    def __init__(self, x_train, x_test, y_train, y_test):
        """Store the four parts of a train/test split on the instance."""
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test

    # Dunder method del
    def __del__(self):
        """Print a notice when the wrapper object is garbage-collected."""
        print("The Scikit Learn Bagging Regressor has been deleted")

    # Initialize and Train Regression Model
    def buildregmodel(self, x_train, y_train, no_of_estimators, max_features, max_samples, random_state, oob_score):
        """Create a ``BaggingRegressor`` and fit it on the given training data.

        Parameters
        ----------
        x_train, y_train
            Training inputs and targets; they replace the ones stored by
            ``__init__`` so later methods refer to the data the model was
            actually fitted on.
        no_of_estimators
            Forwarded as ``n_estimators``.
        max_features, max_samples, random_state, oob_score
            Forwarded unchanged to ``BaggingRegressor``.

        Returns
        -------
        The fitted ``BaggingRegressor``.
        """
        self.x_train = x_train
        self.y_train = y_train
        self.no_of_estimators = no_of_estimators
        self.max_features = max_features
        self.max_samples = max_samples
        self.random_state = random_state
        self.oob_score = oob_score

        regmodel = BaggingRegressor(
            n_estimators=no_of_estimators,
            max_features=max_features,
            max_samples=max_samples,
            random_state=random_state,
            oob_score=oob_score,
        )
        regmodel.fit(x_train, y_train)

        return regmodel

    # Evaluate and test the model of the Scikit Learn Bagging Regressor
    def testregmodel(self, x_test, y_test, regmodel):
        """Return ``regmodel.score(x_test, y_test)`` and remember the inputs."""
        self.x_test = x_test
        self.y_test = y_test
        self.regmodel = regmodel

        return regmodel.score(x_test, y_test)

    def _feature_names_per_tree(self, regmodel, x):
        """Return, for each base estimator, the column names it was fitted on.

        Each base estimator only sees the columns listed in the matching entry
        of ``regmodel.estimators_features_`` (in that order), so the names
        handed to ``export_graphviz`` have to be selected the same way instead
        of always passing every column of ``x``.
        """
        all_names = list(x.columns)
        return [
            [all_names[column_index] for column_index in feature_indices]
            for feature_indices in regmodel.estimators_features_
        ]

    # Plot the tree graph(s)
    def get_tree_graph(self, regmodel, x):
        """Render every base estimator of ``regmodel`` as a PNG and display it.

        For estimator ``i`` the files ``tree_i.dot`` and ``tree_i.png`` are
        written to the current working directory.

        Parameters
        ----------
        regmodel
            Fitted bagging model exposing ``estimators_`` and
            ``estimators_features_``.
        x
            DataFrame whose columns name the features the model was fitted on.
        """
        self.x = x
        self.regmodel = regmodel

        names_per_tree = self._feature_names_per_tree(regmodel, x)
        for tree_counter, (tree, tree_feature_names) in enumerate(zip(regmodel.estimators_, names_per_tree)):
            dot_file = 'tree_' + str(tree_counter) + '.dot'
            png_file = 'tree_' + str(tree_counter) + '.png'
            export_graphviz(tree, out_file=dot_file, feature_names=tree_feature_names, rounded=True, precision=1)
            (graph, ) = pydot.graph_from_dot_file(dot_file)
            graph.write_png(png_file)
            # Use a dedicated name for the image: the original bound it to
            # `plt`, shadowing the matplotlib alias inside this method.
            display(Image(png_file))

    def _oob_residuals(self, regmodel):
        """Return training target minus out-of-bag prediction, element-wise.

        Uses ``self.y_train`` (set by ``__init__`` / ``buildregmodel``) rather
        than a notebook-global ``y_train``, and subtracts by position so the
        index of the stored targets does not matter.
        """
        actual = np.asarray(self.y_train, dtype=float)
        predicted = np.asarray(regmodel.oob_prediction_, dtype=float)
        return actual - predicted

    # Plot the price difference between the predicted and the real prices of the training set
    def get_oob_prediction_graph(self, regmodel, x):
        """Bar-plot the difference between training targets and OOB predictions.

        Parameters
        ----------
        regmodel
            Fitted bagging model; it must expose ``oob_prediction_``, which
            scikit-learn only provides when the model was built with
            ``oob_score=True`` (otherwise an ``AttributeError`` propagates).
        x
            Stored on the instance; not otherwise used by the plot.
        """
        self.x = x
        self.regmodel = regmodel
        # Set the style
        plt.style.use('fivethirtyeight')
        difference = self._oob_residuals(regmodel)
        # list of x locations for plotting
        x_values = list(range(len(difference)))
        # Make a (vertical, the default) bar chart
        plt.bar(x_values, difference)
        # Axis labels and title
        plt.ylabel('Price Difference')
        plt.xlabel('Number')
        plt.title('Price Difference Predicted/Real');

    # Get the oob_score
    def get_oob_score(self, regmodel):
        """Return ``regmodel.oob_score_`` and remember the model."""
        self.regmodel = regmodel
        return regmodel.oob_score_

    # Predict new results
    def predictregmodel(self, regmodel, x_predict):
        """Return ``regmodel.predict(x_predict)`` and remember both arguments."""
        self.regmodel = regmodel
        self.x_predict = x_predict

        return regmodel.predict(x_predict)
    


In [None]:
# Read Data Set and Split into X_Train, Y_Train, X_Test and Y_Test

# Read Data Set
df = pd.read_excel('Real estate valuation data set.xlsx')

# Define X and Y Data
x = df.drop(columns=['Y house price of unit area', 'No'])  # inputs
y = df['Y house price of unit area']                       # outputs

# Splitting the data (20% held out for testing).
# A fixed random_state makes the split reproducible, so a fresh
# "Restart Kernel -> Run All" yields the same train/test rows every time.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=10)

In [None]:
# Settings handed to the Bagging Regressor when the model is built

# Number of base estimators in the ensemble (integer)
no_of_estimators = 10

# Share of the features drawn from X to train each base estimator
max_features = 1.0

# Share of the samples drawn from X to train each base estimator
max_samples = 1.0

# Seed controlling the random resampling of the original dataset
random_state = 10

# Whether out-of-bag samples are used to estimate the generalization error
oob_score = True

In [None]:
# Create the wrapper object, then build and fit the bagging model

# Wrapper object holding the train/test split
regressionmodel = baggingreg(
    x_train=x_train,
    x_test=x_test,
    y_train=y_train,
    y_test=y_test,
)

# Fitted model, configured from the settings defined earlier
regmodel = regressionmodel.buildregmodel(
    x_train=x_train,
    y_train=y_train,
    no_of_estimators=no_of_estimators,
    max_features=max_features,
    max_samples=max_samples,
    random_state=random_state,
    oob_score=oob_score,
)

In [None]:
# Score the fitted model on the held-out test split

# Value returned by the model's score method for the test data
quality = regressionmodel.testregmodel(x_test=x_test, y_test=y_test, regmodel=regmodel)
print(quality)

In [None]:
# Render and display one graph per base estimator of the fitted model

regressionmodel.get_tree_graph(regmodel=regmodel, x=x)

In [None]:
# Bar chart of the differences between real training prices and the model's out-of-bag predictions
regressionmodel.get_oob_prediction_graph(regmodel=regmodel, x=x)


In [None]:
# The OOB score (an estimate of the quality of the regressor compared to the best constant predictor)
# Score < 0 = worse, score > 0 = better (so the higher the better)
# Stored under its own name: rebinding `oob_score` here would replace the
# boolean setting that is passed to the model when the build cell is re-run.
oob_score_value = regressionmodel.get_oob_score(regmodel)
print(oob_score_value)

In [None]:
# Predict Values
# The model is fitted on a DataFrame, so the new observation is wrapped in a
# DataFrame with the same column names (and order) as the training inputs.
# A bare nested list carries no feature names, which makes scikit-learn warn
# and hides any column-order mistake.
x_predict = pd.DataFrame([[2017, 25, 60, 20, 20, 100]], columns=x.columns)
predicted_Y = regressionmodel.predictregmodel(regmodel, x_predict)
print(predicted_Y)

In [None]:
# Show the parameters the fitted Bagging Regressor was configured with
# (deep=True is get_params' default and also lists nested estimator parameters)
parameters = regmodel.get_params(deep=True)
print(parameters)