In [None]:
!pip install mlxtend

In [1]:
# Importing necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, roc_auc_score, make_scorer,mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingRegressor,BaggingRegressor
from sklearn.linear_model import LinearRegression
import math
from mlxtend.regressor import StackingRegressor

ModuleNotFoundError: No module named 'mlxtend'

In [None]:
# Read the house prices dataset used by the mini-challenges and preview its first rows.
df = pd.read_csv(filepath_or_buffer='../data/house_prices.csv')
df.head(n=5)

# Random Forest Solutions

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 1
LabelEncode the categorical features.

In [None]:
# Every column that pandas does not report as numeric is treated as categorical.
numeric_columns = set(df._get_numeric_data().columns)
categorical_features = list(set(df.columns) - numeric_columns)

# Replace each categorical column with integer codes; values are cast to str
# before encoding, and the single encoder is simply re-fitted per column.
label_enc = LabelEncoder()
for feature in categorical_features:
    column_as_text = df[feature].astype(str)
    df[feature] = label_enc.fit_transform(column_as_text)

# Recompute the list after encoding; it is expected to be empty now that the
# previously non-numeric columns hold integer codes.
numeric_columns = set(df._get_numeric_data().columns)
categorical_features = list(set(df.columns) - numeric_columns)

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 2
Separate the features from the target variable, and then split the data into train and test sets.
The 'SalePrice' column is the target variable.

In [None]:
# Separate the predictors from the target column 'SalePrice'.
X = df.drop(columns='SalePrice')
y = df['SalePrice']
# Seed the shuffle so the train/test partition, and every score computed from it,
# is identical on each run; 9 matches the random_state given to the
# RandomForestRegressor in this notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=9)

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 3
Fit a RandomForest Regressor and tune the parameters using GridSearchCV.
Search for the best set of hyperparameters amongst the following parameters and their candidate values.
'n_estimators': [10,20,30],
'max_depth': [6,8],
'min_samples_split': [10,15],

In [None]:
# Tune a RandomForestRegressor with GridSearchCV using 3-fold cross-validation.
regressor = RandomForestRegressor(random_state=9)

# Candidate values for each hyperparameter; every combination is evaluated.
params = dict(
    n_estimators=[10, 20, 30],
    max_depth=[6, 8],
    min_samples_split=[10, 15],
)

search = GridSearchCV(estimator=regressor, param_grid=params, cv=3)
# Left as the last expression so the cell displays the fitted search object.
search.fit(X_train, y_train)

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 4
Predict on the test data and calculate the mean-squared-error of your model.

In [None]:
# Predict on the held-out split with the fitted grid search and show the
# mean squared error as the cell output.
y_pred = search.predict(X_test)
mean_squared_error(y_true=y_test, y_pred=y_pred)

# Ensemble Methods Solutions

In [None]:
# Reload the raw house prices dataset so the ensemble section starts from the
# unencoded data, then preview its first rows.
df = pd.read_csv(filepath_or_buffer='../data/house_prices.csv')
df.head(n=5)

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 1
LabelEncode the categorical features, split the dataset into features and the target variable ('SalePrice'), and then split it into train and test sets.

In [None]:
# Every column that pandas does not report as numeric is treated as categorical.
categorical_features = list(set(df.columns) - set(df._get_numeric_data().columns))

# Replace each categorical column with integer codes; values are cast to str
# before encoding, and the single encoder is simply re-fitted per column.
label_enc = LabelEncoder()
for feature in categorical_features:
    df[feature] = label_enc.fit_transform(df[feature].astype(str))

# Recompute the list after encoding; it is expected to be empty now that the
# previously non-numeric columns hold integer codes.
categorical_features = list(set(df.columns) - set(df._get_numeric_data().columns))

# Separate the predictors from the target column 'SalePrice'.
X = df.drop(columns='SalePrice')
y = df['SalePrice']
# Seed the shuffle so the train/test partition, and every score computed from it,
# is identical on each run; 9 matches the random_state given to the
# RandomForestRegressor in this notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=9)

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 2
Fit a GradientBoosting Regressor on the above data, predict on the test data, and calculate the mean-squared-error.

In [None]:
# Gradient boosting baseline with default hyperparameters. The seed is fixed
# (same value as the RandomForestRegressor in this notebook) so that refitting
# yields the same model and the same reported error.
gb_regressor = GradientBoostingRegressor(random_state=9)
gb_regressor.fit(X_train, y_train)

# Evaluate on the held-out split; the MSE is shown as the cell output.
y_pred_gb = gb_regressor.predict(X_test)
mean_squared_error(y_test, y_pred_gb)

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 3
Fit a Bagging model on the above data, predict on the test data, and calculate the mean-squared-error.

In [None]:
# Bagging ensemble with default hyperparameters. Bagging resamples the training
# data at random, so the seed is fixed (same value as the RandomForestRegressor
# in this notebook) to make the reported error reproducible.
bag_regressor = BaggingRegressor(random_state=9)
bag_regressor.fit(X_train, y_train)

# Evaluate on the held-out split; the MSE is shown as the cell output.
y_pred_bag = bag_regressor.predict(X_test)
mean_squared_error(y_test, y_pred_bag)

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 4
Stack the above models and predict on the test data.

In [None]:
# Stack the bagging and gradient-boosting regressors, using a linear regression
# as the meta-regressor that combines them.
regression_models = [bag_regressor, gb_regressor]
lin_reg = LinearRegression()
stacking_regressor = StackingRegressor(
    regressors=regression_models,
    meta_regressor=lin_reg,
)
stacking_regressor.fit(X_train, y_train)

# Evaluate the stacked model on the held-out split; the MSE is the cell output.
y_pred_stack = stacking_regressor.predict(X_test)
mean_squared_error(y_true=y_test, y_pred=y_pred_stack)