### <center><h1> Ensembling & Random Forest </h1></center>

In [2]:
# Importing necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import accuracy_score, roc_auc_score, make_scorer,mean_squared_error,r2_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingRegressor,BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
import math
from mlxtend.regressor import StackingRegressor
import warnings
warnings.filterwarnings("ignore")

In [3]:
#Loading the house prices dataset for the mini-challenge
df = pd.read_csv('graduate-admissions/Admission_Predict.csv')
df=df.rename(columns = {'Chance of Admit ':'Chance of Admit'})
df.head()

Unnamed: 0,Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit
0,1,337,118,4,4.5,4.5,9.65,1,0.92
1,2,324,107,4,4.0,4.5,8.87,1,0.76
2,3,316,104,3,3.0,3.5,8.0,1,0.72
3,4,322,110,3,3.5,2.5,8.67,1,0.8
4,5,314,103,2,2.0,3.0,8.21,0,0.65


# Ensemble Methods

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 1
***
### Instructions
* Split the dataset into features and target variable(`Chance of Admit`) then split it into train and test.

In [4]:
X = df.drop('Chance of Admit', axis =1)
y = df['Chance of Admit']

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 9)

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 2
***
### Instructions
### Decision Tree
* Fit a Decision tree model on the above data and predict on the test data and calculate the r2 score

In [6]:
de = DecisionTreeRegressor()
de.fit(X_train, y_train)
y_pred = de.predict(X_test)
r2 = r2_score(y_test,y_pred)
print(r2)

0.7189996862426272


<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 3
***
### Instructions
### Bagging
* Fit a Bagging model(i.e resampling with replacement) on the above data and predict on the test data and calculate the r2 score.Pass the parameter `base_estimator= DecisionTreeRegressor()`, `n_estimators=20` , `max_samples=100` and `random_state=0`

In [7]:
br = BaggingRegressor(base_estimator = DecisionTreeRegressor(), n_estimators = 20, max_samples = 100, random_state = 0)
br.fit(X_train, y_train)
y_pred = br.predict(X_test)

In [8]:
r2_score(y_test, y_pred)

0.8505171816694654

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 4
***
### Instructions
### Pasting
* Fit a Bagging model(i.e resampling without replacement) on the above data and predict on the test data and calculate the r2 score. Pass the parameter `base_estimator= DecisionTreeRegressor`, `n_estimators=100` , `max_samples=100`, `bootstrap=False` and `random_state=0`

In [9]:
br1 = BaggingRegressor(base_estimator = DecisionTreeRegressor(), n_estimators = 20, max_samples = 100, bootstrap = False, random_state = 0)
br1.fit(X_train, y_train)
y_pred = br1.predict(X_test)

In [10]:
r2_score(y_test, y_pred)

0.8569886741481685

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 5
***
### Instructions
### Random Forest
* Fit a Random Forest regressor on the above data and predict on the test data and calculate the r2 score. Pass the parameter `random_state=9`

In [11]:
a = RandomForestRegressor(random_state = 9)
a.fit(X_train, y_train)
y_pred1 = a.predict(X_test)

In [12]:
r2 = r2_score(y_test, y_pred1)
r2

0.85763801871099

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 6
***
### Instructions
### Random Forest tuned with GridSearchCV
* Fit a RandomForest Regressor with `random_state=9` and tune the parameters using GridSearchCV. Search for the best set of features amongst the following list of features and their value.`'n_estimators': [10,20,30],'max_depth': [6,8],'min_samples_split': [6,8]`

In [13]:
a1 = RandomForestRegressor(random_state = 9)
params = {
    'n_estimators': [10,20,30],
    'max_depth': [6,8],
    'min_samples_split': [6,8]
}
tuning = GridSearchCV(estimator = a1, param_grid = params)

In [14]:
tuning.fit(X_train, y_train)
y_pred2 = tuning.predict(X_test)

In [15]:
r2 = r2_score(y_test, y_pred2)
r2

0.8820283195170714

<img src="../images/icon/ppt-icons.png" alt="Concept-Alert" style="width: 100px;float:left; margin-right:15px"/>
<br /> 

##  Mini-Challenge - 7
***
### Instructions
### Stacking
* Stack the below models(i.e `regressor1,regressor2,regressor3,regressor4`) and predict on the test data.
* Use the Stacking() from mlxtend to initialize a stacking Regressor object. Pass the `regressor_list` to parameter `regressors` and `lin_reg` as `meta_regressor` parameter , while initializing the object.

In [16]:
list = [de,br,br1,a,a1]
lin_reg = LinearRegression()
z = StackingRegressor(regressors = list, meta_regressor = lin_reg)
z.fit(X_train, y_train)
y_pred3 = z.predict(X_test)

In [17]:
r2_score(y_test, y_pred3)

0.7156943805857201