In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Recently I published a self help book titled Inspiration: Thoughts on Spirituality, Technology, Wealth, Leadership and Motivation. The preview of the book can be read from the Amazon link https://lnkd.in/gj7bMQA

Every machine learning algorithm has hyperparameters. Many people simply use the default values, but if we optimise these hyperparameters we will be in a position to further improve the performance of our machine learning models. In this notebook we will cover the following:

1.Data Import

2.Data Preparation

3.Building a Random Forest Model

4.Hyper Parameter tuning using Grid Search

5.Creating dataframe and Pivot table of hyperparameter and accuracy

6.Result Visualization

7.Conclusion

### You can refer to my other notebooks from https://www.kaggle.com/binuthomasphilip/code

# 1. Data Import and Exploration

### Importing Python Modules

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from dateutil import parser
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.svm import SVC 
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
from sklearn.model_selection import GridSearchCV
import pickle
from lightgbm import LGBMClassifier
import warnings
# Apply matplotlib's 'fivethirtyeight' style sheet to figures drawn through plt.
plt.style.use('fivethirtyeight')
# NOTE(review): `warnings` was already imported earlier in this cell; this re-import is redundant but harmless.
import warnings
# Ignore every warning from here on. This keeps cell output uncluttered,
# but it also hides deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

### Importing Data

In [None]:
# Read the banknote authentication CSV into a DataFrame and preview the first rows.
DATA_PATH = '../input/banknote-authentication-uci/BankNoteAuthentication.csv'
df = pd.read_csv(DATA_PATH)
df.head()

# 2.Data Preparation  

### 2.1 Creating Matrix of Features

In [None]:
# Positional split: every column except the last forms the feature matrix,
# and the last column is taken as the target vector.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

### 2.2 Examining the Shape of Data

In [None]:
# Display the (rows, columns) dimensions of the full DataFrame (features plus the target column).
df.shape

### 2.3 Test Train Split

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# 3.Model Build

We will build a Random Forest model to classify bank notes. We will consider the `n_estimators` and `max_features` hyperparameters while building the model.

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Baseline random forest: 100 trees, each split considering 4 features.
# random_state is pinned so that a Restart & Run All rebuilds the same forest
# and therefore reports the same scores; without it every run differs.
rf = RandomForestClassifier(max_features=4, n_estimators=100, random_state=0)

In [None]:
# Train the baseline forest on the training split (the fitted estimator is shown as cell output).
rf.fit(X=X_train, y=y_train)

### 3.1 Making Predictions

In [None]:
# Predict a class label for every row of the held-out test features.
y_pred = rf.predict(X=X_test)

### 3.2 Mean Accuracy via `score`

In [None]:
# Score the fitted forest directly on the held-out test split.
rf.score(X=X_test, y=y_test)

### 3.3 Accuracy Score 

In [None]:
# Accuracy computed from the stored predictions. scikit-learn's signature is
# accuracy_score(y_true, y_pred), so the ground-truth labels are passed first.
accuracy_score(y_test, y_pred)

Both approaches report the same accuracy. The second one works from `y_pred`, so we also have access to the predicted values themselves.

# 4. Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space: max_features in 1..4 and n_estimators in 10, 20, ..., 200.
max_features_range = np.arange(1, 5, 1)
n_estimators_range = np.arange(10, 201, 10)
param_grid = dict(max_features=max_features_range, n_estimators=n_estimators_range)

# Fresh, unfitted estimator dedicated to the search. It was previously created
# but never used (the already-fitted baseline model was passed instead).
# random_state is pinned so the cross-validation scores are reproducible.
rd = RandomForestClassifier(random_state=0)

# Exhaustive search over every combination in param_grid with 5-fold cross-validation.
grid = GridSearchCV(estimator=rd, param_grid=param_grid, cv=5)

In [None]:
# Run the cross-validated grid search on the training split only.
grid.fit(X=X_train, y=y_train)

In [None]:
# Report the winning hyperparameter combination and its best cross-validation score.
best_summary = "The best parameters are %s with a score of %0.2f" % (
    grid.best_params_,
    grid.best_score_,
)
print(best_summary)

# 5. Hyperparameters and Accuracy

In [None]:
# One row per hyperparameter combination tried by the search, placed side by side
# with that combination's mean test score (stored under the "Accuracy" column).
params_frame = pd.DataFrame(grid.cv_results_["params"])
accuracy_frame = pd.DataFrame(grid.cv_results_["mean_test_score"], columns=["Accuracy"])
grid_results = pd.concat([params_frame, accuracy_frame], axis=1)
grid_results.head()

### 

# 6.Result Visualization

### 6.1 2D Contour Plot

In [None]:
# Average the accuracy for each (max_features, n_estimators) pair;
# the two grouping keys become the index of the resulting frame.
grouping_keys = ['max_features', 'n_estimators']
grid_contour = grid_results.groupby(grouping_keys).mean()
grid_contour

In [None]:
# Reshape the long results table into a grid: one row per max_features value,
# one column per n_estimators value, with accuracy in the cells.
grid_reset = grid_contour.reset_index()
grid_reset.columns = ['max_features', 'n_estimators', 'Accuracy']
# index/columns must be passed by keyword: positional arguments to
# DataFrame.pivot raise a TypeError on pandas >= 2.0. `values` is left unset
# on purpose so the result keeps two-level columns of the form
# ('Accuracy', n_estimators).
grid_pivot = grid_reset.pivot(index='max_features', columns='n_estimators')
grid_pivot

In [None]:
# Pull the plotting inputs out of the pivot table.
# NOTE: `y` is rebound here; from this cell on it no longer refers to the
# target column assigned during data preparation.
accuracy_columns = grid_pivot.columns
x = accuracy_columns.levels[1].values  # second column level: the n_estimators values
y = grid_pivot.index.values            # row index: the max_features values
z = grid_pivot.values                  # 2-D array of accuracies, rows follow y and columns follow x

In [None]:
import plotly.graph_objects as go

# Axis titles for the figure; `layout` is kept as a module-level name for reuse.
layout = go.Layout(
    xaxis=dict(title=dict(text='n_estimators')),
    yaxis=dict(title=dict(text='max_features')),
)

# Contour of accuracy (z) over n_estimators (x) and max_features (y).
contour_trace = go.Contour(x=x, y=y, z=z)
fig = go.Figure(data=[contour_trace], layout=layout)

# Fixed 500x500 canvas with explicit margins and a title.
fig.update_layout(
    title='Hyperparameter tuning',
    autosize=False,
    width=500,
    height=500,
    margin={'l': 65, 'r': 50, 'b': 65, 't': 90},
)

fig.show()

We can see that the light-yellow region has the best hyperparameters, so we can select hyperparameters from this area. Our Grid Search has given us the best values for max_features and n_estimators as 1 and 30. If we look at the 2D contour plot carefully, we can see that a region is marked in light yellow, indicating the best hyperparameters.

### 6.2 3D Surface Plot

In [None]:
import plotly.graph_objects as go


# 3-D surface of accuracy (z) over n_estimators (x) and max_features (y),
# built on the same `layout` object used for the contour figure.
surface_trace = go.Surface(x=x, y=y, z=z)
fig = go.Figure(data=[surface_trace], layout=layout)

# Title, 3-D axis labels, and a fixed 800x800 canvas with explicit margins.
scene_axis_titles = dict(
    xaxis_title='n_estimators',
    yaxis_title='max_features',
    zaxis_title='Accuracy',
)
fig.update_layout(
    title='Hyperparameter tuning',
    scene=scene_axis_titles,
    autosize=False,
    width=800,
    height=800,
    margin={'l': 65, 'r': 50, 'b': 65, 't': 90},
)
fig.show()

# 7.Conclusion 

1. We have built a Random Forest model to predict fake notes.

2. We have used Grid Search to find the best hyperparameters for our machine learning model.

3.We have displayed the hyperparameters with model accuracy with 2D and 3D plots.

### You can refer to my other notebooks from https://www.kaggle.com/binuthomasphilip/code