## Random Forest Classification

### Import dataset and libraries

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [5]:
# Load the ads CSV into a DataFrame (path is relative to the working directory).
dataset = pd.read_csv(filepath_or_buffer='Social_Network_Ads.csv')

In [11]:
# Positional split: every column except the last forms the feature matrix X,
# and the last column is the target vector y (both taken as NumPy arrays).
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

### Train test split

In [13]:
from sklearn.model_selection import train_test_split

In [15]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

### Training and testing
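Hyperparameters are tuned with [Grid Search Cross Validation](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html), which fits the model for every combination in the parameter grid and keeps the best-scoring one.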

In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [18]:
# Seed the forest: its bootstrap sampling and per-split feature sub-sampling are random,
# so without a fixed random_state the selected hyperparameters and all downstream
# metrics change on every run of the notebook.
rf_classifier = RandomForestClassifier(random_state=0)

# Candidate hyperparameters; the search tries every combination (3 * 2 * 2 = 12).
param_grid = [
    {
        'n_estimators': [10, 250, 500],
        'max_leaf_nodes': [10, 16],
        'criterion': ['gini', 'entropy'],
    }
]

# Each combination is scored with 5-fold cross-validation on the data passed to fit().
grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, cv=5)

In [19]:
# Run the cross-validated search using the training split only.
grid_search.fit(X=X_train, y=y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid=[{'criterion': ['gini', 'entropy'],
                          'max_leaf_nodes': [10, 16],
                          'n_estimators': [10, 250, 500]}])

In [20]:
# Predict labels for the held-out test features with the fitted search object.
y_predicted = grid_search.predict(X=X_test)

### Validation

In [23]:
from sklearn.metrics import precision_score, accuracy_score, recall_score, confusion_matrix, f1_score

# Label every figure: a column of bare floats cannot be told apart without
# reading the source to recover the print order.
# Confusion matrix rows are the true classes, columns are the predicted classes.
print("Confusion matrix:")
print(confusion_matrix(y_test, y_predicted))
print(f"Precision: {precision_score(y_test, y_predicted)}")
print(f"Accuracy:  {accuracy_score(y_test, y_predicted)}")
print(f"Recall:    {recall_score(y_test, y_predicted)}")
print(f"F1 score:  {f1_score(y_test, y_predicted)}")

[[55  3]
 [ 1 21]]
0.875
0.95
0.9545454545454546
0.9130434782608695


In [24]:
# Show the hyperparameter combination the grid search selected as best.
grid_search.best_params_

{'criterion': 'gini', 'max_leaf_nodes': 10, 'n_estimators': 10}