# Implementing a Random Forest Classification Model in Python

### Loading data

In [1]:
import pandas as pd

# Column labels applied to the CSV; passing them via `names=` makes pandas
# treat every row of the file as data rather than as a header.
names = [
    'preg', 'plas', 'pres', 'skin', 'test',
    'mass', 'pedi', 'age', 'class',
]

# Remote location of the Pima Indians diabetes dataset.
DATA_URL = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

df = pd.read_csv(DATA_URL, names=names)

# Sanity check: (rows, columns) first, then the first five records.
print(df.shape)

print(df.head())

(768, 9)
   preg  plas  pres  skin  test  mass   pedi  age  class
0     6   148    72    35     0  33.6  0.627   50      1
1     1    85    66    29     0  26.6  0.351   31      0
2     8   183    64     0     0  23.3  0.672   32      1
3     1    89    66    23    94  28.1  0.167   21      0
4     0   137    40    35   168  43.1  2.288   33      1


### Creating a random forest model

In [6]:
# Separate the label column from the predictors:
#   y -> the 'class' column (the value the model will learn to predict)
#   X -> every remaining column of df
y = df['class']
X = df.drop(labels=['class'], axis='columns')

In [7]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the resulting partition, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=66,
)


In [13]:
from sklearn import model_selection

from sklearn.ensemble import RandomForestClassifier

# Seed the forest so its internal randomness (bootstrap sampling and
# per-split feature selection) is identical on every run. Without a seed,
# each Restart & Run All yields a different model, so the confusion matrix,
# classification report and AUC scores reported further down are not
# reproducible. 66 mirrors the seed already used for the train/test split.
rfc = RandomForestClassifier(random_state=66)

# Fit on the training partition only; the held-out rows stay unseen.
rfc.fit(X_train, y_train)

# Predicted class labels for the held-out rows, used by the evaluation cells.
rfc_predict = rfc.predict(X_test)

### Evaluating Performance

In [15]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix

In [16]:
# 10-fold cross-validated ROC AUC, one score per fold. Note that this is
# computed over the complete dataset (X, y), not just the training split.
rfc_cv_score = cross_val_score(
    rfc,
    X,
    y,
    scoring='roc_auc',
    cv=10,
)

In [17]:
# Each section is emitted as: heading, body, then a '\n' string. With
# sep='\n' plus print's own trailing newline this leaves two blank lines
# after every section.

# Hold-out performance: confusion matrix of true vs. predicted labels.
print("=== Confusion Matrix ===", confusion_matrix(y_test, rfc_predict), '\n', sep='\n')

# Hold-out performance: per-class precision / recall / F1.
print("=== Classification Report ===", classification_report(y_test, rfc_predict), '\n', sep='\n')

# Cross-validated performance: the individual fold AUCs ...
print("=== All AUC Scores ===", rfc_cv_score, '\n', sep='\n')

# ... and their average (final section, so no trailing blank lines).
print("=== Mean AUC Score ===")
print("Mean AUC Score - Random Forest: ", rfc_cv_score.mean())

=== Confusion Matrix ===
[[153  23]
 [ 33  45]]


=== Classification Report ===
             precision    recall  f1-score   support

          0       0.82      0.87      0.85       176
          1       0.66      0.58      0.62        78

avg / total       0.77      0.78      0.78       254



=== All AUC Scores ===
[ 0.76555556  0.81222222  0.78703704  0.66259259  0.73962963  0.84592593
  0.82407407  0.86925926  0.76076923  0.85269231]


=== Mean AUC Score ===
Mean AUC Score - Random Forest:  0.791975783476
