In [33]:
from sklearn.datasets import load_iris
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import (confusion_matrix, classification_report, precision_recall_fscore_support,
                             roc_auc_score, roc_curve)

##plotting
from plotly.offline import init_notebook_mode, iplot, download_plotlyjs
import plotly.offline as pyo
import cufflinks as cf
init_notebook_mode(connected=True)
cf.go_offline()

In [2]:
iris = load_iris()

In [3]:
df = pd.DataFrame(np.concatenate((iris.data.reshape(-1,4),iris.target.reshape(-1,1)),axis=1))
df.columns =[feature[:-5] for feature in iris.feature_names]+['target']

In [38]:
df['target2'] = df.target.apply(lambda x: iris['target_names'][int(x)])

In [39]:
target = 'target'
variables = list(set(df.columns) - set([target,'target2']))

In [41]:
pyo.iplot(
    {
        'data': [
            {
                'x': df[df['target2']==label]['petal width'],
                'y': df[df['target2']==label]['petal length'],
                'name': label, 'mode': 'markers',
            } for label in iris.target_names
        ],
        'layout': {
            'xaxis': {'title': 'petal width'},
            'yaxis': {'title': "petal length"}
        }
})

In [43]:
X_train, X_test, y_train, y_test = train_test_split(df[variables],df[target],test_size=0.2,random_state=42)
rf = RandomForestClassifier(random_state=42, max_depth=4, n_estimators=10, min_samples_leaf=3)
rf.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=4, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=3, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=42, verbose=0, warm_start=False)

In [44]:
print(classification_report(y_train,rf.predict(X_train)))

              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        40
         1.0       0.95      0.93      0.94        41
         2.0       0.93      0.95      0.94        39

   micro avg       0.96      0.96      0.96       120
   macro avg       0.96      0.96      0.96       120
weighted avg       0.96      0.96      0.96       120



In [54]:
print(confusion_matrix(y_train,rf.predict(X_train)))
pd.DataFrame(confusion_matrix(y_train,rf.predict(X_train))).iplot(kind='heatmap',colorscale='greys')

[[40  0  0]
 [ 0 38  3]
 [ 0  2 37]]


In [46]:
rf.predict_proba(X_train)

array([[1.        , 0.        , 0.        ],
       [0.96      , 0.04      , 0.        ],
       [0.        , 0.98965517, 0.01034483],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.39190476, 0.60809524],
       [0.        , 0.98965517, 0.01034483],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.        , 0.09333333, 0.90666667],
       [0.        , 0.90298851, 0.09701149],
       [0.        , 0.98965517, 0.01034483],
       [1.        , 0.        , 0.        ],
       [1.        , 0.        , 0.        ],
       [0.06      , 0.94      , 0.        ],
       [0.        , 0.49084565, 0.50915435],
       [0.        , 0.03333333, 0.96666667],
       [0.        , 0.98965517, 0.01034483],
       [0.        , 0.        , 1.        ],
       [0.        , 0.98965517, 0.01034483],
       [0.        , 0.        , 1.        ],
       [0.

$Precision = \frac{TP}{TP+FP}$