# Random Forest Classifier

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

In [6]:
data = load_iris()
dir(data)

['DESCR', 'data', 'feature_names', 'filename', 'target', 'target_names']

### 1. Create & Check Data Frame

In [9]:
df = pd.DataFrame(
    data['data'],
    columns = ['SL','SW','PL','PW']
)
df['target'] = data['target']
df['sp'] = df['target'].apply(
    lambda i: data['target_names'][i]
)
df.head()

Unnamed: 0,SL,SW,PL,PW,target,sp
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


In [11]:
df.shape

(150, 6)

In [13]:
df.isnull()

Unnamed: 0,SL,SW,PL,PW,target,sp
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,False,False,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,False,False,False,False,False
8,False,False,False,False,False,False
9,False,False,False,False,False,False


In [16]:
df.isnull().sum()

SL        0
SW        0
PL        0
PW        0
target    0
sp        0
dtype: int64

### 2. Splitting Data

In [18]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    df[['SL','SW','PL','PW']],
    df[['sp']],
    test_size = .05
)

### 3. Fitting Model Random Forest

In [36]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(
    n_estimators = 100
)

model.fit(x_train, y_train)

  


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [37]:
model.score(x_train, y_train) * 100, '%'

(100.0, '%')

In [38]:
model.score(x_test, y_test) * 100, '%'

(87.5, '%')

In [50]:
x_test.iloc[0]

SL    6.3
SW    2.8
PL    5.1
PW    1.5
Name: 133, dtype: float64

In [51]:
y_test.iloc[0]

sp    virginica
Name: 133, dtype: object

### 4. Predict

In [52]:
model.predict([x_test.iloc[0]])

array(['versicolor'], dtype=object)

In [59]:
model.predict_proba([x_test.iloc[0]])

array([[0.  , 0.63, 0.37]])

In [55]:
model.predict([[7.2,3.2,6.0,1.9]])

array(['virginica'], dtype=object)

In [57]:
model.predict_proba([[7.2,3.2,6.0,1.9]])

array([[0., 0., 1.]])

In [63]:
pred = model.predict([[7.2,3.2,6.0,1.9]])
prob = model.predict_proba([[7.2,3.2,6.0,1.9]])
np.max(prob) * 100, '%', pred

(100.0, '%', array(['virginica'], dtype=object))

### 5. Save Project

In [64]:
import joblib
joblib.dump(model, 'ml22RandomForest')

['ml22RandomForest']