In [96]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from joblib import dump, load
from flask import jsonify
import json

# Ignores warnings
import warnings
warnings.filterwarnings("ignore")

## Load Data

In [74]:
# Bunch returned by scikit-learn holding the iris data, targets and names
iris = load_iris()

# Column labels: drop the trailing 5 characters (the ' (cm)' unit suffix)
# from every original feature name; the label column is called 'species'.
feature_names = [raw_name[:-5] for raw_name in iris.feature_names]
target_name = 'species'

# Features as a DataFrame, encoded labels as a named Series ...
X = pd.DataFrame(data=iris.data, columns=feature_names)
y = pd.Series(data=iris.target, name=target_name)

# ... and both side by side in a single frame (label column last)
df = X.join(y)

## Train Random Forest Classifier

In [54]:
# 1. Hold out 25% of the rows as a test set (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=7,
)

# 2. Build the seeded Random Forest and fit it on the training rows
#    (fit() hands back the fitted estimator, so the two steps can be chained)
rf_clf = RandomForestClassifier(random_state=7).fit(X_train, y_train)

# 3. Score the fitted model on the held-out rows and report it as a percentage
acc_test_score = rf_clf.score(X_test, y_test)
print(f'Model accuracy on test set is {acc_test_score*100:.2f}%')

Model accuracy on test set is 89.47%


In [78]:
# Lookup table: encoded class index -> species name
# (could also be built with enumerate(iris.target_names) instead of by hand)
idx_to_target = {0: 'setosa', 1: 'versicolor', 2: 'virginica'}

# Put the test-set predictions next to the true labels and decode both
# columns from class index to species name.
df_pred = pd.DataFrame(
    {'pred': rf_clf.predict(X_test), 'actual': y_test}
).replace(idx_to_target)

# Show only the rows the model got wrong.
# The errors are all versicolor/virginica confusions, in both directions.
is_wrong = df_pred['pred'] != df_pred['actual']
df_pred[is_wrong]

Unnamed: 0,pred,actual
106,versicolor,virginica
77,virginica,versicolor
70,virginica,versicolor
119,versicolor,virginica


## Save/Load Model as Joblib File

In [64]:
# Persist the fitted Random Forest classifier to a joblib file in the working directory
dump(rf_clf, filename='rf_clf.joblib')

['rf_clf.joblib']

In [65]:
# Loads the Random Forest Classifier Model
# rf_clf = load('rf_clf.joblib')

## Make Model Predictions in dict format

In [76]:
# Predict straight from raw feature values (one row, no column names)
sample = [[2.5, 1.5, 3.5, 6.9]]

### Predicted class as its encoded index
pred_idx = rf_clf.predict(sample)[0]
print(pred_idx)

### Predicted class as its species name
# Method 1: index into the names shipped with the dataset
pred_target_name = iris.target_names[pred_idx]
print(pred_target_name)

# Method 2: go through the index -> name lookup dict
print(idx_to_target[pred_idx])

1
versicolor
versicolor


## Function to parse features from json request

In [83]:
# Feature keys looked up in the request payload, in the order they are placed
# in the returned list.
FEATURES = ['sepal length', 'sepal width', 'petal length', 'petal width']


def parse_args(requested_dict, features=None, default=0):
    """Turn a request payload into an ordered list of feature values.

    Parameters
    ----------
    requested_dict : dict or None
        Mapping of feature name -> value (e.g. a parsed JSON body). ``None``
        or an empty mapping is treated as "no features supplied".
    features : sequence of str, optional
        Keys to extract, in output order. Defaults to the module-level
        ``FEATURES``.
    default : object, optional
        Value used for a feature that is missing. Defaults to ``0``.

    Returns
    -------
    list
        One entry per name in ``features``, in the same order.

    Notes
    -----
    A feature counts as missing only when its key is absent or its value is
    ``None`` or the empty string. A supplied ``0`` / ``0.0`` is a real
    measurement and is passed through unchanged rather than being replaced
    by ``default``.
    """
    if features is None:
        features = FEATURES
    # A missing body behaves like an empty payload instead of raising.
    payload = requested_dict or {}

    x_list = []
    for feature in features:
        value = payload.get(feature)
        # Explicit check instead of truthiness, so a legitimate zero value
        # is never mistaken for a missing one.
        is_missing = value is None or value == ''
        x_list.append(default if is_missing else value)
    return x_list

In [86]:
# Demonstration of parse_args on two payloads
### (1) Every feature supplied
data = {
    'sepal length': 8.4,
    'sepal width': 6.1,
    'petal length': 1.2,
    'petal width': 4.3,
}
print(parse_args(data))

### (2) Two features left out of the payload
data = {
    'sepal length': 8.4,
    'petal width': 4.3,
}
print(parse_args(data))

[8.4, 6.1, 1.2, 4.3]
[8.4, 0, 0, 4.3]


## Load joblib model and make predictions

In [102]:
# Read the persisted classifier back from disk.
# NOTE: joblib files are pickle-based, so only load files from a trusted source.
MODEL = load('rf_clf.joblib')

# Turn the request payload into a single-row 2-D array for predict()
x_list = parse_args(data)
x_array = np.array(x_list).reshape(1, -1)

# Predict the class index for that row and decode it to a species name
class_idx = MODEL.predict(x_array)[0]
pred = idx_to_target[class_idx]

# Wrap the result in a response dict, serialise it to JSON and parse it back;
# the parsed dict is the cell's displayed value.
response = {'PREDICTION': pred}
json.loads(json.dumps(response))

{'PREDICTION': 'virginica'}

In [1]:
# Shell escape: print the version of the `python` executable found on PATH
# (NOTE(review): this may differ from the kernel's own interpreter — confirm).
!python -V

Python 3.8.10
