In [0]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [0]:
# Raw GitHub location of the data file loaded in the next cells.
file_url = (
    'https://raw.githubusercontent.com/PacktWorkshops/The-Data-Science-Workshop'
    '/master/Chapter11/dataset/breast-cancer-wisconsin.data'
)

In [0]:
# Column labels applied when the data file is read (the file is read with header=None).
col_names = [
    'Sample code number',
    'Clump Thickness',
    'Uniformity of Cell Size',
    'Uniformity of Cell Shape',
    'Marginal Adhesion',
    'Single Epithelial Cell Size',
    'Bare Nuclei',
    'Bland Chromatin',
    'Normal Nucleoli',
    'Mitoses',
    'Class',
]

In [0]:
# Read the file without a header row, label the columns from col_names,
# and treat '?' entries as missing values.
df = pd.read_csv(
    file_url,
    names=col_names,
    header=None,
    na_values='?',
)

In [5]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Sample code number,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1.0,3,1,1,2
1,1002945,5,4,4,5,7,10.0,3,2,1,2
2,1015425,3,1,1,1,2,2.0,3,1,1,2
3,1016277,6,8,8,1,3,4.0,3,7,1,2
4,1017023,4,1,1,3,2,1.0,3,1,1,2


In [0]:
# Replace every missing value (the '?' entries parsed as NaN) with 0.
df = df.fillna(value=0)

In [0]:
# Split off the target: pop() returns the 'Class' column and removes it from df
# in place, so re-running this cell without reloading df raises KeyError.
y = df.pop('Class')

In [0]:
# Build the feature matrix from df without the 'Sample code number' column.
X = df.drop(columns='Sample code number')

In [9]:
# Preview the first rows of the feature matrix.
X.head()

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses
0,5,1,1,1,2,1.0,3,1,1
1,5,4,4,5,7,10.0,3,2,1
2,3,1,1,1,2,2.0,3,1,1
3,6,8,8,1,3,4.0,3,7,1
4,4,1,1,3,2,1.0,3,1,1


In [0]:
# Hold out 33% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=888,
)

In [0]:
# Random forest with library-default hyperparameters; random_state=1 fixes the seed.
rf_model = RandomForestClassifier(random_state=1)

In [12]:
# Train the classifier on the training split.
rf_model.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=1, verbose=0,
                       warm_start=False)

In [13]:
# Sanity-check the fitted model on the first held-out row.
# iloc[[0]] (list indexer) keeps a one-row DataFrame, so the column labels the
# model was fitted with are passed along instead of being dropped by wrapping
# a Series in a plain list.
rf_model.predict(X_test.iloc[[0]])

array([2])

In [14]:
# Persist the fitted model to model.pkl in the current working directory.
joblib.dump(rf_model, "model.pkl") 

['model.pkl']

In [0]:
import socket
import threading
import requests
import json
from flask import Flask, jsonify, request
import numpy as np

In [16]:
# Resolve this machine's hostname to an IP address; the client request cell
# uses it to build the API URL.
hostname = socket.gethostname()
ip_address = socket.gethostbyname(hostname)
ip_address

'172.28.0.2'

In [0]:
# Flask application object that the /api route is registered on.
app = Flask(__name__)

In [0]:
# Reload the model saved earlier in this notebook; the API handler predicts with it.
# NOTE: joblib files are pickle-based, so only load files from a trusted source.
trained_model = joblib.load("model.pkl")

In [0]:
@app.route('/api', methods=['POST'])
def predict():
  """Score the JSON records in the POST body with the loaded model.

  The parsed body is handed unchanged to ``trained_model.predict``; the
  client cell in this notebook sends a JSON list containing one feature row.

  Returns:
    On success, the JSON-encoded string form of the prediction array
    (for example ``"[2]"``). When the body is missing or is not valid JSON,
    or when the model rejects the payload, a JSON object with an ``error``
    message and HTTP status 400.
  """
  # silent=True yields None instead of raising, so every unparsable body
  # goes through the same JSON error response below.
  data = request.get_json(silent=True)
  if data is None:
    return jsonify(error='Request body must be valid JSON.'), 400

  try:
    prediction = trained_model.predict(data)
  except (ValueError, TypeError) as exc:
    # Wrong shape or non-numeric input is a client error, not a server fault.
    return jsonify(error=f'Invalid input for prediction: {exc}'), 400

  str_pred = np.array2string(prediction)
  return jsonify(str_pred)

In [20]:
# Run the Flask development server on a background thread so the kernel stays
# free to send requests from later cells.
# daemon=True keeps this never-ending server thread from blocking interpreter
# shutdown.
# NOTE: host '0.0.0.0' listens on all network interfaces; port 80 matches the
# client cell, which posts to the address without an explicit port.
flask_thread = threading.Thread(
    target=app.run,
    kwargs={'host': '0.0.0.0', 'port': 80},
    daemon=True,
)
flask_thread.start()

 * Serving Flask app "__main__" (lazy loading)


In [21]:
# Take the first held-out row as a plain Python list to use as the request payload.
first_row = X_test.iloc[0]
record = first_row.tolist()
record

 * Environment: production


[2.0, 3.0, 1.0, 1.0, 5.0, 1.0, 1.0, 1.0, 1.0]

   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:80/ (Press CTRL+C to quit)


In [0]:
# Wrap the record in an outer list (a single row) and serialize it to a JSON string.
j_data = json.dumps([record])

In [0]:
# HTTP headers sent with the prediction request; the body is declared as JSON.
headers = {
    'content-type': 'application/json',
    'Accept-Charset': 'UTF-8',
}

In [24]:
# POST the JSON payload to the /api endpoint served by the Flask thread and
# show the raw response body.
# The timeout keeps this cell from hanging indefinitely if the server is not
# reachable; requests applies no timeout unless one is given.
r = requests.post(
    f"http://{ip_address}/api",
    data=j_data,
    headers=headers,
    timeout=10,
)
r.text

172.28.0.2 - - [25/Nov/2019 03:26:40] "[37mPOST /api HTTP/1.1[0m" 200 -


'"[2]"\n'