# API for Best Classifier So Far (currently: RBF Kernel SVC)

Note: I wanted to be able to test my API on the labeled data, so I took advantage of the fact that I have already processed and stored this data. At some point I plan to circle back and add in the preprocessing steps so that this API can be used with unlabeled, unprocessed data.

In [1]:
# import
from pathlib import Path

# location of the API script: <parent of the working directory>/src/models/prediction_api.py
prediction_api_script_file = Path.cwd().parent.joinpath('src', 'models', 'prediction_api.py')

In [2]:
%%writefile $prediction_api_script_file

# import
from flask import Flask, request
import pandas as pd
import numpy as np
import json
from pathlib import Path

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23,
# so prefer the standalone joblib package and fall back only on older installations
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# create the flask app
app = Flask(__name__)

# create file paths
# this script is written to <project>/src/models/prediction_api.py, so the project root is
# parents[2] of the script path; deriving it from __file__ avoids hard-coding one user's home layout
project_dir = Path(__file__).resolve().parents[2]
pca_file_path = project_dir / 'models' / 'pca_50.pkl'
model_file_path = project_dir / 'models' / 'rbf_svc_model.pkl'

# load pca and model
# NOTE: joblib files are pickles - only load model files that come from a trusted source
pca = joblib.load(pca_file_path)
model = joblib.load(model_file_path)

# use the Python decorator to create the API route with one endpoint ('/api') that will be used for POST requests
@app.route('/api', methods = ['POST'])
def make_prediction():
    """Predict labels for the images in the JSON body of the POST request.

    The body must be a JSON object that pandas can read as a data frame. It
    must contain an 'ImageId' column; every column other than 'ImageId' and
    'label' is passed to the PCA transform and then to the model.

    The 'label' column is optional. When it is present (e.g. when testing
    with labeled training rows) it is echoed back as 'Actual' so predictions
    can be compared with the true values. When it is absent, the response
    contains only 'ImageId' and 'Predicted'.

    Returns:
        A (body, status, headers) tuple: the response data frame serialized
        with DataFrame.to_json(), status 200, and a JSON content type.
    """
    # local import so the handler does not rely on the script's import block
    from io import StringIO

    # read json object (force = True parses the body even without a JSON content type)
    # and convert it to a json string
    data = json.dumps(request.get_json(force = True))

    # create pandas dataframe from json string
    # wrapped in StringIO because passing a literal JSON string to read_json is deprecated in pandas >= 2.1
    df = pd.read_json(StringIO(data))

    # extract ImageIds
    image_ids = df['ImageId']

    # labels are only available when the caller sends labeled data
    has_labels = 'label' in df.columns

    # extract pixel columns by dropping the non-feature columns that are present
    non_feature_columns = ['ImageId', 'label'] if has_labels else ['ImageId']
    X = df.drop(non_feature_columns, axis = 1)

    # pca the input
    X_pca = pca.transform(X)

    # make predictions
    predictions = model.predict(X_pca)

    # create response data frame (column order: ImageId, Predicted, then Actual when available)
    response_columns = {'ImageId': image_ids,
                        'Predicted' : predictions}
    if has_labels:
        response_columns['Actual'] = df['label']
    df_response = pd.DataFrame(response_columns)

    # return json response; a bare string would be sent as text/html, so set the content type explicitly
    return df_response.to_json(), 200, {'Content-Type': 'application/json'}
    
if __name__ == '__main__':
    # development server settings:
    #   port  - can be any unused port
    #   debug - True means a detailed stack trace is shown when an API call fails;
    #           good for development, probably set to False in the production environment
    server_options = {'port': 10001, 'debug': True}
    app.run(**server_options)

Writing /Users/jgower/Python/Kaggle/digit_recognizer/src/models/prediction_api.py


## Test the API

Open a terminal, navigate to /your_path/digit_recognizer/src/models (which is where the API script is located), then execute the script (e.g. `python prediction_api.py`). Once it is running, proceed to the next cell...

In [3]:
# import
import pandas as pd

# load the processed training data from <parent of the working directory>/data/processed
train_file_path = Path.cwd().parent.joinpath('data', 'processed', 'train_processed.csv')
train_df = pd.read_csv(train_file_path)

In [4]:
# keep the first five training rows whose label is 7; these are sent to the API to test it
is_labeled_7 = train_df['label'] == 7
images_labeled_7 = train_df[is_labeled_7].head(5)

In [5]:
# verify labels are all 7 by looking at the rows of these five images
# (the bare expression below is displayed as the notebook cell's output)
images_labeled_7

Unnamed: 0,ImageId,label,pixel12,pixel13,pixel14,pixel15,pixel32,pixel33,pixel34,pixel35,...,pixel770,pixel771,pixel772,pixel773,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779
6,5834,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,39916,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,8708,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,41459,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.65098,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
21,22127,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# import
import requests

# function to make API requests
def make_api_request(data, url = 'http://127.0.0.1:10001/api', timeout = 30):
    """POST data to the prediction API and return the decoded JSON response.

    Args:
        data: request body to send (e.g. the string from DataFrame.to_json()).
        url: API endpoint; defaults to the local server started by the
            prediction script.
        timeout: seconds to wait for the server before giving up. Without
            a timeout, requests can wait indefinitely on an unresponsive server.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within timeout.
    """
    # make post request
    r = requests.post(url, data, timeout = timeout)

    # fail with the HTTP status instead of a confusing JSON decode error on an error page
    r.raise_for_status()

    # return the json object
    return r.json()

In [7]:
# serialize the five selected rows to JSON and send them to the API
# the predicted values should all be 7
request_payload = images_labeled_7.to_json()
make_api_request(request_payload)

{'ImageId': {'11': 39916, '14': 8708, '17': 41459, '21': 22127, '6': 5834},
 'Predicted': {'11': 7, '14': 7, '17': 7, '21': 7, '6': 7},
 'Actual': {'11': 7, '14': 7, '17': 7, '21': 7, '6': 7}}