In [79]:
import requests
import urllib.request
import json
import numpy as np
import pandas as pd

from sklearn.metrics import f1_score, roc_auc_score, precision_score, recall_score

In [80]:
def get_metrics(y_test, preds):
    
    precision, recall, thresholds = precision_recall_curve(y_test, preds)
    fscore = (2 * precision * recall) / (precision + recall)
    ix = np.nanargmax(fscore)
    
    return roc_auc_score(y_test, preds), precision[ix], recall[ix], fscore[ix], thresholds[ix]

In [81]:
def send_json(x):
    # ошибки с сериализацией json
#     if isinstance(x, pd.core.series.Series):
#         x.fillna('', inplace=True)  # не лучший вариант, но if np.isnan(obj) не срабатывает
    
    columns = ['age', 'workclass', 'fnlwgt', 'education', 'education.num', 'marital.status', 
               'occupation', 'relationship', 'race', 'capital.gain', 'capital.loss', 
               'hours.per.week']
    
    body = {column: value for column, value in zip(columns, x)} 
    # ошибки с сериализацией json:
    for key, val in body.items():
        if isinstance(val, np.integer):
            body[key] = int(val)
        elif isinstance(val, np.floating):          
            body[key] = float(val)
    
#     myurl = 'http://127.0.0.1:5000' + '/predict'
    myurl = 'http://127.0.0.1:8180/' + '/predict'
    headers = {'content-type': 'application/json; charset=utf-8'}
    response = requests.post(myurl, json=body, headers=headers)
    return response.json()['predictions']

In [82]:
path = 'app/models/app_data/'
X_test = pd.read_csv(path + 'X_test.csv')
y_test = pd.read_csv(path + 'y_test.csv')

In [87]:
X_test.iloc[0, :]

age                               29
workclass                    Private
fnlwgt                        280618
education               Some-college
education.num                     10
marital.status    Married-civ-spouse
occupation         Handlers-cleaners
relationship                 Husband
race                           White
capital.gain                       0
capital.loss                       0
hours.per.week                    40
Name: 0, dtype: object

In [83]:
data = (29, 'Private', 280618, 'Some-college', 10, 'Married-civ-spouse', 'Handlers-cleaners',
        'Husband', 'White', 0, 0, 40)

In [85]:
response = send_json(data)
response

0.11250568899133974

In [86]:
response = send_json(X_test.iloc[0, :])
response

0.11250568899133974

Пустой запрос

In [88]:
test_none = pd.DataFrame([['' for i in range(len(X_test.columns))]], columns=X_test.columns)
test_none

Unnamed: 0,age,workclass,fnlwgt,education,education.num,marital.status,occupation,relationship,race,capital.gain,capital.loss,hours.per.week
0,,,,,,,,,,,,


In [89]:
response = send_json(test_none.iloc[0, :])
response

'loop of ufunc does not support argument 0 of type str which has no callable sqrt method'

Несколько запросов

In [90]:
N = 100

In [91]:
%%time
predictions = X_test.iloc[:100].apply(lambda x: send_json(x), axis=1)

CPU times: user 443 ms, sys: 19.1 ms, total: 462 ms
Wall time: 6.99 s


In [105]:
predictions.values[:5]

array([0.11006082, 0.00230376, 0.50173577, 0.04081379, 0.00108458])

In [92]:
best_th = 0.317

In [93]:
y_pred = predictions.values > 0.317
y_pred[:5]

array([False, False,  True, False, False])

Метрики

In [94]:
roc_auc = roc_auc_score(y_test[:100], y_pred)
prec = precision_score(y_test[:100], y_pred)
rec = recall_score(y_test[:100], y_pred)
f_score = f1_score(y_test[:100], y_pred)

In [95]:
print(f'roc_auc: {roc_auc:.4f}, prec: {prec:.4f}, rec: {rec:.4f}, f-score: {f_score:.4f}')

roc_auc: 0.7452, prec: 0.4583, rec: 0.6471, f-score: 0.5366


In [60]:
# class NpJsonEncoder(json.JSONEncoder):
#   """Serializes numpy objects as json."""

# def default(self, obj):
#     if isinstance(obj, np.integer):
#         return int(obj)
#     elif isinstance(obj, np.bool_):
#         return bool(obj)
#     elif isinstance(obj, np.floating):
#         if np.isnan(obj):
#             return None  # Serialized as JSON null.
#         return float(obj)
#     elif isinstance(obj, np.ndarray):
#         return obj.tolist()
#     else:
#         return super().default(obj)

# body_js = json.dumps(body, cls=NpEncoder)