API Requests
====

In [1]:
import pandas as pd
import numpy as np
import json
import requests
import pickle
from flask import Flask, jsonify, request

from sklearn.preprocessing import scale, MinMaxScaler, StandardScaler, RobustScaler

In [2]:
# Draw 10 random rows of the processed loans; they are later serialized and sent to the API.
# random_state pins the sample so that "Restart Kernel -> Run All" reproduces the same rows.
# NOTE(review): the path is absolute and machine-specific -- consider a path relative to the project.
data = pd.read_csv("/media/juanan/DATA/loan_data_analysis/data/loans_processed.csv", sep = "^").sample(10, random_state = 42)

In [3]:
# Preview the sampled rows.
data

Unnamed: 0,num_bc_sats,num_rev_tl_bal_gt_0,grade,avg_cur_bal,pub_rec_bankruptcies,num_rev_accts,tax_liens,funded_amnt_inv,delinq_2yrs,total_bal_ex_mort,...,num_accts_ever_120_pd,total_il_high_credit_limit,bc_util,percent_bc_gt_75,sub_grade,mort_acc,num_op_rev_tl,dti,home_ownership,loan_status
298568,1.0,5.0,D,4426.0,0.0,21.0,0.0,16100.0,0.0,39835.0,...,0.0,36325.0,73.5,0.0,D2,0.0,6.0,33.44,OWN,0.0
122848,4.0,4.0,C,7758.0,0.0,7.0,0.0,15000.0,0.0,54307.0,...,0.0,35540.0,95.2,100.0,C4,0.0,6.0,15.76,RENT,0.0
577398,6.0,8.0,B,18221.0,0.0,18.0,0.0,14400.0,1.0,35418.0,...,0.0,47188.0,91.1,83.3,B4,1.0,11.0,16.3,MORTGAGE,0.0
741207,6.0,6.0,D,3541.0,0.0,8.0,0.0,20400.0,0.0,28325.0,...,0.0,21859.0,36.6,16.7,D3,0.0,6.0,12.93,RENT,0.0
385469,0.0,2.0,C,3217.0,0.0,12.0,0.0,12600.0,1.0,12867.0,...,0.0,14640.0,11.955,11.955,C3,0.0,2.0,11.91,RENT,1.0
715951,9.0,10.0,C,4718.0,0.0,13.0,0.0,16000.0,0.0,127374.0,...,0.0,117304.0,21.9,0.0,C5,0.0,13.0,16.54,RENT,1.0
684885,8.0,7.0,D,2429.0,0.0,26.0,0.0,13725.0,3.0,31577.0,...,0.0,34498.0,16.0,0.0,D2,0.0,12.0,31.81,RENT,0.0
238830,38.6,38.6,B,38.6,0.0,38.6,0.0,11672.201752,0.0,38.6,...,38.6,38.6,38.6,38.6,B3,38.6,38.6,10.88,RENT,0.0
277186,4.0,8.0,C,2639.0,2.0,11.0,0.0,7500.0,0.0,26392.0,...,0.0,23952.0,18.7,50.0,C2,4.0,9.0,16.48,OWN,0.0
448975,5.0,5.0,C,4575.0,0.0,8.0,0.0,18725.0,0.0,45750.0,...,0.0,47483.0,79.9,40.0,C2,0.0,6.0,35.42,RENT,1.0


In [35]:
def categorical_to_numeric(data, categorical_variable, target):
    """
    Encode a categorical column as the mean of the target within each category.

    Parameters
    ---------
    data: DataFrame for transforming categorical to numeric
    categorical_variable: variable we want to transform to the mean value of the target.
    target: target of the data
    
    Returns:
    ---------
    result: numeric variable (Series aligned with data's index). Rows whose
            category is missing (NaN/None) are encoded as NaN.
    """    
    
    # Mean of the target per category; groupby drops missing keys by default.
    category_means = data.groupby(categorical_variable)[target].mean()
    
    # Mapping with a Series turns values absent from its index (e.g. NaN) into
    # NaN, where a dict lookup inside a lambda would raise KeyError.
    result = data[categorical_variable].map(category_means)
    
    return result

In [36]:
def normalize_variables(data, normalization = "robust", target = "loan_status"):
    """
    Scale the numeric feature columns of a DataFrame, leaving the target as is.

    Parameters
    ---------
    data: DataFrame to normalize. It is not modified; a scaled copy is returned.
    normalization: type of normalization to perform: "robust", "standard" and "minMax"
    target: name of the target column, which is excluded from the scaling
    
    Returns
    ---------
    result: new DataFrame with normalized variables. If data has no numeric
            feature columns, an unscaled copy is returned.

    Raises
    ---------
    KeyError: if normalization is not one of the supported names
    """
    
    # normalization methods; only the requested scaler is instantiated
    normalization_dict = {"robust": RobustScaler,
                          "standard": StandardScaler,
                          "minMax": MinMaxScaler}
    
    scaler = normalization_dict[normalization]()
    
    # numeric variables except target.
    # select_dtypes is the public replacement for the private
    # _get_numeric_data(); bools are kept and timedeltas excluded so the
    # selection is intended to match what that helper returned.
    features = data.loc[:, data.columns != target]
    variables = features.select_dtypes(include = ["number", "bool"],
                                       exclude = ["timedelta"]).columns
    
    # normalization
    print(scaler)
    
    # work on a copy so the caller's DataFrame is not modified in place
    result = data.copy()
    
    if len(variables) == 0:
        # nothing to scale; the scaler would otherwise fail on an empty selection
        return result
    
    result[variables] = scaler.fit_transform(result[variables])
    
    return result

In [37]:
### 3. pre-processing

# Every object-typed column is treated as categorical and replaced by the
# mean of "loan_status" within each of its categories.
categorical_variables = data.select_dtypes(include=["object"]).columns

for variable in categorical_variables:
    data[variable] = categorical_to_numeric(data=data,
                                            categorical_variable=variable,
                                            target="loan_status")

# Scale the numeric columns (the target is skipped inside the helper).
data = normalize_variables(data, normalization="standard")

StandardScaler(copy=True, with_mean=True, with_std=True)


In [38]:
# Inspect the frame after the pre-processing step.
data

Unnamed: 0,num_bc_sats,num_rev_tl_bal_gt_0,grade,avg_cur_bal,pub_rec_bankruptcies,num_rev_accts,tax_liens,funded_amnt_inv,delinq_2yrs,total_bal_ex_mort,...,num_accts_ever_120_pd,total_il_high_credit_limit,bc_util,percent_bc_gt_75,sub_grade,mort_acc,num_op_rev_tl,dti,home_ownership,loan_status
30751,-0.508876,-0.60559,0.160128,-0.343937,2.0,-0.828001,0.0,-0.308361,0.0,0.044719,...,-0.606712,0.196548,0.089555,-0.388992,-0.654654,-0.578605,-0.680223,0.434608,-1.083473,0.0
230053,0.81815,0.246052,-1.441153,-0.624309,-0.5,-0.284891,0.0,-0.495109,0.0,-1.046177,...,0.413215,-0.701712,-1.670505,-1.275098,-0.654654,2.878316,0.169267,-0.674836,0.120386,0.0
568037,-0.45404,-0.726106,-1.441153,2.856267,-0.5,-0.585541,0.0,1.513566,0.0,-0.354043,...,-0.606712,-0.702396,0.621563,0.017478,-0.654654,0.402082,-0.596115,-1.287798,0.120386,0.0
715220,-0.508876,-0.766278,-1.441153,-0.304981,-0.5,-1.458398,0.0,1.969048,0.0,0.568098,...,-0.570544,1.225178,-1.271499,-1.405168,-0.654654,-0.333434,-0.848439,-0.574053,0.120386,0.0
645294,2.759336,1.668135,0.160128,-0.622041,-0.5,1.431727,0.0,0.602602,0.0,-0.172594,...,1.693549,-0.700852,-0.101082,-0.388992,-0.654654,-0.333434,1.657977,-1.139101,-1.083473,0.0
633676,-0.344369,2.124086,0.960769,-0.621314,-0.5,1.982112,0.0,-0.58165,0.0,-0.72113,...,2.104051,-0.700577,0.736831,0.480855,-0.654654,-0.578605,2.13529,-0.865664,-1.083473,0.0
379371,-0.673383,-0.766278,0.960769,0.380283,-0.5,-0.731017,0.0,-0.763843,0.0,-0.967168,...,-0.606712,-0.702396,0.967368,1.64336,-0.654654,-0.456019,-0.890493,0.868307,0.120386,0.0
396972,-0.234697,-0.364559,0.960769,0.033775,-0.5,-0.100621,0.0,0.147121,0.0,0.545742,...,-0.606712,0.275156,0.346692,-0.616616,1.527525,-0.088262,-0.343791,1.467225,0.120386,1.0
452987,-0.399204,-0.645762,0.960769,-0.514501,2.0,0.093347,0.0,-0.955145,0.0,-0.442376,...,-0.606712,-0.55795,-1.191698,0.289813,1.527525,-0.333434,-0.427899,1.535791,0.120386,1.0
759499,-0.45404,-0.1637,0.160128,-0.239242,-0.5,0.481283,0.0,-1.128228,0.0,2.54493,...,-0.606712,2.369001,1.472775,1.64336,1.527525,-0.578605,-0.175575,0.235519,2.528103,1.0


In [39]:
# Serialize the pre-processed rows as a JSON array with one object per row.
# NOTE(review): the "records" orientation does not appear to carry the DataFrame
# index, so the original row ids are not part of the payload -- confirm that is intended.
data_json = data.to_json(orient="records")

In [55]:
# Show the serialized JSON string.
data_json

'[{"num_bc_sats":-0.5088759495,"num_rev_tl_bal_gt_0":-0.6055903815,"grade":0.1601281538,"avg_cur_bal":-0.343937418,"pub_rec_bankruptcies":2.0,"num_rev_accts":-0.8280014233,"tax_liens":0.0,"funded_amnt_inv":-0.3083611797,"delinq_2yrs":0.0,"total_bal_ex_mort":0.0447186289,"pct_tl_nvr_dlq":0.6066387549,"disbursement_method":1.0,"fico_range_low":-1.2099204947,"verification_status":-0.5773502692,"delinq_amnt":0.0,"purpose":-0.3746343246,"emp_title":-0.6546536707,"zip_code":-0.6546536707,"loan_amnt":-0.3088860387,"installment":-0.0727958674,"fico_range_high":-1.2099204947,"annual_inc":-0.9027961235,"term":0.8164965809,"int_rate":-0.0628675502,"emp_length":-0.75,"revol_bal":-0.5789842702,"application_type":1.0,"num_bc_tl":-0.6621811684,"num_sats":-0.6254796522,"tot_hi_cred_lim":-0.3554197077,"tot_coll_amt":-0.6018190566,"initial_list_status":1.5275252317,"bc_open_to_buy":-0.3032301914,"total_bc_limit":-0.2998728454,"open_acc":-0.4719875816,"revol_util":-1.1718218767,"pub_rec":2.0,"funded_amnt

In [75]:
# Load the pickled logistic regression model.
# NOTE: unpickling can execute arbitrary code -- only load model files from a trusted source.
with open("../output/models/logistic_regression_model.sav", mode="rb") as f:
    loaded_model = pickle.load(f)

# Class probabilities for the feature columns (target dropped); column 0 is displayed.
class_probabilities = pd.DataFrame(
    loaded_model.predict_proba(data.drop(columns="loan_status"))
)
class_probabilities[0]

0    0.909001
1    0.887868
2    0.986931
3    0.840045
4    0.970084
5    0.951651
6    0.961256
7    0.269149
8    0.180407
9    0.138683
Name: 0, dtype: float64

In [52]:
# HTTP headers for the prediction request: JSON is sent and JSON is expected back.
header = {
    "Content-Type": "application/json",
    "Accept": "application/json",
}

In [114]:
def _load_pickled_model(path):
    """Unpickle the model stored at path, closing the file handle afterwards."""
    # NOTE: unpickling can execute arbitrary code -- only load trusted model files.
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


print("Loading models...")
logistic_regression = _load_pickled_model("../output/models/logistic_regression_model.sav")
random_forest = _load_pickled_model("../output/models/random_forest_model.sav")
xg_boost = _load_pickled_model("../output/models/xg_boost_model.sav")

print("Models have been loaded...doing predictions now...")
# Build the feature frame (everything except the target) once for all models.
features = data.drop("loan_status", axis=1)

# Column 1 of predict_proba is kept for every model.
logit_predictions = list(pd.DataFrame(logistic_regression.predict_proba(features)).loc[:,1])
rf_predictions = list(pd.DataFrame(random_forest.predict_proba(features)).loc[:,1])
# The XGBoost model is fed a plain array rather than the DataFrame, as before;
# .values gives the ndarray directly instead of the discouraged np.matrix.
xg_predictions = list(pd.DataFrame(xg_boost.predict_proba(features.values)).loc[:,1])

# One row per sampled loan: its index label plus the three model scores.
final_predictions = pd.DataFrame(list(zip(data.index, logit_predictions, rf_predictions, xg_predictions)),
                                 columns=["id", "logit", "rf", "xg"])
print("Done!")
final_predictions

Loading models...
Models have been loaded...doing predictions now...
Done!


Unnamed: 0,id,logit,rf,xg
0,30751,0.090999,0.255761,0.173322
1,230053,0.112132,0.095084,0.075058
2,568037,0.013069,0.101116,0.03449
3,715220,0.159955,0.117432,0.129458
4,645294,0.029916,0.135984,0.07818
5,633676,0.048349,0.224005,0.272576
6,379371,0.038744,0.178876,0.252282
7,396972,0.730851,0.496185,0.617083
8,452987,0.819593,0.5068,0.677487
9,759499,0.861317,0.419609,0.731682


In [120]:
# POST the serialized rows to the prediction endpoint.
# NOTE(review): data_json is already a JSON string (output of to_json), so
# json.dumps wraps it in a second layer of encoding; presumably the server
# decodes it twice -- verify against the /predict handler before changing it.
# timeout: without it, requests waits indefinitely if the server never answers.
resp = requests.post("http://0.0.0.0:8000/predict",
                     data = json.dumps(data_json),
                     headers= header,
                     timeout = 60)

In [121]:
# Decode the JSON body of the response.
resp.json()

{'predictions': '[{"id":0,"logit":0.1156613959,"rf":0.2118039377,"xg":0.2403198928},{"id":1,"logit":0.0387050322,"rf":0.1336294551,"xg":0.0476554334},{"id":2,"logit":0.1518343135,"rf":0.1147616606,"xg":0.0859299824},{"id":3,"logit":0.1494674967,"rf":0.1656625856,"xg":0.1434585005},{"id":4,"logit":0.0638777361,"rf":0.1902400035,"xg":0.1170688942},{"id":5,"logit":0.0084895377,"rf":0.2128716671,"xg":0.0975515991},{"id":6,"logit":0.0633470516,"rf":0.1726716874,"xg":0.2018158883},{"id":7,"logit":0.8491677456,"rf":0.3263729858,"xg":0.6370109916},{"id":8,"logit":0.3332118571,"rf":0.2937150986,"xg":0.5720311999},{"id":9,"logit":0.8807806245,"rf":0.3283493345,"xg":0.5924571753}]'}

In [95]:
# Show the loan_status column of the sampled rows (presumably for comparison with the predicted scores).
data["loan_status"]

30751     0.0
230053    0.0
568037    0.0
715220    0.0
645294    0.0
633676    0.0
379371    0.0
396972    1.0
452987    1.0
759499    1.0
Name: loan_status, dtype: float64