# PA004 - HEALTH INSURANCE CROSS-SELL - FIRST CYCLE

## 0 - IMPORTS AND FUNCTIONS

In [None]:
import pandas              as pd
import scikitplot          as skplt
import matplotlib.pyplot   as plt
import lightgbm            as lgbm

from pandas_profiling      import ProfileReport
from IPython.display       import HTML
from catboost              import CatBoostClassifier
from xgboost               import XGBClassifier
from sklearn.utils         import resample
from sklearn               import metrics               as mt
from sklearn               import model_selection       as ms

### 0.1 - HELPER FUNCTIONS

In [None]:
def ml_performance( model_name, precision_atK, recall_atK, accuracy=None ):
    """Build a one-row summary table for a ranking model.

    Parameters
    ----------
    model_name : label stored in the 'Model Name' column.
    precision_atK : value stored in the 'Precision_at_K' column.
    recall_atK : value stored in the 'Recall_at_K' column.
    accuracy : accepted for backward compatibility and ignored. It is
        optional so that the three-argument calls used in this notebook work.

    Returns
    -------
    pandas.DataFrame with a single row (index 0).
    """
    return pd.DataFrame( {'Model Name': model_name,
                          'Precision_at_K': precision_atK,
                          'Recall_at_K': recall_atK}, index=[0] )
                          
def recall_at_k (data, k=20000):
    """Return the recall reached within the first ``k`` rows of ``data``.

    The function does not sort: ``data`` is expected to be already ordered
    best-first by the caller (the notebook sorts by 'score' descending).

    Parameters
    ----------
    data : pandas.DataFrame with a numeric 'response' column.
    k : number of top rows to consider (1-based count).

    Returns
    -------
    Cumulative sum of 'response' over the first ``k`` rows divided by the
    total sum of 'response'.

    Raises
    ------
    KeyError if ``k`` is outside ``1..len(data)``.
    """
    # Work on a re-indexed copy so positions run 0..n-1 and the caller's
    # frame is left untouched
    data = data.reset_index( drop=True )

    # 1-based rank of every row
    data['ranking'] = data.index + 1

    data['recall_at_k'] = data['response'].cumsum() / data['response'].sum()

    # Row with ranking == k sits at index k - 1 (index k would be the
    # (k + 1)-th row)
    return data.loc[k - 1, 'recall_at_k']

def precision_at_k (data, k=20000):
    """Return the precision within the first ``k`` rows of ``data``.

    The function does not sort: ``data`` is expected to be already ordered
    best-first by the caller (the notebook sorts by 'score' descending).

    Parameters
    ----------
    data : pandas.DataFrame with a numeric 'response' column.
    k : number of top rows to consider (1-based count).

    Returns
    -------
    Cumulative sum of 'response' over the first ``k`` rows divided by ``k``.

    Raises
    ------
    KeyError if ``k`` is outside ``1..len(data)``.
    """
    # Work on a re-indexed copy so positions run 0..n-1 and the caller's
    # frame is left untouched
    data = data.reset_index( drop=True )

    # 1-based rank of every row
    data['ranking'] = data.index + 1

    data['precision_at_k'] = data['response'].cumsum() / data['ranking']

    # Row with ranking == k sits at index k - 1 (index k would be the
    # (k + 1)-th row)
    return data.loc[k - 1, 'precision_at_k']

def prediction_score(model, data):
    """Score ``data`` with ``model`` and attach cumulative ranking metrics.

    Parameters
    ----------
    model : fitted estimator exposing ``predict_proba``; column 1 of its
        output is used as the score.
    data : pandas.DataFrame holding the feature columns plus 'response'.

    Returns
    -------
    A copy of ``data`` sorted by 'score' (highest first) with a fresh 0..n-1
    index and the extra columns 'score', 'n_samples', 'precision_at_k' and
    'recall_at_k'. Row ``i`` holds the metrics for the top ``i + 1`` rows.
    """
    # Everything except the target is passed to the model
    features = data.drop(columns=['response'])

    # Attach the positive-class probability and rank best-first
    ranked = data.copy()
    ranked['score'] = model.predict_proba(features)[:, 1].tolist()
    ranked = ranked.sort_values('score', ascending=False).reset_index(drop=True)

    # Cumulative precision / recall down the ranking
    hits_so_far = ranked['response'].cumsum()
    ranked['n_samples'] = ranked.index + 1
    ranked['precision_at_k'] = hits_so_far / ranked['n_samples']
    ranked['recall_at_k'] = hits_so_far / ranked['response'].sum()

    return ranked

def cross_validation(kfold, modelName, model, data, at_k, cols_selected=None):
    """Estimate precision@k and recall@k with stratified k-fold validation.

    Parameters
    ----------
    kfold : number of folds passed to ``StratifiedKFold``.
    modelName : column label used in the returned table.
    model : estimator with ``fit`` and ``predict_proba``; ``fit`` is called
        on this object once per fold.
    data : pandas.DataFrame containing the features and a 'response' column.
    at_k : number of top-ranked validation rows the metrics are read at
        (1-based count).
    cols_selected : optional list of feature columns. When omitted, a
        notebook-level ``cols_selected`` is used if one exists; otherwise
        every column except 'response' is used.

    Returns
    -------
    pandas.DataFrame with one column (``modelName``) and the rows
    'precision_at_k', 'std_precision', 'recall_at_k', 'std_recall'.

    Raises
    ------
    KeyError if ``at_k`` is outside ``1..len(validation fold)``.
    """
    # Local import: the notebook only imports numpy in a later cell
    import numpy as np

    if cols_selected is None:
        # Backward compatibility with the previous implicit global lookup
        cols_selected = globals().get('cols_selected')
    if cols_selected is None:
        cols_selected = [col for col in data.columns if col != 'response']

    # Number of folds
    fold=ms.StratifiedKFold(n_splits = kfold, shuffle=True, random_state=42)

    # Performance variables
    precision_list = []
    recall_list = []
    cv_performance = {}

    for train_cv,val_cv in fold.split(data, data['response']):

        # training data
        x_train_fold = data.iloc[train_cv][cols_selected]
        y_train_fold = data['response'].iloc[train_cv]

        # validation data; explicit copy so adding the target column does
        # not write into a slice of `data`
        x_val_fold = data.iloc[val_cv][cols_selected].copy()
        y_val_fold = data['response'].iloc[val_cv]

        # fitting the model
        model_fitted = model.fit(x_train_fold,y_train_fold)

        # getting the prediction probability
        x_val_fold['response'] = y_val_fold
        val_scored = prediction_score(model_fitted, x_val_fold)

        # Row i of val_scored holds the metrics of the top i + 1 rows, so
        # the top `at_k` rows are read at index at_k - 1
        precision_list.append(val_scored.loc[at_k - 1, 'precision_at_k'])
        recall_list.append(val_scored.loc[at_k - 1, 'recall_at_k'])

    # calculating the mean and std performance of all kfolds
    precision_cv = np.round(np.mean(precision_list),4).astype(float)
    std_precision_cv = '+/-' + np.round(np.std(precision_list),4).astype(str)
    recall_cv = np.round(np.mean(recall_list),4).astype(float)
    std_recall_cv = ' +/- ' + np.round(np.std(recall_list),4).astype(str)

    cv_performance[modelName] = [precision_cv, std_precision_cv, recall_cv, std_recall_cv]
    model_performance_cv = pd.DataFrame(cv_performance, index=['precision_at_k', 'std_precision', 'recall_at_k', 'std_recall'])

    return model_performance_cv

def ml_performance( model_name, precision_atK, recall_atK ):
    """Return a one-row DataFrame (index 0) summarising a model's ranking metrics.

    The columns are 'Model Name', 'Precision_at_K' and 'Recall_at_K', filled
    with the three arguments in that order.
    """
    summary_row = {
        'Model Name': model_name,
        'Precision_at_K': precision_atK,
        'Recall_at_K': recall_atK,
    }
    return pd.DataFrame( summary_row, index=[0] )

## 1 - LOADING AND UNDERSTANDING DATA

In [None]:
# Load the full raw dataset into `df`.
# NOTE(review): both paths are absolute and machine-specific; only the
# Windows one is active.

# All data - Linux
#df = pd.read_csv('/home/reng/Documents/ds_repos/Projects/Health_Insurance_Cross_Sell/data/raw/data.csv')

# All Data - Windows
df = pd.read_csv('C:/Users/perot/Documents/ds_repos/projects/Health_Insurance_Cross_Sell/data/raw/data.csv') 

### 1.1 - SPLIT DATASET INTO TRAINING, TEST AND VALIDATION

In [None]:
# Split the raw data into train / validation / test partitions.
# Stratifying on the target keeps the class ratio the same in every
# partition, and a fixed seed makes the split (and every number derived
# from it further down) reproducible between runs.

X = df.drop( 'response', axis=1 )
y = df['response'].copy()

# First split: hold out 20% of all rows as the validation set
x_train, x_valid, y_train, y_valid = ms.train_test_split( X, y, test_size=0.2, stratify=y, random_state=42 )

# Second split: hold out 20% of the remaining rows as the test set
x_train, x_test, y_train, y_test = ms.train_test_split( x_train, y_train, test_size=0.2, shuffle=True, stratify=y_train, random_state=42 )

print(f" x_train: {x_train.shape}\n y_train: {y_train.shape}\n x_valid: {x_valid.shape}\n y_valid: {y_valid.shape}\n x_test: {x_test.shape}\n y_test: {y_test.shape}\n")

In [None]:
# Re-attach the target to the training features (column-wise concat).
# Despite its name this frame is not balanced yet.
df_train_balanced = pd.concat([x_train,y_train], axis=1)

In [None]:
# Preview the first rows of the training frame
df_train_balanced.head()

In [None]:
# Class distribution of the target in the training frame
df_train_balanced['response'].value_counts()

##### 1.1.1 - Balancing Datasets according to majority class qty

In [None]:
# Split the training frame by class
df_majority = df_train_balanced[(df_train_balanced['response']==0)]
df_minority = df_train_balanced[(df_train_balanced['response']==1)]

# Upsample the minority class up to the size of the majority class.
# The target size is read from the data rather than hard-coded, because
# the majority count depends on how the train split turned out.
df_minority_upsampled = resample(df_minority,
                                 replace=True,                 # sample with replacement
                                 n_samples=len(df_majority),   # match majority class
                                 random_state=0)               # reproducible results

# Combine majority class with upsampled minority class
df_upsampled = pd.concat([df_minority_upsampled, df_majority])

In [None]:
# Class distribution after upsampling
df_upsampled['response'].value_counts()

In [None]:
# Column dtypes before the casts below
df_upsampled.dtypes

In [None]:
# Cast the upsampled frame to the dtypes used for modelling. Each cast
# touches a single column, so the order of the casts does not matter.
for column in ('id', 'gender', 'region_code', 'policy_sales_channel',
               'vehicle_age', 'vehicle_damage', 'driving_license',
               'previously_insured', 'response'):
    df_upsampled[column] = df_upsampled[column].astype('category')

for column in ('age', 'vintage', 'annual_premium'):
    df_upsampled[column] = df_upsampled[column].astype('float')

In [None]:
# Separate the upsampled frame into target and feature matrix
y_training = df_upsampled['response'].copy()
x_training = df_upsampled.drop(columns=['response'])

In [None]:
# Remove outliers with the z-score method: keep rows whose numeric
# features all lie within 3 standard deviations of the column mean.
import scipy.stats as stats
import numpy       as np

# z-scores are only defined for numeric data, so the categorical/string
# columns are excluded from the computation (they would make zscore fail)
numeric_features = df_upsampled.select_dtypes(include='number')
z = np.abs(stats.zscore(numeric_features))
data_clean = df_upsampled[(z<3).all(axis = 1)]
data_clean.shape

### 1.2 - DATA DESCRIPTION

In [None]:
#df1 = pd.concat([x_train,y_train], axis=1)

In [None]:
#prof = ProfileReport(df1)
#prof.to_file(output_file='eda_x_training.html')

In [None]:
#HTML(filename='eda_x_training.html')

## 2 - FEATURE ENGINEERING

In [None]:
# Work on a copy of the (non-upsampled) training features
df2 = x_train.copy()

In [None]:
# Mark the discrete features of the training copy as pandas categoricals
for column in ('id', 'gender', 'region_code', 'policy_sales_channel',
               'vehicle_age', 'vehicle_damage', 'driving_license',
               'previously_insured'):
    df2[column] = df2[column].astype('category')

In [None]:
# Apply the same categorical casts to the validation features
for column in ('id', 'gender', 'region_code', 'policy_sales_channel',
               'vehicle_age', 'vehicle_damage', 'driving_license',
               'previously_insured'):
    x_valid[column] = x_valid[column].astype('category')

In [None]:
# Aliases used by the modelling cells below (same objects, no copy)
x_validation = x_valid
y_validation = y_valid

## 7 - ML MODELS TRAINING

In [None]:
# Parameter "scale_pos_weight" definition

from collections import Counter

# ESTIMATE SCALE_POS_WEIGHT
# Ratio of negative (0) to positive (1) labels in the non-upsampled y_train
counter = Counter(y_train)
estimate = counter[0]/counter[1]
print('Estimate: %.3f' % estimate)

#### 7.1.1 - LGBM Baseline Model

In [None]:
# Number of training labels after upsampling
y_training.shape

In [None]:
# LightGBM baseline with default hyper-parameters, trained on the upsampled data

# Model Definition
lgbm_base_model = lgbm.LGBMClassifier()

# Model training
lgbm_baseline = lgbm_base_model.fit( x_training, y_training )

# Model Prediction
# predict_proba output; column 1 is used as the propensity score further down
yhat_lgbm_baseline = lgbm_baseline.predict_proba( x_validation )

In [None]:
# One row of three diagnostic plots for the LGBM baseline on the validation set
fig, axs = plt.subplots(ncols= 3, figsize = (18,5))

# cumulative gain - Metric for sorting problem
skplt.metrics.plot_cumulative_gain(y_validation, yhat_lgbm_baseline, ax=axs[0],title='Cumulative Gain - LGBM');

# Lift Curve
skplt.metrics.plot_lift_curve(y_validation, yhat_lgbm_baseline,ax=axs[1],title='Lift Curve - LGBM');

# Roc Curve
skplt.metrics.plot_roc(y_validation, yhat_lgbm_baseline, ax=axs[2], title='ROC-Curve - LGBM');

plt.tight_layout()

In [None]:
# Validation rows with their true response and LGBM propensity score
# (probability of class 1), ranked from highest to lowest score
perf_valid = (
    x_validation
    .assign( response=y_validation, score=yhat_lgbm_baseline[:,1].tolist() )
    .sort_values( 'score', ascending=False )
)

# Ranking metrics for k=20000
precision_atK = precision_at_k( perf_valid, k=20000 )
recall_atK = recall_at_k( perf_valid, k=20000 )

lgbm_baseline_performance = ml_performance( 'LGBM Baseline Model', precision_atK, recall_atK )
lgbm_baseline_performance

#### 7.1.2 - CatBoost Baseline Model

In [None]:
# Columns passed to CatBoost as categorical features
cat_features=['id',
              'gender',
              'region_code',
              'policy_sales_channel',
              'driving_license',
              'vehicle_age',
              'vehicle_damage',
              'previously_insured'   
]

In [None]:
# CatBoost baseline trained on the upsampled data
# NOTE(review): task_type='GPU' presumably requires a GPU-enabled environment - confirm

# Model Definition
cb_base_model = CatBoostClassifier(task_type='GPU')

# Model training
# the third positional argument is the categorical feature list defined above
cb_baseline = cb_base_model.fit( x_training, y_training, cat_features)

# Model Prediction
yhat_cb_baseline = cb_baseline.predict_proba( x_validation )

In [None]:
# One row of three diagnostic plots for the CatBoost baseline on the
# validation set (titles name the model that is actually plotted)
fig, axs = plt.subplots(ncols= 3, figsize = (18,5))

# cumulative gain - Metric for sorting problem
skplt.metrics.plot_cumulative_gain(y_validation, yhat_cb_baseline, ax=axs[0],title='Cumulative Gain - CatBoost');

# Lift Curve
skplt.metrics.plot_lift_curve(y_validation, yhat_cb_baseline, ax=axs[1],title='Lift Curve - CatBoost');

# Roc Curve
skplt.metrics.plot_roc(y_validation, yhat_cb_baseline, ax=axs[2], title='ROC-Curve - CatBoost');

plt.tight_layout()

In [None]:
# Validation rows with their true response and CatBoost propensity score
# (probability of class 1), ranked from highest to lowest score
perf_valid = (
    x_validation
    .assign( response=y_validation, score=yhat_cb_baseline[:,1].tolist() )
    .sort_values( 'score', ascending=False )
)

# Ranking metrics for k=20000
precision_atK = precision_at_k( perf_valid, k=20000 )
recall_atK = recall_at_k( perf_valid, k=20000 )

cb_baseline_performance = ml_performance( 'CatBoost Baseline Model', precision_atK, recall_atK )
cb_baseline_performance

#### XGB Classifier Baseline Model

In [None]:
# XGBoost baseline trained on the upsampled data
# NOTE(review): tree_method='gpu_hist' presumably requires a GPU build of XGBoost - confirm

# Model Definition
xgb_base_model = XGBClassifier(enable_categorical=True, tree_method='gpu_hist')

# Model training
xgb_baseline = xgb_base_model.fit( x_training, y_training )

# Model Prediction
yhat_xgb_baseline = xgb_baseline.predict_proba( x_validation )

In [None]:
# One row of three diagnostic plots for the XGB baseline on the
# validation set (titles name the model that is actually plotted)
fig, axs = plt.subplots(ncols= 3, figsize = (18,5))

# cumulative gain - Metric for sorting problem
skplt.metrics.plot_cumulative_gain(y_validation, yhat_xgb_baseline, ax=axs[0],title='Cumulative Gain - XGB');

# Lift Curve
skplt.metrics.plot_lift_curve(y_validation, yhat_xgb_baseline, ax=axs[1],title='Lift Curve - XGB');

# Roc Curve
skplt.metrics.plot_roc(y_validation, yhat_xgb_baseline, ax=axs[2], title='ROC-Curve - XGB');

plt.tight_layout()

In [None]:
# Validation rows with their true response and XGB propensity score
# (probability of class 1), ranked from highest to lowest score
perf_valid = (
    x_validation
    .assign( response=y_validation, score=yhat_xgb_baseline[:,1].tolist() )
    .sort_values( 'score', ascending=False )
)

# Ranking metrics for k=20000
precision_atK = precision_at_k( perf_valid, k=20000 )
recall_atK = recall_at_k( perf_valid, k=20000 )

xgb_baseline_performance = ml_performance( 'XGB Baseline Model', precision_atK, recall_atK )
xgb_baseline_performance

In [None]:
# Baseline performance of all machine learning algorithms, best precision first
baseline_performance = pd.concat( [lgbm_baseline_performance, cb_baseline_performance, xgb_baseline_performance] )
baseline_performance.sort_values('Precision_at_K', ascending= False)

## 9 - HYPERPARAMETER FINE TUNING

### 9.1 - LGBM Fine Tuning

### 9.2 - CatBoost Fine Tuning

### 9.2 - XGBoost Fine Tuning

###  9.3 - Last model training with all data

#### 9.3.2.1 - Data preparation applied on validation and test datasets.

In [None]:
# Apply the data preparation step to the test features.
# NOTE(review): no module-level `data_preparation` function is defined in the
# visible notebook (only the HealthInsurance.data_preparation method) - confirm
# where this name is expected to come from.
x_test = data_preparation( x_test )

#### 9.3.2.2 - Union of training and validation data.

In [None]:
# Stack training and validation data for the generalization check.
# The features come from the upsampled frame (x_training), so the labels
# must be the matching upsampled labels (y_training); y_train belongs to
# the non-upsampled split and has a different number of rows.
x_train_treino = pd.concat([ x_training, x_validation ] )
y_train_treino = pd.concat([ y_training, y_validation ] )

#### 9.3.2.3 - Generalization capacity of LGBM Model

In [None]:
# LGBM trained on training + validation data.
# NOTE(review): `params` is not defined in any visible cell (the fine-tuning
# sections above are empty) - confirm where the tuned parameters are set.

# Model Definition
lgbm_model = lgbm.LGBMClassifier(**params )

# Model Training
model_lgbm = lgbm_model.fit( x_train_treino, y_train_treino )

In [None]:
# Model Prediction
# class probabilities for the held-out test set
yhat_lgbm = model_lgbm.predict_proba( x_test )

In [None]:
# Test rows with their true response and LGBM propensity score
# (probability of class 1), ranked from highest to lowest score
df_test = (
    x_test
    .assign( response=y_test, score=yhat_lgbm[:,1].tolist() )
    .sort_values( 'score', ascending=False )
)

# Ranking metrics for k=20000
precision_atK = precision_at_k( df_test, k=20000 )
recall_atK = recall_at_k( df_test, k=20000 )

lgbm_performance_cv = ml_performance( 'LGBM Model', precision_atK, recall_atK )
lgbm_performance_cv

In [None]:
# Keep only column 1 of the predict_proba output for the precision/recall curve
yhat_lgbm_curve = yhat_lgbm[:,1]

In [None]:
# Precision / recall at every score threshold on the test set.
# The function is reached through the `mt` alias (sklearn.metrics); the bare
# name `precision_recall_curve` is never imported in this notebook.
precision, recall, thresholds = mt.precision_recall_curve(y_test, yhat_lgbm_curve)

In [None]:
# plot curves
# precision/recall have one more entry than thresholds, hence the [:-1]
plt.plot(thresholds, precision[:-1], marker='.', label='Precision')
plt.plot(thresholds, recall[:-1], marker='.', label='Recall')
plt.xlabel('thresholds')
plt.ylabel('Precision, Recall')
plt.legend()
plt.show()

#### 9.3.2.4 - Generalization capacity of XGB Model

In [None]:
# XGB Model

# model definition
# NOTE(review): scale_pos_weight=7.2 re-weights the positive class although
# x_training is already upsampled to balance - confirm this is intended.
model_xgb_cv = XGBClassifier( scale_pos_weight=7.2,
                              colsample_bytree = 0.3,
                              subsample = 0.5,
                              objective='binary:logistic', 
                              n_estimators=1500, 
                              max_depth=3,
                              min_child_weight = 3,
                              eta= 0.03 )

# model training
# x_training is the upsampled feature matrix, so it is paired with the
# upsampled labels y_training (y_train has a different number of rows)
model_xgb_cv.fit( x_training, y_training )

# model prediction - The generalization POWER
yhat_xgb_cv = model_xgb_cv.predict_proba( x_test )

In [None]:
# Test rows with their true response and XGB propensity score
# (probability of class 1), ranked from highest to lowest score
df_test = (
    x_test
    .assign( response=y_test, score=yhat_xgb_cv[:,1].tolist() )
    .sort_values( 'score', ascending=False )
)

# Ranking metrics for k=20000
precision_atK = precision_at_k( df_test, k=20000 )
recall_atK = recall_at_k( df_test, k=20000 )

xgb_performance_cv = ml_performance( 'XGB Model', precision_atK, recall_atK )
xgb_performance_cv

In [None]:
# One row of three diagnostic plots for the XGB model on the test set
fig, axs = plt.subplots(ncols= 3, figsize = (18,5))

# cumulative gain - Metric for sorting problem
skplt.metrics.plot_cumulative_gain(y_test, yhat_xgb_cv, ax=axs[0],title='Cumulative Gain - XGB');

# Lift Curve
skplt.metrics.plot_lift_curve(y_test, yhat_xgb_cv,ax=axs[1],title='Lift Curve - XGB');

# Roc Curve
skplt.metrics.plot_roc(y_test, yhat_xgb_cv, ax=axs[2], title='ROC-Curve - XGB');

plt.tight_layout()

In [None]:
# Keep only column 1 of the predict_proba output for the precision/recall curve
yhat_xgb_curve = yhat_xgb_cv[:,1]

In [None]:
# Precision / recall at every score threshold on the test set.
# The function is reached through the `mt` alias (sklearn.metrics); the bare
# name `precision_recall_curve` is never imported in this notebook.
precision, recall, thresholds = mt.precision_recall_curve( y_test, yhat_xgb_curve)

In [None]:
# Size of the test feature matrix
x_test.shape

In [None]:
# Inspect the score thresholds returned with the precision/recall curve
thresholds

In [None]:
# plot curves
# precision/recall have one more entry than thresholds, hence the [:-1]
plt.plot(thresholds, precision[:-1], marker='.', label='Precision')
plt.plot(thresholds, recall[:-1], marker='.', label='Recall')
plt.xlabel('thresholds')
plt.ylabel('Precision, Recall')
plt.legend()
plt.show()

### 9.1 - Cross Validation

In [None]:
# Features and target of train + validation in one frame, as input for cross-validation
df_cc = pd.concat([x_train_treino, y_train_treino], axis=1)

In [None]:
# Estimators to evaluate, keyed by the label used in the results table
models = {'XGB Model': model_xgb_cv,
          'LGBM Model': model_lgbm }

# Collect one column of cross-validated metrics per model
model_performance = pd.DataFrame()

for key, estimator in models.items():
    performance_cv = cross_validation(5, key, estimator, df_cc, 20000)
    model_performance = pd.concat([model_performance, performance_cv], axis=1)

# One row per model
model_performance.T

#### 9.3.2.5 - Last training with all data before deploying to production

In [None]:
# Union of training, validation and test datasets
# (the complete raw frame is reused instead of re-joining the partitions)
df_last = df.copy()

# Split response variable into a new dataset
x_last_training = df_last.drop( 'response', axis=1 )
y_last_training = df_last['response'].copy()

# Applying data transformation
# NOTE(review): no module-level `data_preparation` function is defined in the
# visible notebook - confirm where this name is expected to come from.
x_last_training = data_preparation( x_last_training )

In [None]:
# XGB Model Last Training
# Same hyper-parameters as model_xgb_cv above, fitted on the complete dataset

# model definition
model_xgb_lv= XGBClassifier( scale_pos_weight=7.2,
                              colsample_bytree = 0.3,
                              subsample = 0.5,
                              objective='binary:logistic', 
                              n_estimators=1500, 
                              max_depth=3,
                              min_child_weight = 3,
                              eta= 0.03 )

# model training
model_xgb_lv.fit( x_last_training, y_last_training )

In [None]:
# Imported here because the notebook's other pickle imports sit in later cells
import pickle

# Persist the final model; the context manager closes (and flushes) the
# file even if pickling fails
with open('C:/Users/perot/Documents/ds_repos/projects/Health_Insurance_Cross_Sell/src/models/xgb_model.pkl', 'wb') as model_file:
    pickle.dump(model_xgb_lv, model_file)

## 10 - DEPLOYING MODEL TO PRODUCTION

### 10.1 - HEALTHINSURANCE CLASS

In [None]:
import pickle
import inflection
import numpy  as np
import pandas as pd

class HealthInsurance:
    """Pre-processing and scoring pipeline used by the prediction API.

    The constructor loads the fitted scalers and encoding maps from
    ``src/features`` (relative to ``home_path``). The methods are meant to be
    chained: ``data_cleaning`` -> ``feature_engineering`` ->
    ``data_preparation`` -> ``get_prediction``.
    """

    def __init__( self ):
        # Prefix prepended to every artifact path ('' = current directory)
        self.home_path                                = ''
        self.frequency_encode_policy_sales_scaler     = self._load_pickle( 'src/features/frequency_encode_policy_sales_scaler.pkl' )
        self.frequency_encode_region_code_scaler      = self._load_pickle( 'src/features/frequency_encode_region_code_scaler.pkl' )
        self.target_encode_gender_scaler              = self._load_pickle( 'src/features/target_encode_gender_scaler.pkl' )
        self.age_scaler                               = self._load_pickle( 'src/features/age_scaler.pkl' )
        self.vintage_scaler                           = self._load_pickle( 'src/features/vintage_scaler.pkl' )
        self.annual_premium_scaler                    = self._load_pickle( 'src/features/annual_premium_scaler.pkl' )

    def _load_pickle( self, relative_path ):
        """Unpickle the artifact at ``home_path + relative_path`` and close the file."""
        # NOTE: unpickling can execute arbitrary code; only load trusted artifacts.
        with open( self.home_path + relative_path, 'rb' ) as artifact_file:
            return pickle.load( artifact_file )

    def data_cleaning( self, df5 ):
        """Rename the columns of ``df5`` to snake_case, in place, and return it.

        Every column is converted by its own name (e.g. 'Vehicle_Age' ->
        'vehicle_age'), so the result does not depend on the column order or
        on the exact number of columns in the payload.
        """
        df5.columns = [inflection.underscore( column ) for column in df5.columns]

        return( df5 )

    def feature_engineering( self, df5 ):
        """Adjust dtypes and encode the vehicle columns of ``df5`` in place.

        'vehicle_age' becomes 1 ('< 1 Year'), 2 ('1-2 Year') or 3 (anything
        else); 'vehicle_damage' becomes 1 for 'Yes' and 0 otherwise.
        Returns the same DataFrame.
        """
        # Changing data types
        df5['region_code'] = df5['region_code'].astype(object)
        df5['policy_sales_channel'] = df5['policy_sales_channel'].astype(object)

        # Fitting vehicle_age column
        df5['vehicle_age'] = df5['vehicle_age'].apply(lambda x: 1 if (x == '< 1 Year') else 2 if (x == '1-2 Year') else 3)

        # Fitting vehicle_damage column
        df5['vehicle_damage'] = df5['vehicle_damage'].apply(lambda x: 1 if (x == 'Yes') else 0)

        return( df5 )

    def data_preparation( self, df5 ):
        """Scale and encode ``df5`` and return the model's feature columns.

        The numeric columns are transformed with the loaded scalers and the
        categorical ones are mapped through the loaded encoding maps; the
        columns are overwritten on the frame that was passed in. Values left
        as NaN (e.g. categories missing from a map) are replaced by 0.

        Returns a DataFrame restricted to the selected feature columns.
        """
        # STANDARDIZATION

        # annual premium
        df5['annual_premium'] = self.annual_premium_scaler.transform( df5[['annual_premium']].values )

        # RESCALING

        # age
        df5['age'] = self.age_scaler.transform( df5[['age']].values )

        # vintage
        df5['vintage'] = self.vintage_scaler.transform( df5[['vintage']].values )

        # ENCODER

        # policy sales channel
        df5.loc[:, 'policy_sales_channel'] = df5['policy_sales_channel'].map( self.frequency_encode_policy_sales_scaler )

        # region code
        df5.loc[:, 'region_code'] = df5['region_code'].map( self.frequency_encode_region_code_scaler )

        # gender
        df5.loc[:, 'gender'] = df5['gender'].map( self.target_encode_gender_scaler )

        # FILL NAN VALUES
        df5 = df5.fillna(0)

        # Feature Selection
        cols_selected = ['vintage', 'annual_premium','age','region_code','vehicle_damage','policy_sales_channel','previously_insured','vehicle_age']

        return df5[cols_selected]

    def get_prediction( self, model, original_data, test_data ):
        """Score ``test_data`` and return ``original_data`` plus a 'score' column as JSON.

        Parameters
        ----------
        model : estimator exposing ``predict_proba``; column 1 is the score.
        original_data : DataFrame that receives the 'score' column (modified
            in place).
        test_data : prepared feature frame passed to the model.

        Returns
        -------
        JSON string with one record per row.
        """
        # prediction
        pred = model.predict_proba( test_data )

        # join pred into the original data
        original_data['score'] = pred[:, 1].tolist()

        return original_data.to_json( orient='records', date_format='iso' )

### 10.2 - API HANDLER

In [None]:
import pickle
import pandas as pd
import os
from flask                           import Flask, request, Response
from healthinsurance.healthinsurance import HealthInsurance

# loading model
# (the file name matches the one written by the training notebook:
# xgb_model.pkl, without a space)
# NOTE: unpickling can execute arbitrary code; only load trusted model files.
with open( 'src/models/xgb_model.pkl', 'rb' ) as model_file:
    model = pickle.load( model_file )

# initialize API
app = Flask( __name__ )

@app.route( '/healthinsurance/predict', methods=['POST'] )
def health_insurance_predict():
    """Score the customers sent in the request body.

    Accepts either one JSON object (a single customer) or a JSON list of
    objects. Returns the records with an added 'score' field as an
    application/json response; an empty or missing payload yields '{}'.
    """
    test_json = request.get_json()

    if not test_json:
        # nothing to score
        return Response( '{}', status=200, mimetype='application/json' )

    if isinstance( test_json, dict ): # unique example
        test_raw = pd.DataFrame( test_json, index=[0] )

    else: # multiple examples
        test_raw = pd.DataFrame( test_json, columns=test_json[0].keys() )

    # Instantiate HealthInsurance class
    pipeline = HealthInsurance()

    # data cleaning
    df1 = pipeline.data_cleaning( test_raw )

    # feature engineering
    df2 = pipeline.feature_engineering( df1 )

    # Data Preparation
    df3 = pipeline.data_preparation( df2 )

    # prediction
    # NOTE(review): test_raw is the same object the pipeline steps above
    # modified in place, so the returned records carry the transformed
    # columns rather than the raw payload - confirm this is intended.
    df_response = pipeline.get_prediction( model, test_raw, df3 )

    # Same content type as the empty-payload branch, so clients always
    # receive application/json
    return Response( df_response, status=200, mimetype='application/json' )

if __name__ == '__main__':
    # Start the Flask server with '0.0.0.0' as host when run as a script
    app.run('0.0.0.0')

#    port = os.environ.get('PORT', 5000)
#    app.run( host='0.0.0.0', port=port )

### 10.3 - API Tester

In [None]:
import requests
import pandas as pd

In [None]:
# Load Datasets
# NOTE(review): absolute, machine-specific path
df_test = pd.read_csv('C:/Users/perot/Documents/ds_repos/projects/Health_Insurance_Cross_Sell/data/raw/test.csv')

In [None]:
# Keep 10 random rows for the API smoke test (no seed, so the rows differ between runs)
df_test = df_test.sample(10)

In [None]:
# Preview the sampled rows
df_test.head()

In [None]:
# `json` is not imported anywhere else in this tester
import json

# convert dataframe to json (a list with one record per row)
data = json.dumps( df_test.to_dict( orient='records' ) )

In [None]:
# API Call
# The local endpoint is active; the hosted endpoint is kept commented out
#url = 'https://healthinsurance-api.onrender.com/healthinsurance/predict'
url = 'http://127.0.0.1:5000/healthinsurance/predict'
header = {'Content-type': 'application/json' }

# POST the JSON payload and report the HTTP status
r = requests.post( url, data=data, headers=header )
print( 'Status Code {}'.format( r.status_code ) )

In [None]:
# Parse the response body once and load the scored records into a DataFrame
scored_records = r.json()
d2 = pd.DataFrame( scored_records, columns=scored_records[0].keys() )