# Reports
1. Inputs: raw data from Kaggle
2. Outputs: predictions
3. Teste comparativo entre o data raw processado através do request em relação ao processamento pelo projeto

# 1.0 Imports

###### 1.1 Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import classification_report, roc_auc_score, accuracy_score

import requests

import pickle

import time
import warnings

# Processing time: wall-clock start timestamp; the last cell subtracts it from time.time() to report the elapsed run time
time_agora = time.time()

###### 1.2 Collecting data

In [2]:
def _load_pickle(path):
    """Return the object unpickled from ``path``, closing the file afterwards.

    Intended only for artifacts written by this project: unpickling can
    execute arbitrary code, so never point it at an untrusted file.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# raw data: the train and test CSVs stacked into a single frame
train = pd.read_csv(r'D:\\My Drive\\Pessoal\\Projetos\\insurance_sales_predict\\insurance_sales_predict\\train.csv')
test = pd.read_csv(r'D:\\My Drive\\Pessoal\\Projetos\\insurance_sales_predict\\insurance_sales_predict\\test.csv')
df_raw = pd.concat([train, test])

# arrays exported by the modeling step
X = _load_pickle('D:\\My Drive\\Pessoal\\Projetos\\insurance_sales_predict\\insurance_sales_predict\\exportings\\X_to_modeling.pkl')
y = _load_pickle('D:\\My Drive\\Pessoal\\Projetos\\insurance_sales_predict\\insurance_sales_predict\\exportings\\y_to_modeling.pkl')
# NOTE(review): positions 2 and 6 are used as the training features and target;
# the layout of the pickled containers is not visible in this notebook - confirm
# against the notebook that exported them.
X_train = X[2]
y_train = y[6]

# model
lr_model = _load_pickle('D:\\My Drive\\Pessoal\\Projetos\\insurance_sales_predict\\webapp\\model\\model.pkl')

# columns
model_columns = _load_pickle('D:\\My Drive\\Pessoal\\Projetos\\insurance_sales_predict\\insurance_sales_predict\\exportings\\model_columns.pkl')

# 2.0 Predictions

##### 2.1 Request

In [3]:
# Single-customer request, kept for reference:
# request_1 = df_raw.sample(1)
# request_1_json = request_1.to_json(orient='records')
# received = requests.post(url='http://192.168.0.11:5000/predict', 
#                          data=request_1_json, 
#                          headers={'Content-type':'application/json'})
# received.status_code

# Request with several customers.
# Fixed seed so the same 500 rows are sent on every run and the report below is reproducible.
SAMPLE_SEED = 42
# Upper bound (seconds) for connecting and for each wait on the server;
# without a timeout requests can block forever if the host never answers.
REQUEST_TIMEOUT_SECONDS = 60

train_json = train.sample(500, random_state=SAMPLE_SEED).to_json(orient='records')

url = 'https://insurance-sales-predict.herokuapp.com/predict' # heroku host
# url = 'http://192.168.0.13:5000/predict' # local host

received = requests.post(url=url, 
                         data=train_json, 
                         headers={'Content-type':'application/json'},
                         timeout=REQUEST_TIMEOUT_SECONDS)
# Displayed as the cell output; 200 means the API accepted and answered the request
received.status_code

200

##### 2.2 Received

In [4]:
# Decode the JSON body once instead of parsing it twice
records = received.json()
# Column order follows the keys of the first record; an empty list reply
# produces an empty frame instead of raising IndexError on records[0]
columns = list(records[0].keys()) if records else None
df_predict_request = pd.DataFrame(records, columns=columns)
df_predict_request.head(5)

Unnamed: 0,Age,Previously_Insured,Gender_,Vehicle_Damage_,Policy_Sales_Channel_Response_Mean,Region_Code_Response_Mean,High_Response_Age,Log_Annual_Premium,Vehicle_Age_1,Vehicle_Age_2,Response,predictions,probability
0,-0.762588,1.086689,0.921495,-1.008517,-1.08823,-0.017348,-1.208024,0.029685,1.147338,-0.210045,0,0.0,0.002398
1,0.269626,-0.920227,0.921495,0.991555,1.428362,-1.116765,0.827798,0.324885,-0.871583,-0.210045,0,1.0,0.792271
2,-0.440021,-0.920227,-1.085193,0.991555,-1.08823,-1.034305,0.827798,0.128925,1.147338,-0.210045,0,1.0,0.602046
3,0.463166,-0.920227,0.921495,0.991555,0.889119,-0.21644,0.827798,-2.135897,-0.871583,-0.210045,0,1.0,0.727011
4,-1.149669,-0.920227,-1.085193,0.991555,-1.08823,-1.378621,-1.208024,0.088086,1.147338,-0.210045,0,0.0,0.452836


##### 2.3 Through request

In [5]:
# Per-class precision/recall/F1 for the API's predictions, scored against
# the Response column that came back in the same payload
request_true = df_predict_request['Response']
request_pred = df_predict_request['predictions']
print(classification_report(request_true, request_pred, zero_division=0))

              precision    recall  f1-score   support

           0       0.97      0.67      0.79       437
           1       0.27      0.84      0.41        63

    accuracy                           0.69       500
   macro avg       0.62      0.76      0.60       500
weighted avg       0.88      0.69      0.75       500



##### 2.4 Through project

In [6]:
# Class predictions from the locally loaded model on the project's own feature matrix
pred = lr_model.predict(X_train)

In [7]:
# predict_proba returns one column per class; keep the second column
# (presumably the positive class, Response == 1 - confirm via the model's class order)
class_probabilities = lr_model.predict_proba(X_train)
proba = class_probabilities[:, 1]

In [8]:
# Wrap the project's feature matrix in a frame labelled with the model's column names
df_predict_project = pd.DataFrame(data=X_train, columns=model_columns)

In [9]:
# Append the model outputs and the target next to the features,
# in the same column order as before: predictions, probability, Response
df_predict_project = df_predict_project.assign(
    predictions=pred,
    probability=proba,
    Response=y_train,
)

In [10]:
# Order the rows by index label (rebinding the name rather than sorting in place)
df_predict_project = df_predict_project.sort_index()

In [11]:
# Per-class precision/recall/F1 for the predictions computed inside the project
project_true = df_predict_project['Response']
project_pred = df_predict_project['predictions']
print(classification_report(project_true, project_pred, zero_division=0))

              precision    recall  f1-score   support

         0.0       0.98      0.68      0.80    334399
         1.0       0.28      0.91      0.43     46710

    accuracy                           0.71    381109
   macro avg       0.63      0.79      0.62    381109
weighted avg       0.90      0.71      0.76    381109



##### 2.5 Testando processamento por Request versus Project

In [12]:
# Per-column sum of absolute differences between the project frame and the request frame.
# axis=0 is spelled out because np.sum(DataFrame) forwards axis=None, whose
# per-column meaning is deprecated in recent pandas releases.
# The subtraction aligns on index labels and column names; labels present in only
# one frame give NaN, which sum() skips by default.
# NOTE(review): the request frame is rebuilt from a random 500-row sample sent with
# orient='records', so it presumably carries labels 0-499 rather than the sampled
# rows' original labels. If so, this compares different customers row by row, which
# would explain the non-zero 'Response' difference. Confirm before reading the
# totals as a processing mismatch.
(df_predict_project - df_predict_request).abs().sum(axis=0)

Age                                   599.071346
Gender_                               539.799094
High_Response_Age                     533.385411
Log_Annual_Premium                    494.567601
Policy_Sales_Channel_Response_Mean    522.257705
Previously_Insured                    479.652800
Region_Code_Response_Mean             554.029193
Response                              103.000000
Vehicle_Age_1                         522.900447
Vehicle_Age_2                         233.633593
Vehicle_Damage_                       496.017839
predictions                           245.000000
probability                           185.399231
dtype: float64

# 3.0 Exportings

###### Dataframe with predictions and probabilities

In [13]:
# Persist the request predictions; the with-block guarantees the file is flushed
# and closed even if pickling fails part-way
with open('D:\\My Drive\\Pessoal\\Projetos\\insurance_sales_predict\\insurance_sales_predict\\exportings\\df_predict_request.pkl', 'wb') as export_file:
    pickle.dump(df_predict_request, export_file)

# Time

In [14]:
# Seconds elapsed since time_agora, rounded to 2 decimals, then reduced to whole
# minutes (int() truncates, so runs shorter than a minute report 0)
elapsed_seconds = round(time.time() - time_agora, 2)
elapsed_minutes = int(elapsed_seconds / 60)
print(f'O tempo de processamento do projeto foi de: {elapsed_minutes} minutos')

O tempo de processamento do projeto foi de: 0 minutos
