# Predict de modelos entrenados en Data Test

In [282]:
import pandas as pd
import numpy as np

import missingno as msno
import matplotlib.pyplot as plt
import plotly.express as px

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split 
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.decomposition import TruncatedSVD

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
import datetime
from dateutil.parser import parse
from sklearn.tree import  DecisionTreeClassifier
import plotly.graph_objects as go
import umap
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
import xgboost as xgb
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import GridSearchCV
import ml_metrics
import pickle
from sklearn.metrics import accuracy_score
import json
from tensorflow import keras
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides any warning emitted by the cells
# that follow — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [283]:
# Read the consolidated test set, discard the "Unnamed: 0" column and
# replace every missing value with an empty string.
raw_test = pd.read_csv("consolidation_data_test06June2022.csv")
Data = raw_test.drop(columns=["Unnamed: 0"]).fillna("")
Data.head()

Unnamed: 0,Duracion_Campaña,B,C,lec_B-B,lec_E-E,lec_D-E,lec_C-D,lec_A-A,num_comunicaciones,Length,...,Sexo,Renta,Recibe_sueldo_en_cuenta,Segmento_consumidor,Meses_antiguedad,Comuna,Ciudad,Estado_civil,Principalidad,Profesion
0,1.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,5.0,2,...,1,R1,0,A,Mayor a 10 años,331.0,13.0,D,B,P164
1,1.166667,2.0,4.0,2.0,0.0,0.0,2.0,0.0,4.0,2,...,1,R9,0,B,Mayor a 10 años,37.0,5.0,D,E,P164
2,1.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2,...,1,R9,1,B,Mayor a 10 años,19.0,4.0,B,D,P114
3,1.333333,1.0,2.0,0.0,0.0,0.0,1.0,0.0,2.0,2,...,1,R10,0,B,Mayor a 10 años,37.0,5.0,B,C,P164
4,1.0,1.0,1.0,3.0,0.0,0.0,0.0,0.0,3.0,2,...,1,R4,0,A,Mayor a 10 años,91.0,13.0,D,B,P85


In [284]:
# Categorical features: every object/category-typed column, plus two columns
# added explicitly by name.
categorical = [
    *Data.select_dtypes(include=["category", "object"]).columns,
    'Sexo',
    'Recibe_sueldo_en_cuenta',
]


In [285]:
# Cast every categorical column to str in a single pass.
Data = Data.astype({column: str for column in categorical})

In [287]:
# Keep the row index of the test set; the export cells add 1 to it to build
# the "id" column of each predictions file.
# NOTE(review): this name shadows the built-in id() for the rest of the notebook.
id=Data.index
id

RangeIndex(start=0, stop=79539, step=1)

# Model 1: Naive Bayes

In [288]:
# Load the pickled Naive Bayes pipeline.
# The file is opened in a context manager so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted source.
with open('pipe_naive.pkl', 'rb') as model_file:
    pipe_naive1 = pickle.load(model_file)

In [289]:
# Generate predictions for the whole test set with the loaded pipeline
# and display them.
Ypred = pipe_naive1.predict(Data)
Ypred

array(['A-A B-B', 'D-E C-D', 'E-E B-B', ..., 'E-E C-D', 'B-B A-A',
       'C-D E-E'], dtype='<U11')

In [290]:
# Write the Naive Bayes predictions, keyed by the row index shifted by one.
submission = pd.DataFrame({"id": (id + 1).tolist(), "productos": Ypred})
submission.to_csv("predict_naive.csv", index=False)

# Model 2: Decision Tree

In [295]:
# Load the pickled Decision Tree pipeline.
# The file is opened in a context manager so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted source.
with open('pipe_tree.pkl', 'rb') as model_file:
    pipe_tree = pickle.load(model_file)

In [296]:
# Generate predictions for the whole test set with the loaded pipeline
# and display them.
Ypred = pipe_tree.predict(Data)
Ypred

array(['', 'C-D D-E', 'E-E', ..., '', '', 'C-D E-E'], dtype=object)

In [297]:
# Write the Decision Tree predictions, keyed by the row index shifted by one.
submission = pd.DataFrame({"id": (id + 1).tolist(), "productos": Ypred})
submission.to_csv("predict_decision_tree.csv", index=False)

# Model 3: Random Forest

In [298]:
# Load the pickled Random Forest pipeline.
# The file is opened in a context manager so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted source.
with open('pipe_random_forest1.pkl', 'rb') as model_file:
    pipe_random_forest1 = pickle.load(model_file)

In [299]:
# Generate predictions for the whole test set with the loaded pipeline
# and display them.
Ypred = pipe_random_forest1.predict(Data)
Ypred

array(['', 'C-D', '', ..., '', '', 'C-D'], dtype=object)

In [300]:
# Write the Random Forest predictions, keyed by the row index shifted by one.
submission = pd.DataFrame({"id": (id + 1).tolist(), "productos": Ypred})
submission.to_csv("predict_random_forest.csv", index=False)

# Model 4: XGBoost

In [305]:
# Load the target dictionary stored as JSON.
# The handle is still bound to `tf` as before, but the context manager
# guarantees it is closed once the content has been parsed.
with open("myDictionary.json", "r") as tf:
    dic_target = json.load(tf)

In [306]:
# Create an empty XGBoost classifier and load the saved model
# from model_xgb1.bin into it.
model_xgb1 = xgb.XGBClassifier()
model_xgb1.load_model("model_xgb1.bin")

In [307]:
# Load the pickled preprocessing pipeline used with the XGBoost model.
# The file is opened in a context manager so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted source.
with open('pipe_xgb1.pkl', 'rb') as pipeline_file:
    pipe_xgb1 = pickle.load(pipeline_file)

In [308]:
# Reverse of dic_target: maps each value back to its key.
# NOTE(review): the prediction cell in this section calls .replace(dic_target),
# not inv_map, and inv_map is rebound later in the notebook — confirm whether
# this mapping is still needed.
inv_map = {v: k for k, v in dic_target.items()}

In [309]:
# Preprocess the test set, predict with the XGBoost model, then pass the
# predictions through dic_target and keep the single resulting column.
X_xgb = pipe_xgb1.transform(Data)
raw_predictions = pd.DataFrame(model_xgb1.predict(X_xgb))
Ypred = raw_predictions.replace(dic_target)[0]
Ypred

0               
1        C-D D-E
2               
3            E-E
4               
          ...   
79534    C-D D-E
79535           
79536        A-A
79537           
79538    C-D E-E
Name: 0, Length: 79539, dtype: object

In [311]:
# Write the XGBoost predictions, keyed by the row index shifted by one.
submission = pd.DataFrame(
    {"id": (id + 1).tolist(), "productos": Ypred.values}
)
submission.to_csv("predict_xgboost.csv", index=False)

# Model 5: Fully Connected Multilayer Perceptron

In [312]:
# Load the pickled preprocessing pipeline used with the neural network.
# The file is opened in a context manager so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted source.
with open('pipe_rn.pkl', 'rb') as pipeline_file:
    pipe_rn = pickle.load(pipeline_file)

In [313]:
# Load the pickled encoder; its categories_ attribute is used further down
# to turn predicted positions back into labels.
# The file is opened in a context manager so the handle is closed after loading.
# NOTE: pickle.load can execute arbitrary code; only load artifacts from a trusted source.
with open('encoder.pkl', 'rb') as encoder_file:
    enc = pickle.load(encoder_file)

In [314]:
# Load the saved Keras model from my_model1.h5
model1 = keras.models.load_model('my_model1.h5')

# Print the layer-by-layer summary of the loaded model
model1.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_11 (Dense)            (None, 4)                 2344      
                                                                 
 dense_12 (Dense)            (None, 35)                175       
                                                                 
Total params: 2,519
Trainable params: 2,519
Non-trainable params: 0
_________________________________________________________________


In [315]:
# Map each position in the encoder's first category list to its label.
to_enc_inv = list(enc.categories_[0])
dic = dict(enumerate(to_enc_inv))

In [316]:
# Reverse of dic: maps each value back to its key.
# NOTE(review): none of the following cells visible here reference inv_map —
# confirm whether it is still needed.
inv_map = {v: k for k, v in dic.items()}

In [317]:
# Preprocessing: transform the test set with the loaded pipeline
X_test1=pipe_rn.transform(Data)


In [318]:
  ### Generate the predictions with the loaded model and display them
y_pred = model1.predict(X_test1)
y_pred



array([[8.50595713e-01, 2.65357587e-02, 1.55446015e-03, ...,
        1.46976192e-04, 1.52014056e-03, 8.38084306e-05],
       [7.31176585e-02, 3.17908600e-02, 1.67913344e-02, ...,
        1.08341025e-02, 4.52717096e-02, 1.61966123e-02],
       [7.02647030e-01, 1.95384119e-02, 1.25323993e-03, ...,
        6.72401686e-04, 9.26389918e-03, 5.13164792e-04],
       ...,
       [6.70349300e-01, 4.21694554e-02, 5.07258531e-03, ...,
        1.60385564e-03, 8.39588419e-03, 8.37014522e-04],
       [7.82100499e-01, 4.76850234e-02, 3.62942857e-03, ...,
        2.52192462e-04, 1.87765516e-03, 1.48071907e-04],
       [1.22104368e-07, 3.98586280e-06, 2.00860050e-05, ...,
        2.60714829e-01, 1.15935165e-06, 1.49747284e-05]], dtype=float32)

In [319]:
# Collapse each row of scores to the position of its largest value.
y_pred_1col = y_pred.argmax(axis=1)

In [320]:
# Wrap the winning positions in a one-column frame, then translate them
# through `dic`.
predicted_frame = pd.DataFrame(y_pred_1col, columns=['Predict'])
y_pred_1col = predicted_frame.replace(dic)

In [324]:
# Write the neural-network predictions, keyed by the row index shifted by one.
submission = pd.DataFrame(
    {"id": (id + 1).tolist(), "productos": y_pred_1col["Predict"]}
)
submission.to_csv("predict_red_neuronal.csv", index=False)