In [31]:
# Tratamiento de datos
# ==============================================================================
import numpy as np
import pandas as pd
import statsmodels.api as sm
import math

# Preprocesado y modelado
# ==============================================================================
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import classification_report
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid
from sklearn.inspection import permutation_importance
import multiprocessing

from sklearn.metrics import log_loss
from sklearn.impute import SimpleImputer

from sklearn.neural_network import MLPClassifier
from sklearn.neural_network import MLPRegressor

import pickle

# Configuración warnings
# ==============================================================================
import warnings
warnings.filterwarnings('ignore')

In [32]:
df = pd.read_csv( "/home/bautista/Datos/Machine-Learning-Datos/FeatureEngineering/df_time_series.csv" )
df = df.drop(columns = {'Unnamed: 0', 'Delivery_Year', 'Delivery_Quarter'})
df

Unnamed: 0,Region,Total_Amount,Años en entregar,Month Created,Product_Name,Opportunity_Owner,Delivery_Year,Delivery_Quarter,"Pricing, Delivery_Terms_Approved","Pricing, Delivery_Terms_Quote_Appr",Decision,Promedio_Region_Por_Quarter,Promedio_Owner_Por_Year_And_Quarter,Lag_1,Delta
0,EMEA,2.773332,0,2,Other,Other,2016,Q1,0,1,0,2.496126,2.585640,,3.088401
1,EMEA,2.740425,0,2,Product_Name_210,Other,2017,Q2,1,1,0,2.486798,2.545803,2.585640,2.098247
2,EMEA,2.999379,0,3,Product_Name_432,Other,2017,Q3,1,1,0,2.499777,2.852610,2.545803,2.583974
3,APAC,2.780462,0,7,Other,Other,2018,Q4,0,0,0,2.448872,2.594236,2.852610,1.552569
4,APAC,2.647444,0,2,Product_Name_91,Person_Name_11,2016,Q1,1,1,1,2.467621,2.468684,,3.019307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13578,Americas,2.845924,1,6,Product_Name_488,Person_Name_8,2019,Q3,1,1,1,2.544620,2.706302,2.740744,2.097463
13579,Americas,2.819796,1,6,Product_Name_483,Person_Name_8,2019,Q3,1,1,1,2.544620,2.706302,2.740744,2.097463
13580,Americas,2.845924,1,6,Product_Name_488,Person_Name_8,2019,Q3,1,1,1,2.544620,2.706302,2.740744,2.097463
13581,Americas,3.032890,1,10,Product_Name_445,Person_Name_8,2019,Q3,1,1,0,2.544620,2.706302,2.740744,2.097463


In [33]:
df = df.fillna(0)

In [34]:
df.describe()

Unnamed: 0,Total_Amount,Años en entregar,Month Created,Delivery_Year,"Pricing, Delivery_Terms_Approved","Pricing, Delivery_Terms_Quote_Appr",Decision,Promedio_Region_Por_Quarter,Promedio_Owner_Por_Year_And_Quarter,Lag_1,Delta
count,13583.0,13583.0,13583.0,13583.0,13583.0,13583.0,13583.0,13583.0,13583.0,13583.0,13583.0
mean,2.65108,0.255025,6.574321,2017.213208,0.569388,0.77332,0.554001,2.449497,2.444527,2.339767,2.183309
std,0.17725,0.466251,3.421165,0.800279,0.49518,0.4187,0.497094,0.098289,0.142587,0.531066,0.214954
min,0.0,-1.0,1.0,2016.0,0.0,0.0,0.0,2.219173,1.336753,0.0,0.0
25%,2.588674,0.0,4.0,2017.0,0.0,1.0,0.0,2.437621,2.401642,2.389793,2.088779
50%,2.656112,0.0,6.0,2017.0,1.0,1.0,1.0,2.491107,2.465992,2.466176,2.159336
75%,2.753412,0.0,10.0,2018.0,1.0,1.0,1.0,2.499777,2.54416,2.546746,2.217261
max,3.104954,5.0,12.0,2019.0,1.0,1.0,1.0,2.553549,2.9739,2.85261,3.240224


# Encode

In [35]:
Mean_encoded_subject_region = df.groupby(['Region'])['Decision'].mean().to_dict() 

df['Region'] =  df['Region'].map(Mean_encoded_subject_region)

#--------------------------------------------------------------------------------------------

mean_encoded_product = df.groupby("Product_Name")["Decision"].mean().to_dict()

df["Product_Name"] = df["Product_Name"].map(mean_encoded_product)

#--------------------------------------------------------------------------------------------

#Mean_encoded_subject_quarter = df.groupby(['Delivery_Quarter'])['Decision'].mean().to_dict() 

#df['Delivery_Quarter'] =  df['Delivery_Quarter'].map(Mean_encoded_subject_quarter)

#--------------------------------------------------------------------------------------------

Mean_encoded_subject_owner = df.groupby(['Opportunity_Owner'])['Decision'].mean().to_dict() 

df['Opportunity_Owner'] =  df['Opportunity_Owner'].map(Mean_encoded_subject_owner)

# Split

In [36]:
X_test = df.tail(math.floor(df.shape[0]*0.20)).drop(columns = {'Decision'})
y_test = df.tail(math.floor(df.shape[0]*0.20))['Decision']
X_train = df.head(round(df.shape[0]*0.80)).drop(columns = {'Decision'})
y_train = df.head(round(df.shape[0]*0.80))['Decision']
X_train

Unnamed: 0,Region,Total_Amount,Años en entregar,Month Created,Product_Name,Opportunity_Owner,Delivery_Year,Delivery_Quarter,"Pricing, Delivery_Terms_Approved","Pricing, Delivery_Terms_Quote_Appr",Promedio_Region_Por_Quarter,Promedio_Owner_Por_Year_And_Quarter,Lag_1,Delta
0,0.532255,2.773332,0,2,0.445844,0.000000,2016,0.536141,0,1,2.496126,2.585640,0.000000,3.088401
1,0.532255,2.740425,0,2,0.283582,0.000000,2017,0.554948,1,1,2.486798,2.545803,2.585640,2.098247
2,0.532255,2.999379,0,3,0.009174,0.000000,2017,0.542319,1,1,2.499777,2.852610,2.545803,2.583974
3,0.587888,2.780462,0,7,0.445844,0.000000,2018,0.581007,0,0,2.448872,2.594236,2.852610,1.552569
4,0.587888,2.647444,0,2,0.900000,0.371429,2016,0.536141,1,1,2.467621,2.468684,0.000000,3.019307
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10861,0.587888,2.817755,0,9,0.055556,0.252427,2016,0.581007,1,1,2.448872,2.594527,2.441786,2.357726
10862,0.587888,2.793234,0,10,0.333333,0.252427,2016,0.581007,1,1,2.448872,2.594527,2.441786,2.357726
10863,0.587888,2.541749,0,10,0.722798,0.252427,2016,0.581007,1,1,2.448872,2.594527,2.441786,2.357726
10864,0.587888,2.772403,0,10,0.333333,0.252427,2016,0.581007,1,1,2.448872,2.594527,2.441786,2.357726


# Regresion

In [63]:
import sklearn as sk
import sklearn.neural_network


lr = 0.001           # learning rate
nn = [2, 16, 8, 1]  # número de neuronas por capa.

# Creamos el objeto del modelo de red neuronal multicapa.
modelRegresor = sk.neural_network.MLPRegressor(solver='adam', 
                                     activation = 'logistic',
                                     learning_rate_init=lr,
                                     hidden_layer_sizes=tuple(nn[1:]),
                                     verbose=True,
                                     n_iter_no_change=1000,
                                     batch_size = 64)

In [66]:
modelRegresor.fit(X_train, y_train)

Iteration 1, loss = 0.13607770
Iteration 2, loss = 0.12426167
Iteration 3, loss = 0.12420985
Iteration 4, loss = 0.12423593
Iteration 5, loss = 0.12421702
Iteration 6, loss = 0.12421190
Iteration 7, loss = 0.12421587
Iteration 8, loss = 0.12424215
Iteration 9, loss = 0.12422549
Iteration 10, loss = 0.12422571
Iteration 11, loss = 0.12425857
Iteration 12, loss = 0.12421060
Iteration 13, loss = 0.12422386
Iteration 14, loss = 0.12423375
Iteration 15, loss = 0.12427685
Iteration 16, loss = 0.12420602
Iteration 17, loss = 0.12424155
Iteration 18, loss = 0.12423847
Iteration 19, loss = 0.12423995
Iteration 20, loss = 0.12417292
Iteration 21, loss = 0.12424158


MLPRegressor(activation='logistic', batch_size=64,
             hidden_layer_sizes=(16, 8, 1), n_iter_no_change=1000,
             verbose=True)

In [39]:
LinReg_pred = modelRegresor.predict(X_test)

In [40]:
probando = pd.DataFrame()
probando['target'] = LinReg_pred
probando.describe()

Unnamed: 0,target
count,2716.0
mean,0.5590661
std,1.203155e-11
min,0.5590661
25%,0.5590661
50%,0.5590661
75%,0.5590661
max,0.5590661


In [41]:
modelRegresor.score(X_test, y_test)

-0.010150969073585703

In [42]:
loss = log_loss(y_test  , LinReg_pred)
print(f"Log loss is {loss}")

Log loss is 0.6744789382063657


In [43]:
#pickle.dump(modelRegresor, open('modeloRegresorRN.p', 'wb'))

# Kaggle

In [44]:
DataFrame_test = pd.read_csv( "/home/bautista/Datos/Machine-Learning-Datos/FeatureEngineering/df_time_series_test.csv" )
DataFrame_test

Unnamed: 0.1,Unnamed: 0,Opportunity_ID,Region,Total_Amount,Años en entregar,Month Created,Product_Name,Opportunity_Owner,Delivery_Year,Delivery_Quarter,"Pricing, Delivery_Terms_Approved","Pricing, Delivery_Terms_Quote_Appr",Promedio_Region_Por_Quarter,Promedio_Owner_Por_Year_And_Quarter,Lag_1,Delta
0,171,10792,EMEA,12.087389,0,1,Product_Name_201,Person_Name_13,2019,Q1,1,1,11.753909,11.001991,,11.000704
1,226,10806,EMEA,10.325097,0,1,Product_Name_241,Person_Name_13,2019,Q1,0,1,11.753909,11.001991,,11.000704
2,238,10812,EMEA,11.479095,0,1,Product_Name_303,Person_Name_13,2019,Q1,1,1,11.753909,11.001991,,11.000704
3,270,10836,EMEA,10.155219,0,1,Product_Name_182,Person_Name_13,2019,Q1,1,1,11.753909,11.001991,,11.000704
4,282,10845,EMEA,9.975017,0,1,Product_Name_283,Person_Name_13,2019,Q1,1,1,11.753909,11.001991,,11.000704
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,2519,12347,Americas,15.340732,1,4,Product_Name_487,Person_Name_9,2020,Q2,1,1,13.556432,15.849836,0.905656,15.849826
1563,2520,12348,Americas,16.193551,1,4,Product_Name_475,Person_Name_9,2020,Q2,1,1,13.556432,15.849836,0.905656,15.849826
1564,2521,12349,Americas,15.831542,1,4,Product_Name_432,Person_Name_9,2020,Q2,1,1,13.556432,15.849836,0.905656,15.849826
1565,2522,12350,Americas,16.193551,1,4,Product_Name_475,Person_Name_9,2020,Q3,1,1,14.756877,16.193551,0.939492,16.193544


In [45]:
subir = pd.DataFrame()
subir['Opportunity_ID'] = DataFrame_test['Opportunity_ID']
DataFrame_test = DataFrame_test.drop(columns = {'Opportunity_ID', 'Unnamed: 0', 'Delivery_Year', 'Delivery_Quarter'}).fillna(0)
DataFrame_test

Unnamed: 0,Region,Total_Amount,Años en entregar,Month Created,Product_Name,Opportunity_Owner,Delivery_Year,Delivery_Quarter,"Pricing, Delivery_Terms_Approved","Pricing, Delivery_Terms_Quote_Appr",Promedio_Region_Por_Quarter,Promedio_Owner_Por_Year_And_Quarter,Lag_1,Delta
0,EMEA,12.087389,0,1,Product_Name_201,Person_Name_13,2019,Q1,1,1,11.753909,11.001991,0.000000,11.000704
1,EMEA,10.325097,0,1,Product_Name_241,Person_Name_13,2019,Q1,0,1,11.753909,11.001991,0.000000,11.000704
2,EMEA,11.479095,0,1,Product_Name_303,Person_Name_13,2019,Q1,1,1,11.753909,11.001991,0.000000,11.000704
3,EMEA,10.155219,0,1,Product_Name_182,Person_Name_13,2019,Q1,1,1,11.753909,11.001991,0.000000,11.000704
4,EMEA,9.975017,0,1,Product_Name_283,Person_Name_13,2019,Q1,1,1,11.753909,11.001991,0.000000,11.000704
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,Americas,15.340732,1,4,Product_Name_487,Person_Name_9,2020,Q2,1,1,13.556432,15.849836,0.905656,15.849826
1563,Americas,16.193551,1,4,Product_Name_475,Person_Name_9,2020,Q2,1,1,13.556432,15.849836,0.905656,15.849826
1564,Americas,15.831542,1,4,Product_Name_432,Person_Name_9,2020,Q2,1,1,13.556432,15.849836,0.905656,15.849826
1565,Americas,16.193551,1,4,Product_Name_475,Person_Name_9,2020,Q3,1,1,14.756877,16.193551,0.939492,16.193544


# Encoding

In [46]:
DataFrame_test['Region'] =  DataFrame_test['Region'].map(Mean_encoded_subject_region)

#--------------------------------------------------------------------------------------------

DataFrame_test["Product_Name"] = DataFrame_test["Product_Name"].map(mean_encoded_product)

#--------------------------------------------------------------------------------------------

#DataFrame_test['Delivery_Quarter'] =  DataFrame_test['Delivery_Quarter'].map(Mean_encoded_subject_quarter)

#--------------------------------------------------------------------------------------------

DataFrame_test['Opportunity_Owner'] =  DataFrame_test['Opportunity_Owner'].map(Mean_encoded_subject_owner)
DataFrame_test = DataFrame_test.fillna(0.406068)

In [47]:
DataFrame_test.isnull().sum()

Region                                 0
Total_Amount                           0
Años en entregar                       0
Month Created                          0
Product_Name                           0
Opportunity_Owner                      0
Delivery_Year                          0
Delivery_Quarter                       0
Pricing, Delivery_Terms_Approved       0
Pricing, Delivery_Terms_Quote_Appr     0
Promedio_Region_Por_Quarter            0
Promedio_Owner_Por_Year_And_Quarter    0
Lag_1                                  0
Delta                                  0
dtype: int64

# Prediction

In [48]:
pred_posta = modelRegresor.predict(DataFrame_test)

In [49]:
subir['target'] = pred_posta
subir.set_index('Opportunity_ID', inplace = True)
subir

Unnamed: 0_level_0,target
Opportunity_ID,Unnamed: 1_level_1
10792,0.559066
10806,0.559066
10812,0.559066
10836,0.559066
10845,0.559066
...,...
12347,0.559066
12348,0.559066
12349,0.559066
12350,0.559066


In [50]:
subir.describe()

Unnamed: 0,target
count,1567.0
mean,0.5590661
std,2.841709e-12
min,0.5590661
25%,0.5590661
50%,0.5590661
75%,0.5590661
max,0.5590661


In [51]:
#subir.to_csv('RedesNeuronales_Entrega_9.csv')