## Neural nets

In this notebook I try various features and various NN architectures.

In [1]:
import pandas as pd
from keras.preprocessing import text, sequence
import numpy as np
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, TensorBoard, Callback, EarlyStopping
import keras.backend as K
from sklearn import metrics
from sklearn.model_selection import train_test_split
import os

from gensim.models import FastText

import time 
import gc 

np.random.seed(42)
import pickle
from keras.layers import Input, Dropout, Dense, Embedding, SpatialDropout1D, concatenate
from keras.layers import GRU, Bidirectional, GlobalAveragePooling1D, GlobalMaxPooling1D, BatchNormalization
from keras.layers import Conv1D, MaxPooling1D, Flatten, CuDNNGRU
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras import backend as K
from keras.models import Model

from sklearn.preprocessing import LabelBinarizer, LabelEncoder, MinMaxScaler, StandardScaler

import warnings
warnings.filterwarnings('ignore')

import os
# Cap the number of OpenMP threads at 6.
# NOTE(review): numpy and keras are already imported above; libraries that read OMP_NUM_THREADS
# only when they are loaded would not see this value — TODO confirm and move earlier if needed.
os.environ['OMP_NUM_THREADS'] = '6'

import threading
import multiprocessing
from multiprocessing import Pool, cpu_count
from contextlib import closing
cores = 6

import nn_functions

def root_mean_squared_error(y_true, y_pred):
    """Keras metric: square root of the mean squared difference, reduced over axis 0.

    Args:
        y_true: backend tensor of target values.
        y_pred: backend tensor of model outputs.

    Returns:
        Backend tensor holding ``sqrt(mean((y_pred - y_true) ** 2, axis=0))``.
    """
    squared_error = K.square(y_pred - y_true)
    mean_over_batch = K.mean(squared_error, axis=0)
    return K.sqrt(mean_over_batch)

# Check that GPU is ok.
# NOTE(review): `_get_available_gpus` is an underscore-prefixed helper of the backend module,
# i.e. not part of the public API — it may be missing in other Keras versions.
K.tensorflow_backend._get_available_gpus()

%load_ext autoreload
%autoreload 2

Using TensorFlow backend.


In [4]:
# Load the pre-processed text inputs, the targets and the fitted tokenizer from the project helper module.
# NOTE(review): with `return_all_train=True`, `X`/`y` presumably hold the full training set
# (validation rows included) — confirm in nn_functions before trusting validation scores.
X, X_valid, X_test, y, y_valid, tokenizer = nn_functions.load_processed_data(return_all_train=True)

In [5]:
# Text hyper-parameters used by the embedding loader and by every model builder in this notebook.
# They are not defined anywhere else in the notebook, so a fresh kernel would raise NameError without them.
# The values are taken from the recorded model summaries: text input (None, 300), embedding output
# (None, 300, 300) with 60,000,000 parameters = 200,000 words x 300 dimensions.
# NOTE(review): they must agree with the settings nn_functions used to tokenise/pad the text — confirm.
max_features = 200000  # vocabulary size (rows of the embedding matrix)
embed_size = 300       # dimensionality of each word vector
max_len = 300          # padded sequence length

# Build the embedding matrix for the tokenizer vocabulary from the pre-trained vectors on disk.
embedding_matrix = nn_functions.load_emb('embeddings/avito_big_150m_sg1.w2v', tokenizer, max_features, embed_size)

In [6]:
def build_model(max_len, nb_words, embedding_matrix, embed_size):
    """Build and compile the text-only baseline network.

    Architecture: frozen pre-trained embedding -> spatial dropout (0.2) ->
    bidirectional CuDNNGRU (32 units per direction) -> global average pooling ->
    batch normalisation -> one sigmoid unit.

    Args:
        max_len: length of each padded token sequence.
        nb_words: number of rows of the embedding table.
        embedding_matrix: initial weights of the embedding layer (kept frozen).
        embed_size: dimensionality of each embedding vector.

    Returns:
        The compiled Keras ``Model`` (MSE loss, Adam with lr=0.001, RMSE metric).
        The model summary is printed as a side effect.
    """
    tokens = Input(shape=(max_len,))
    embedded = Embedding(
        nb_words,
        embed_size,
        weights=[embedding_matrix],
        input_length=max_len,
        trainable=False,
    )(tokens)

    hidden = SpatialDropout1D(0.2)(embedded)
    hidden = Bidirectional(CuDNNGRU(32, return_sequences=True))(hidden)
    hidden = GlobalAveragePooling1D()(hidden)
    hidden = BatchNormalization()(hidden)
    prediction = Dense(1, activation="sigmoid")(hidden)

    model = Model(inputs=tokens, outputs=prediction)
    model.compile(
        optimizer=Adam(lr=0.001),
        loss='mean_squared_error',
        metrics=[root_mean_squared_error],
    )
    model.summary()
    return model

In [7]:
# Stop training once the validation loss has not improved for two consecutive epochs.
# `monitor='val_loss'` is the callback's default, spelled out here for readability.
early_stop = EarlyStopping(monitor='val_loss', patience=2)

In [8]:
# Train the text-only baseline and keep the weights of the best epoch on disk.
epochs = 20

model = build_model(max_len, max_features, embedding_matrix, embed_size)
file_path = "model.hdf5"
check_point = ModelCheckpoint(
    file_path,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)

# Validation uses a 10% slice of (X, y) chosen by Keras via `validation_split`,
# not the separate (X_valid, y_valid) arrays loaded above.
history = model.fit(
    X,
    y,
    batch_size=512,
    epochs=epochs,
    verbose=1,
    validation_split=0.1,
    callbacks=[early_stop, check_point],
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 300)               0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 300, 300)          60000000  
_________________________________________________________________
spatial_dropout1d_1 (Spatial (None, 300, 300)          0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 300, 64)           64128     
_________________________________________________________________
global_average_pooling1d_1 ( (None, 64)                0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 64)                256       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
Total para

Epoch 00009: val_loss did not improve
Epoch 10/20
1353081/1353081 [==============================] - 166s 123us/step - loss: 0.0534 - root_mean_squared_error: 0.2309 - val_loss: 0.0622 - val_root_mean_squared_error: 0.2492

Epoch 00010: val_loss did not improve

Best result:

basic structure.

avito_big_150m_sg1.w2v

loss: 0.0546 - root_mean_squared_error: 0.2330 - val_loss: 0.0547 - val_root_mean_squared_error: 0.2333

RMSE: 0.23379741145600408

Quality is low. I suppose that the problem is in processing. Even with these 3 features it is bad.

In [32]:
# Score the test texts and write a submission file.
# The padded test text returned by load_processed_data is `X_test`; the name `test` is not
# defined at this point of the notebook, so predicting on it raises NameError on a fresh kernel.
# NOTE(review): assumes `model` is the single-input text model built above — confirm.
prediction = model.predict(X_test, batch_size=128, verbose=True)

sample_submission = pd.read_csv('sample_submission.csv', index_col=0)
submission = sample_submission.copy()
# The model has a single output unit, so predict() returns shape (n, 1); flatten it to a 1-D column.
submission['deal_probability'] = prediction.ravel()
submission.to_csv('submission_nn4.csv')

padding


In [19]:
# TensorBoard callback: writes the graph and images under ./Graph, histograms disabled.
tensorboard = TensorBoard(
    log_dir='./Graph',
    histogram_freq=0,
    write_graph=True,
    write_images=True,
)

In [20]:
# NOTE(review): `build_model1`, `X_train` and `y_train` are not defined anywhere in the visible
# notebook, so this cell cannot run on a fresh kernel as written.
EPOCHS = 10

model = build_model1()
# Fits with an explicit validation set; `check_point` is the callback created in an earlier cell.
history = model.fit(X_train, y_train, batch_size = 256, epochs = EPOCHS, validation_data = (X_valid, y_valid),
                verbose = 1, callbacks = [tensorboard, check_point])

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 100)          0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 100, 300)     30000000    input_5[0][0]                    
__________________________________________________________________________________________________
spatial_dropout1d_5 (SpatialDro (None, 100, 300)     0           embedding_5[0][0]                
__________________________________________________________________________________________________
bidirectional_5 (Bidirectional) (None, 100, 64)      64128       spatial_dropout1d_5[0][0]        
__________________________________________________________________________________________________
global_ave

tensorboard --logdir=Graph --host localhost --port 8088

Works a bit better than the first architecture. Adding one dense layer improves the result.

In [9]:
# Categorical feature matrices for train and test, one column per feature.
# NOTE(review): "le" in the file name presumably means label-encoded integer codes — confirm.
cat_feat = np.load('data/cat_features_le_usual_train.npy')
cat_feat_test = np.load('data/cat_features_le_usual_test.npy')

In [10]:
# Price feature for train and test.
# NOTE(review): the file name suggests the values are already log-transformed — confirm.
price = np.load('data/price_log_usual_train.npy')
price_test = np.load('data/price_log_usual_test.npy')

In [11]:
# Split the categorical matrix into one 1-D array per feature; columns 0-5 are taken in this order.
(region, city, parent_category_name,
 category_name, user_type, param_1) = (cat_feat[:, col] for col in range(6))

In [12]:
# Embedding input sizes: largest category index + 1 for each categorical column (0-5).
# Both the train and the test matrix are scanned, so an index that occurs only in the test
# set cannot fall outside the embedding table at prediction time.
# The columns are read straight from `cat_feat`, so re-running this cell gives the same
# result even after names such as `category_name` have been reassigned further down.
# NOTE(review): assumes cat_feat_test uses the same column order as cat_feat — confirm.
(max_region, max_city, max_parent_category_name,
 max_category_name, max_user_type, max_param_1) = (
    int(max(cat_feat[:, col].max(), cat_feat_test[:, col].max())) + 1
    for col in range(6)
)

In [13]:
# Hold out 10% of every feature with a fixed seed. Passing several arrays to one
# train_test_split call applies the same row selection to each of them, which is what
# the repeated single-array calls with identical `random_state` produced.
(region_train, region_valid,
 city_train, city_valid,
 parent_category_name_train, parent_category_name_valid,
 category_name_train, category_name_valid,
 user_type_train, user_type_valid,
 param_1_train, param_1_valid) = train_test_split(
    region, city, parent_category_name, category_name, user_type, param_1,
    test_size=0.1, random_state=23,
)

# `price` comes from a separate file, so it keeps its own call.
price_train, price_valid = train_test_split(price, test_size=0.1, random_state=23)

In [14]:
# Assemble the named model inputs for training and validation.
# NOTE(review): the training inputs use the FULL feature arrays (`region`, `city`, ...), not the
# `*_train` splits, while the validation inputs use the `*_valid` splits of those same arrays.
# The validation rows therefore appear to be part of the training data, which would make
# val_loss optimistic — confirm against what load_processed_data returns for X / X_valid.
train_data = nn_functions.get_keras_fasttext(features=[X, region, city, parent_category_name,
                                                       category_name, user_type, param_1, price],
                                            feature_names=['text', 'region', 'city', 'parent_category_name',
                                                            'category_name', 'user_type', 'param_1', 'price'])
valid_data = nn_functions.get_keras_fasttext(features=[X_valid, region_valid, city_valid, parent_category_name_valid,
                                                       category_name_valid, user_type_valid, param_1_valid, price_valid],
                                            feature_names=['text', 'region', 'city', 'parent_category_name',
                                                            'category_name', 'user_type', 'param_1', 'price'])

In [17]:
def build_model2(max_len, nb_words, embedding_matrix, embed_size):
    """Build and compile the text + categorical + price network.

    Text branch: frozen pre-trained embedding -> spatial dropout (0.5) ->
    bidirectional CuDNNGRU (32 units per direction) -> global average and max pooling.
    Each of the six categorical inputs gets its own 10-dimensional embedding.
    Everything is concatenated with the raw ``price`` input, batch-normalised and
    passed through Dense(128, relu) + Dropout(0.5) to a single sigmoid unit.

    Reads the module-level sizes ``max_region``, ``max_city``,
    ``max_parent_category_name``, ``max_category_name``, ``max_user_type`` and
    ``max_param_1`` as embedding input dimensions.

    Args:
        max_len: length of each padded token sequence.
        nb_words: number of rows of the word-embedding table.
        embedding_matrix: initial weights of the word-embedding layer (kept frozen).
        embed_size: dimensionality of each word vector.

    Returns:
        The compiled Keras ``Model`` (MSE loss, Adam with lr=0.001, RMSE metric).
        The model summary is printed as a side effect.
    """
    # Text branch.
    text_in = Input(shape=(max_len,), name='text')
    text_emb = Embedding(
        nb_words,
        embed_size,
        weights=[embedding_matrix],
        input_length=max_len,
        trainable=False,
    )(text_in)
    sequence_out = SpatialDropout1D(0.5)(text_emb)
    sequence_out = Bidirectional(CuDNNGRU(32, return_sequences=True))(sequence_out)
    avg_pool = GlobalAveragePooling1D()(sequence_out)
    max_pool = GlobalMaxPooling1D()(sequence_out)

    # Categorical branch: (input name, embedding input size) in the order the model expects.
    cat_specs = [
        ("region", max_region),
        ("city", max_city),
        ("parent_category_name", max_parent_category_name),
        ("category_name", max_category_name),
        ("user_type", max_user_type),
        ("param_1", max_param_1),
    ]
    cat_inputs = [Input(shape=[1], name=feature) for feature, _ in cat_specs]
    cat_embeddings = [
        Embedding(cardinality, 10)(cat_in)
        for (_, cardinality), cat_in in zip(cat_specs, cat_inputs)
    ]

    price = Input(shape=[1], name='price')

    flattened = [Flatten()(cat_emb) for cat_emb in cat_embeddings]
    merged = concatenate([avg_pool, max_pool] + flattened + [price])
    merged = BatchNormalization()(merged)

    dense_out = Dense(128, activation='relu')(merged)
    dense_out = Dropout(0.5)(dense_out)
    out = Dense(1, activation="sigmoid")(dense_out)

    model = Model(inputs=[text_in] + cat_inputs + [price], outputs=out)
    model.compile(
        optimizer=Adam(lr=0.001),
        loss='mean_squared_error',
        metrics=[root_mean_squared_error],
    )
    model.summary()
    return model

In [18]:
# Fit the text + categorical + price model, checkpointing the best epoch, then score the hold-out set.
EPOCHS = 20

model = build_model2(max_len, max_features, embedding_matrix, embed_size)
file_path = "model.hdf5"
check_point = ModelCheckpoint(
    file_path,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)
history = model.fit(
    train_data,
    y,
    batch_size=256,
    epochs=EPOCHS,
    validation_data=(valid_data, y_valid),
    verbose=1,
    callbacks=[early_stop, check_point],
)

# Restore the best checkpoint before evaluating, since training may have continued past it.
model.load_weights(file_path)
prediction = model.predict(valid_data)
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_valid, prediction)))

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               (None, 300)          0                                            
__________________________________________________________________________________________________
embedding_9 (Embedding)         (None, 300, 300)     60000000    text[0][0]                       
__________________________________________________________________________________________________
spatial_dropout1d_3 (SpatialDro (None, 300, 300)     0           embedding_9[0][0]                
__________________________________________________________________________________________________
region (InputLayer)             (None, 1)            0                                            
__________________________________________________________________________________________________
city (Inpu

In [21]:
# item_seq_number
# Only `item_seq_number` is used from the raw CSVs, so it is the only column read
# (the `price` column was previously loaded and immediately discarded).
train = pd.read_csv('train.csv', usecols=['item_seq_number'])
item_seq_number = np.log1p(train['item_seq_number']).values
del train

# Same 10% / seed-23 hold-out as the other features.
item_seq_number_train, item_seq_number_valid = train_test_split(item_seq_number, test_size=0.1, random_state=23)

# Rebuild the named model inputs with the additional `item_seq_number` feature.
# NOTE(review): as in the earlier cell, the training inputs use the full arrays while the
# validation inputs use the `*_valid` splits — check for train/validation overlap.
train_data = nn_functions.get_keras_fasttext(features=[X, region, city, parent_category_name,
                                                       category_name, user_type, param_1, price, item_seq_number],
                                             feature_names=['text', 'region', 'city', 'parent_category_name',
                                                            'category_name', 'user_type', 'param_1', 'price',
                                                            'item_seq_number'])
valid_data = nn_functions.get_keras_fasttext(features=[X_valid, region_valid, city_valid, parent_category_name_valid,
                                                       category_name_valid, user_type_valid, param_1_valid, price_valid,
                                                       item_seq_number_valid],
                                             feature_names=['text', 'region', 'city', 'parent_category_name',
                                                            'category_name', 'user_type', 'param_1', 'price',
                                                            'item_seq_number'])

test = pd.read_csv('test.csv', usecols=['item_seq_number'])
item_seq_number_test = np.log1p(test['item_seq_number']).values
del test

In [39]:
# added batch norm
def build_model2(max_len, nb_words, embedding_matrix, embed_size):
    """Build and compile the GRU + multi-width convolution network.

    Text branch: frozen pre-trained embedding -> spatial dropout (0.5) ->
    bidirectional CuDNNGRU (128 units per direction), followed by three parallel
    Conv1D layers (64 filters, kernel sizes 3, 4 and 5), each reduced with global
    average and global max pooling.
    Each of the six categorical inputs gets its own 10-dimensional embedding.
    All pooled features, the flattened embeddings and the raw ``price`` and
    ``item_seq_number`` inputs are concatenated, batch-normalised and passed through
    Dense(128, relu) + Dropout(0.5) and Dense(16, relu) + Dropout(0.5) to a single
    sigmoid unit.

    Reads the module-level sizes ``max_region``, ``max_city``,
    ``max_parent_category_name``, ``max_category_name``, ``max_user_type`` and
    ``max_param_1`` as embedding input dimensions.

    Args:
        max_len: length of each padded token sequence.
        nb_words: number of rows of the word-embedding table.
        embedding_matrix: initial weights of the word-embedding layer (kept frozen).
        embed_size: dimensionality of each word vector.

    Returns:
        The compiled Keras ``Model`` (MSE loss, Adam with lr=0.001, RMSE metric).
        The model summary is printed as a side effect.
    """
    # Text branch.
    text_in = Input(shape=(max_len,), name='text')
    text_emb = Embedding(
        nb_words,
        embed_size,
        weights=[embedding_matrix],
        input_length=max_len,
        trainable=False,
    )(text_in)
    sequence_out = SpatialDropout1D(0.5)(text_emb)
    sequence_out = Bidirectional(CuDNNGRU(128, return_sequences=True))(sequence_out)

    # One convolution per kernel width; each contributes an average- and a max-pooled vector.
    pooled = []
    for kernel_width in (3, 4, 5):
        conv = Conv1D(64, kernel_size=kernel_width, padding='valid',
                      kernel_initializer='he_uniform')(sequence_out)
        pooled.append(GlobalAveragePooling1D()(conv))
        pooled.append(GlobalMaxPooling1D()(conv))

    # Categorical branch: (input name, embedding input size) in the order the model expects.
    cat_specs = [
        ("region", max_region),
        ("city", max_city),
        ("parent_category_name", max_parent_category_name),
        ("category_name", max_category_name),
        ("user_type", max_user_type),
        ("param_1", max_param_1),
    ]
    cat_inputs = [Input(shape=[1], name=feature) for feature, _ in cat_specs]
    cat_embeddings = [
        Embedding(cardinality, 10)(cat_in)
        for (_, cardinality), cat_in in zip(cat_specs, cat_inputs)
    ]

    # Numeric inputs are fed in as-is.
    price = Input(shape=[1], name='price')
    item_seq_number = Input(shape=[1], name='item_seq_number')

    flattened = [Flatten()(cat_emb) for cat_emb in cat_embeddings]
    merged = concatenate(pooled + flattened + [price, item_seq_number])
    merged = BatchNormalization()(merged)

    for units in (128, 16):
        dense_out = Dense(units, activation='relu')(merged)
        merged = Dropout(0.5)(dense_out)
    out = Dense(1, activation="sigmoid")(merged)

    model = Model(
        inputs=[text_in] + cat_inputs + [price, item_seq_number],
        outputs=out,
    )
    model.compile(
        optimizer=Adam(lr=0.001),
        loss='mean_squared_error',
        metrics=[root_mean_squared_error],
    )
    model.summary()
    return model

# Fit the convolutional variant, checkpointing the best epoch, then score the hold-out set.
EPOCHS = 20

model = build_model2(max_len, max_features, embedding_matrix, embed_size)
file_path = "model.hdf5"
check_point = ModelCheckpoint(
    file_path,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    verbose=1,
)
history = model.fit(
    train_data,
    y,
    batch_size=128,
    epochs=EPOCHS,
    validation_data=(valid_data, y_valid),
    verbose=1,
    callbacks=[early_stop, check_point],
)

# Restore the best checkpoint before evaluating, since training may have continued past it.
model.load_weights(file_path)
prediction = model.predict(valid_data)
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_valid, prediction)))

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               (None, 300)          0                                            
__________________________________________________________________________________________________
embedding_23 (Embedding)        (None, 300, 300)     60000000    text[0][0]                       
__________________________________________________________________________________________________
spatial_dropout1d_5 (SpatialDro (None, 300, 300)     0           embedding_23[0][0]               
__________________________________________________________________________________________________
bidirectional_5 (Bidirectional) (None, 300, 256)     330240      spatial_dropout1d_5[0][0]        
__________________________________________________________________________________________________
region (In

Train on 1503424 samples, validate on 150343 samples
Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.05731, saving model to model.hdf5
Epoch 2/20

Epoch 00002: val_loss improved from 0.05731 to 0.05534, saving model to model.hdf5
Epoch 3/20

Epoch 00003: val_loss improved from 0.05534 to 0.05463, saving model to model.hdf5
Epoch 4/20

Epoch 00004: val_loss improved from 0.05463 to 0.05422, saving model to model.hdf5
Epoch 5/20

Epoch 00005: val_loss did not improve
Epoch 6/20

Epoch 00006: val_loss improved from 0.05422 to 0.05378, saving model to model.hdf5
Epoch 7/20

Epoch 00007: val_loss did not improve
Epoch 8/20

Epoch 00008: val_loss improved from 0.05378 to 0.05364, saving model to model.hdf5
Epoch 9/20

Epoch 00009: val_loss improved from 0.05364 to 0.05343, saving model to model.hdf5
Epoch 10/20

Epoch 00010: val_loss did not improve
Epoch 11/20

Epoch 00011: val_loss did not improve
RMSE: 0.23115055805797105


In [35]:
# Assemble the named model inputs for the test set (same feature names as the training inputs).
# NOTE(review): `region_test`, `city_test`, `parent_category_name_test`, `category_name_test`,
# `user_type_test` and `param_1_test` are only assigned in cells further down the notebook,
# so this cell depends on out-of-order execution.
test_data = nn_functions.get_keras_fasttext(features=[X_test, region_test, city_test, parent_category_name_test,
                                                       category_name_test, user_type_test, param_1_test, price_test,
                                                       item_seq_number_test],
                                             feature_names=['text', 'region', 'city', 'parent_category_name',
                                                            'category_name', 'user_type', 'param_1', 'price',
                                                            'item_seq_number'])

In [31]:
# Print the layer table of whichever model is currently bound to `model`.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               (None, 300)          0                                            
__________________________________________________________________________________________________
embedding_16 (Embedding)        (None, 300, 300)     60000000    text[0][0]                       
__________________________________________________________________________________________________
spatial_dropout1d_4 (SpatialDro (None, 300, 300)     0           embedding_16[0][0]               
__________________________________________________________________________________________________
bidirectional_4 (Bidirectional) (None, 300, 128)     140544      spatial_dropout1d_4[0][0]        
__________________________________________________________________________________________________
region (In

In [36]:
# Predict on the assembled test inputs in batches of 128, with a progress bar.
prediction = model.predict(test_data, batch_size = 128, verbose = True)



In [37]:
# Sanity check: average predicted value over the test set.
np.mean(prediction)

0.14006568

In [38]:


# Write the predictions into a copy of the sample submission (indexed by its first column) and save it.
sample_submission = pd.read_csv('sample_submission.csv', index_col = 0)
submission = sample_submission.copy()
submission['deal_probability'] = prediction
submission.to_csv('submission_nn_3_conv.csv')

In [23]:
import glob


def stack_meta_features(paths):
    """Load pickled arrays and stack them column-wise into one 2-D array.

    Args:
        paths: iterable of file paths; each file holds one pickled numpy array
            with one row per sample. 1-D arrays are treated as a single column.

    Returns:
        The arrays concatenated along axis 1 in the order of ``paths``,
        or ``None`` when ``paths`` is empty.
    """
    stacked = None
    for path in paths:
        # NOTE: pickle.load can execute arbitrary code — only use it on trusted files.
        with open(path, 'rb') as f:
            columns = pickle.load(f)
        if columns.ndim == 1:
            columns = columns.reshape(-1, 1)
        stacked = columns if stacked is None else np.append(stacked, columns, axis=1)
    return stacked


# Out-of-fold meta-features for the training set.
# The loop previously tested and appended to `X` (the text input) instead of `X_meta`,
# so the stacked meta-features were never accumulated.
# The files are sorted because glob returns them in arbitrary order and the columns
# are later addressed by position.
train_files = sorted(i for i in glob.glob('pickles/1_5_sublinear_tf-True_smooth_idf-False/*') if 'X_meta' in i)
X_meta = stack_meta_features(train_files)

In [24]:
# Meta-features for the test set: one pickled prediction vector per file, stacked column-wise.
# The files are sorted because glob returns them in arbitrary order, while later cells address
# the columns by position (X_test_meta[:, 0] ... X_test_meta[:, 3]).
# NOTE: pickle.load can execute arbitrary code — only use it on trusted files.
X_test_meta = None
test_files = sorted(i for i in glob.glob('pickles/1_5_sublinear_tf-True_smooth_idf-False/*') if 'X_test_meta' in i)
for file in test_files:
    with open(file, 'rb') as f:
        X_temp = pickle.load(f)
    # Each file becomes exactly one column.
    X_temp = X_temp.reshape(-1, 1)
    X_test_meta = X_temp if X_test_meta is None else np.append(X_test_meta, X_temp, axis=1)
    del X_temp

In [25]:
# Second set of categorical feature matrices (train / test).
# NOTE(review): "me" in the file name presumably means mean (target) encoding, i.e. float values
# rather than integer category codes — confirm.
c_f = np.load('data/cat_features_me_usual_train100_20_0.01.npy')
c_f_test = np.load('data/cat_features_me_usual_test100_20_0.01.npy')

In [26]:
# Rebind three feature names to columns 3-5 of `c_f`; this replaces the arrays
# previously taken from `cat_feat` under the same names.
category_name, user_type, param_1 = (c_f[:, col] for col in (3, 4, 5))

# Same 10% / seed-23 hold-out as before; one call applies the identical row selection to all three.
(category_name_train, category_name_valid,
 user_type_train, user_type_valid,
 param_1_train, param_1_valid) = train_test_split(
    category_name, user_type, param_1,
    test_size=0.1, random_state=23,
)

In [27]:
# Test-set counterparts of the three features: columns 3-5 of `c_f_test`.
category_name_test, user_type_test, param_1_test = (c_f_test[:, col] for col in (3, 4, 5))

In [None]:
# no second batch norm
def build_model2(max_len, nb_words, embedding_matrix, embed_size, dense_units=(128, 16), n_other=115):
    """Build and compile the text + categorical + numeric + meta-feature network.

    Text branch: frozen pre-trained embedding -> spatial dropout (0.1) ->
    bidirectional CuDNNGRU (32 units per direction) -> global average and max pooling.
    ``region``, ``city`` and ``parent_category_name`` each get a 10-dimensional
    embedding. These are concatenated with the raw ``price``, ``item_seq_number``,
    four single-column meta-feature inputs and the ``other`` block, batch-normalised,
    and passed through two Dense + Dropout(0.1) layers (relu, then sigmoid) to a
    single sigmoid unit.

    ``category_name``, ``user_type`` and ``param_1`` are kept as model inputs, so the
    set of inputs the model expects is unchanged, but they are not connected to
    the network. The embedding layers that used to be created for them and then left
    unused have been removed.

    Reads the module-level sizes ``max_region``, ``max_city`` and
    ``max_parent_category_name`` as embedding input dimensions.

    Args:
        max_len: length of each padded token sequence.
        nb_words: number of rows of the word-embedding table.
        embedding_matrix: initial weights of the word-embedding layer (kept frozen).
        embed_size: dimensionality of each word vector.
        dense_units: widths of the two hidden dense layers, in order.
        n_other: number of columns of the ``other_columns_usual_train`` input.

    Returns:
        The compiled Keras ``Model`` (MSE loss, Adam with lr=0.001, RMSE metric).
        The model summary is printed as a side effect.
    """
    # Text branch.
    tex_imp = Input(shape=(max_len,), name='text')
    emb = Embedding(nb_words, embed_size, weights=[embedding_matrix],
                    input_length=max_len, trainable=False)(tex_imp)
    main = SpatialDropout1D(0.1)(emb)
    main = Bidirectional(CuDNNGRU(32, return_sequences=True))(main)
    avg_pool = GlobalAveragePooling1D()(main)
    max_pool = GlobalMaxPooling1D()(main)

    # Categorical inputs that are embedded.
    region = Input(shape=[1], name="region")
    city = Input(shape=[1], name="city")
    parent_category_name = Input(shape=[1], name="parent_category_name")
    # Declared so the model still accepts these inputs; not used by the network.
    category_name = Input(shape=[1], name="category_name")
    user_type = Input(shape=[1], name="user_type")
    param_1 = Input(shape=[1], name="param_1")

    emb_region = Embedding(max_region, 10)(region)
    emb_city = Embedding(max_city, 10)(city)
    emb_parent_category_name = Embedding(max_parent_category_name, 10)(parent_category_name)

    # Numeric and meta-feature inputs are fed in as-is.
    price = Input(shape=[1], name='price')
    item_seq_number = Input(shape=[1], name='item_seq_number')
    X_meta_hegregressor = Input(shape=[1], name='X_meta_hegregressor')
    X_meta_lasso = Input(shape=[1], name='X_meta_lasso')
    X_meta_ridge = Input(shape=[1], name='X_meta_ridge')
    X_meta_sgd = Input(shape=[1], name='X_meta_sgd')
    other = Input(shape=[n_other], name='other_columns_usual_train')

    main = concatenate([
        avg_pool, max_pool,
        Flatten()(emb_region),
        Flatten()(emb_city),
        Flatten()(emb_parent_category_name),
        price,
        item_seq_number,
        X_meta_hegregressor,
        X_meta_lasso,
        X_meta_ridge,
        X_meta_sgd,
        other,
    ])
    main = BatchNormalization()(main)

    first_units, second_units = dense_units
    main = Dropout(0.1)(Dense(first_units, activation='relu')(main))
    main = Dropout(0.1)(Dense(second_units, activation='sigmoid')(main))
    out = Dense(1, activation="sigmoid")(main)

    model = Model(inputs=[tex_imp, region, city, parent_category_name,
                          category_name, user_type, param_1,
                          price, item_seq_number,
                          X_meta_hegregressor, X_meta_lasso, X_meta_ridge, X_meta_sgd, other],
                  outputs=out)
    model.compile(optimizer=Adam(lr=0.001), loss='mean_squared_error',
                  metrics=[root_mean_squared_error])
    model.summary()
    return model

# Fit the meta-feature model; the large epoch budget relies on `early_stop` to end training.
# NOTE(review): the `train_data` / `valid_data` dicts built in the visible cells do not contain the
# 'X_meta_*' or 'other_columns_usual_train' inputs this model declares — the cell that adds them
# is not in the notebook.
EPOCHS = 200

model = build_model2(max_len, max_features, embedding_matrix, embed_size)
file_path = "model.hdf5"
check_point = ModelCheckpoint(file_path, monitor = "val_loss", mode = "min", save_best_only = True, verbose = 1)
history = model.fit(train_data, y, batch_size = 512, epochs = EPOCHS, validation_data = (valid_data, y_valid),
                verbose = 1, callbacks = [early_stop, check_point])


# Restore the best checkpoint before evaluating on the hold-out set.
model.load_weights(file_path)
prediction = model.predict(valid_data)
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_valid, prediction)))

In [None]:
# no second batch norm
def build_model2(max_len, nb_words, embedding_matrix, embed_size, dense_units=(256, 64), n_other=115):
    """Build and compile the wider text + categorical + numeric + meta-feature network.

    Text branch: frozen pre-trained embedding -> spatial dropout (0.1) ->
    bidirectional CuDNNGRU (32 units per direction) -> global average and max pooling.
    ``region``, ``city`` and ``parent_category_name`` each get a 10-dimensional
    embedding. These are concatenated with the raw ``price``, ``item_seq_number``,
    four single-column meta-feature inputs and the ``other`` block, batch-normalised,
    and passed through two Dense + Dropout(0.1) layers (relu, then sigmoid) to a
    single sigmoid unit.

    ``category_name``, ``user_type`` and ``param_1`` are kept as model inputs, so the
    set of inputs the model expects is unchanged, but they are not connected to
    the network. The embedding layers that used to be created for them and then left
    unused have been removed.

    Reads the module-level sizes ``max_region``, ``max_city`` and
    ``max_parent_category_name`` as embedding input dimensions.

    Args:
        max_len: length of each padded token sequence.
        nb_words: number of rows of the word-embedding table.
        embedding_matrix: initial weights of the word-embedding layer (kept frozen).
        embed_size: dimensionality of each word vector.
        dense_units: widths of the two hidden dense layers, in order.
        n_other: number of columns of the ``other_columns_usual_train`` input.

    Returns:
        The compiled Keras ``Model`` (MSE loss, Adam with lr=0.001, RMSE metric).
        The model summary is printed as a side effect.
    """
    # Text branch.
    tex_imp = Input(shape=(max_len,), name='text')
    emb = Embedding(nb_words, embed_size, weights=[embedding_matrix],
                    input_length=max_len, trainable=False)(tex_imp)
    main = SpatialDropout1D(0.1)(emb)
    main = Bidirectional(CuDNNGRU(32, return_sequences=True))(main)
    avg_pool = GlobalAveragePooling1D()(main)
    max_pool = GlobalMaxPooling1D()(main)

    # Categorical inputs that are embedded.
    region = Input(shape=[1], name="region")
    city = Input(shape=[1], name="city")
    parent_category_name = Input(shape=[1], name="parent_category_name")
    # Declared so the model still accepts these inputs; not used by the network.
    category_name = Input(shape=[1], name="category_name")
    user_type = Input(shape=[1], name="user_type")
    param_1 = Input(shape=[1], name="param_1")

    emb_region = Embedding(max_region, 10)(region)
    emb_city = Embedding(max_city, 10)(city)
    emb_parent_category_name = Embedding(max_parent_category_name, 10)(parent_category_name)

    # Numeric and meta-feature inputs are fed in as-is.
    price = Input(shape=[1], name='price')
    item_seq_number = Input(shape=[1], name='item_seq_number')
    X_meta_hegregressor = Input(shape=[1], name='X_meta_hegregressor')
    X_meta_lasso = Input(shape=[1], name='X_meta_lasso')
    X_meta_ridge = Input(shape=[1], name='X_meta_ridge')
    X_meta_sgd = Input(shape=[1], name='X_meta_sgd')
    other = Input(shape=[n_other], name='other_columns_usual_train')

    main = concatenate([
        avg_pool, max_pool,
        Flatten()(emb_region),
        Flatten()(emb_city),
        Flatten()(emb_parent_category_name),
        price,
        item_seq_number,
        X_meta_hegregressor,
        X_meta_lasso,
        X_meta_ridge,
        X_meta_sgd,
        other,
    ])
    main = BatchNormalization()(main)

    first_units, second_units = dense_units
    main = Dropout(0.1)(Dense(first_units, activation='relu')(main))
    main = Dropout(0.1)(Dense(second_units, activation='sigmoid')(main))
    out = Dense(1, activation="sigmoid")(main)

    model = Model(inputs=[tex_imp, region, city, parent_category_name,
                          category_name, user_type, param_1,
                          price, item_seq_number,
                          X_meta_hegregressor, X_meta_lasso, X_meta_ridge, X_meta_sgd, other],
                  outputs=out)
    model.compile(optimizer=Adam(lr=0.001), loss='mean_squared_error',
                  metrics=[root_mean_squared_error])
    model.summary()
    return model

# Fit the wider meta-feature model; the large epoch budget relies on `early_stop` to end training.
# NOTE(review): the `train_data` / `valid_data` dicts built in the visible cells do not contain the
# 'X_meta_*' or 'other_columns_usual_train' inputs this model declares — the cell that adds them
# is not in the notebook.
EPOCHS = 200

model = build_model2(max_len, max_features, embedding_matrix, embed_size)
file_path = "model.hdf5"
check_point = ModelCheckpoint(file_path, monitor = "val_loss", mode = "min", save_best_only = True, verbose = 1)
history = model.fit(train_data, y, batch_size = 512, epochs = EPOCHS, validation_data = (valid_data, y_valid),
                verbose = 1, callbacks = [early_stop, check_point])


# Restore the best checkpoint before evaluating on the hold-out set.
model.load_weights(file_path)
prediction = model.predict(valid_data)
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_valid, prediction)))

In [None]:
# Force a garbage-collection pass to release Python objects that are no longer referenced.
gc.collect()

In [58]:
# Reset the Keras backend session.
# NOTE(review): models built before this call are presumably unusable afterwards — rebuild before predicting.
K.clear_session()

In [106]:
# Re-score the current `model` on the hold-out inputs and print the RMSE.
prediction = model.predict(valid_data)
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_valid, prediction)))
# Score noted from an earlier run:
#RMSE: 0.22987448481942707

RMSE: 0.2300523231369783


In [34]:
# Test-set counterparts of the first three categorical features: columns 0-2 of `cat_feat_test`.
region_test, city_test, parent_category_name_test = (cat_feat_test[:, col] for col in range(3))

In [76]:
# NOTE(review): stale cell — `get_keras_fasttext` is called unqualified and positionally here, unlike
# the `nn_functions.get_keras_fasttext(features=..., feature_names=...)` calls elsewhere, and no such
# bare name is defined in the visible notebook. It also overwrites `test_data`.
test_data = get_keras_fasttext(test, region_test, city_test, parent_category_name_test, price_test)

In [72]:
# Assemble the test inputs for the meta-feature model. The four meta-feature columns are taken
# from X_test_meta by position, so the column order of X_test_meta must match these names.
# NOTE(review): `other_columns_usual_test` is not defined anywhere in the visible notebook.
test_data = nn_functions.get_keras_fasttext(features=[X_test, region_test, city_test, parent_category_name_test,
                                                       category_name_test, user_type_test, param_1_test, price_test,
                                                       item_seq_number_test,
                                                       X_test_meta[:, 0], X_test_meta[:, 1], X_test_meta[:, 2],
                                                       X_test_meta[:, 3], other_columns_usual_test],
                                             feature_names=['text', 'region', 'city', 'parent_category_name',
                                                            'category_name', 'user_type', 'param_1', 'price',
                                                            'item_seq_number', 'X_meta_hegregressor', 'X_meta_lasso',
                                                            'X_meta_ridge', 'X_meta_sgd', 'other_columns_usual_train'])

In [73]:
# Predict on the assembled test inputs with the current `model`.
prediction1 = model.predict(test_data)

In [74]:
# Sanity check: average predicted value over the test set.
np.mean(prediction1)
# Value noted from an earlier run:
#0.14329907

0.14682978

In [75]:
# Build the submission frame from the test ids and the predictions, then save it without the index.
# NOTE: this rebinds `test` to a DataFrame holding only `item_id`.
test_cols = ['item_id']
test = pd.read_csv('test.csv', usecols = test_cols)
submission1 = pd.DataFrame( columns = ['item_id', 'deal_probability'])

submission1['item_id'] = test['item_id']
submission1['deal_probability'] = prediction1

print("Check Submission")
submission1.to_csv("submissions/neural_net_new_attempt.csv", index=False)
# Score noted for this submission:
#0.2339

Check Submission
