In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/santander-customer-satisfaction/test.csv
/kaggle/input/santander-customer-satisfaction/sample_submission.csv
/kaggle/input/santander-customer-satisfaction/train.csv


In [2]:
train_data=pd.read_csv('../input/santander-customer-satisfaction/train.csv',index_col='ID')

In [5]:
train_data.dropna(axis=0, subset=['TARGET'], inplace=True)
y = train_data.TARGET
train_data.drop(['TARGET'], axis=1, inplace=True)

In [4]:
train_data.shape

(76020, 369)

In [5]:
y.shape

(76020,)

In [6]:
y.head()

ID
1     0
3     0
4     0
8     0
10    0
Name: TARGET, dtype: int64

In [6]:
l=float(len(train_data))
missing_col=[col for col in train_data.columns if train_data[col].isnull().sum()>int(0.4*l)]

In [7]:
print(missing_col)

[]


In [8]:
categorical_cols = [cname for cname in train_data.columns if
                    train_data[cname].nunique() < 10 and 
                    train_data[cname].dtype == "object"]

In [9]:
print(categorical_cols)

[]


In [10]:
numerical_cols = [cname for cname in train_data.columns if 
                train_data[cname].dtype in ['int64', 'float64']]

In [11]:
print(numerical_cols[0:5])

['var3', 'var15', 'imp_ent_var16_ult1', 'imp_op_var39_comer_ult1', 'imp_op_var39_comer_ult3']


In [12]:
my_cols = categorical_cols + numerical_cols
final_train = train_data[my_cols].copy()

In [13]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [14]:
numerical_transformer = SimpleImputer(strategy='constant')

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_cols),
        ('cat', categorical_transformer, categorical_cols)
    ])


In [15]:
from tensorflow import keras


In [16]:
num_class=3
out_y = keras.utils.to_categorical(y, num_class)

In [24]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D,Dropout

In [25]:
def create_model(optimizer='adagrad',
                 kernel_initializer='glorot_uniform', 
                 dropout=0.2):
    model = Sequential()
    model.add(Dense(64,activation='relu',kernel_initializer=kernel_initializer))
    model.add(Dropout(dropout))
    model.add(Dense(1,activation='sigmoid',kernel_initializer=kernel_initializer))

    model.compile(loss='binary_crossentropy',optimizer=optimizer, metrics=['accuracy'])

    return model

In [26]:
from keras.wrappers.scikit_learn import KerasClassifier
model= KerasClassifier(build_fn=create_model, batch_size=100, epochs=4,verbose=0,validation_split=0.2)

In [27]:
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('model', model)
                     ])

In [28]:
out_y[0:5]

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]], dtype=float32)

In [29]:
clf.fit(final_train,y)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num',
                                                  SimpleImputer(strategy='constant'),
                                                  ['var3', 'var15',
                                                   'imp_ent_var16_ult1',
                                                   'imp_op_var39_comer_ult1',
                                                   'imp_op_var39_comer_ult3',
                                                   'imp_op_var40_comer_ult1',
                                                   'imp_op_var40_comer_ult3',
                                                   'imp_op_var40_efect_ult1',
                                                   'imp_op_var40_efect_ult3',
                                                   'imp_op_var40_ult1',
                                                   'imp_op_var41_comer_ult1',
                                                   'imp_...
     

In [30]:
test_data=pd.read_csv('../input/santander-customer-satisfaction/test.csv',index_col='ID')

In [31]:
final_test=test_data[my_cols].copy()

In [32]:
final_test.shape

(75818, 369)

In [33]:
train_data.shape

(76020, 369)

In [34]:
final_train.shape

(76020, 369)

In [35]:
preds=clf.predict(final_test)

In [36]:
print(0.5 in preds)

False


In [37]:
#output = pd.DataFrame({'ID': final_test.index,
#                       'TARGET': preds})
#output.to_csv('sub6.csv', index=False)
print(preds[33])

[0]


In [38]:
preds_new=preds.flatten()

In [39]:
print(preds_new.shape)

(75818,)


In [40]:
output = pd.DataFrame({'ID': final_test.index,
                       'TARGET': preds_new})
#output.to_csv('sub6.csv', index=False)

In [41]:
output.to_csv('sub6.csv', index=False)