In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

from sklearn.preprocessing import StandardScaler
import tensorflow as tf
#print(tf.__version__)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense ,Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau

# Single seed value reused by the reproducibility settings below.
SEED = 42

# Reproducibility-related environment switches.
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here probably only affects child processes -- confirm.
os.environ.update({
    'PYTHONHASHSEED': str(SEED),
    'TF_CUDNN_DETERMINISTIC': '1',  # new flag present in tf 2.0+
})

# Seed NumPy's global random number generator.
np.random.seed(SEED)



# Read the four competition CSVs in one pass: training features, scored
# training targets, test features and the sample submission template.
xTrain, yTrain, xTest, submission = (
    pd.read_csv(csvPath)
    for csvPath in (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/train_targets_scored.csv',
        '../input/lish-moa/test_features.csv',
        '../input/lish-moa/sample_submission.csv',
    )
)

def PreProcessX(df):
    """Encode the categorical feature columns as 0/1 integers.

    ``cp_dose`` becomes 1 where the value is ``'D1'`` and 0 otherwise;
    ``cp_type`` becomes 1 where the value is ``'trt_cp'`` and 0 otherwise.
    All other columns, and the column order, are left untouched.

    The input frame is NOT modified: a new DataFrame is returned. Mutating
    the argument in place would make a second call on the same frame compare
    the already-encoded integers against the string labels and silently
    produce all zeros.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature table containing ``cp_dose`` and ``cp_type`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with the two columns replaced by integer indicators.

    Raises
    ------
    KeyError
        If ``cp_dose`` or ``cp_type`` is missing from ``df``.
    """
    # ``assign`` works on a copy, so the caller's frame stays as loaded.
    return df.assign(
        cp_dose=(df['cp_dose'] == 'D1').astype(int),
        cp_type=(df['cp_type'] == 'trt_cp').astype(int),
    )

# Identifier column(s) that carry no signal and must not reach the model.
idList = ['sig_id']

# Encode the categorical columns of both feature tables, then strip the id
# column from features and targets alike.
xTrain = PreProcessX(xTrain).drop(columns=idList)
xTest = PreProcessX(xTest).drop(columns=idList)
yTrain = yTrain.drop(columns=idList)

# Columns to standardise: every column whose name starts with "g-" or "c-",
# plus the numeric 'cp_time' column.
featureMask = xTrain.columns.str.contains(pat = '^g-|^c-', regex = True)
sNames = [*xTrain.columns[featureMask], 'cp_time']

# Fit the scaler on the training features only, then apply the same
# transformation to the test features.
scaler = StandardScaler()
xTrain[sNames] = scaler.fit_transform(xTrain[sNames])
xTest[sNames] = scaler.transform(xTest[sNames])


# Seed TensorFlow's global random generator before any layer is created.
# Only NumPy was seeded so far, which leaves weight initialisation, dropout
# masks and batch shuffling (all driven by TensorFlow) different on every run.
tf.random.set_seed(SEED)

l2Pen = 0.0008  # L2 penalty (weight decay) applied to the hidden Dense kernels

# Fully connected network: 32 -> 64 -> 128 hidden units, each followed by
# batch normalisation, ending in one sigmoid unit per target column.
model = Sequential([
    # Input width equals the number of feature columns in xTrain.
    Dense(32, activation='relu', name='Dense1_32', input_shape=(xTrain.shape[1],)),
    BatchNormalization(),  # for covariate_shift
    Dense(64, activation="relu", kernel_regularizer=tf.keras.regularizers.l2(l2Pen)),
    BatchNormalization(),
    # NOTE(review): a rate of 0.8 drops 80% of the activations during
    # training, which is unusually aggressive -- confirm this was not meant
    # as a keep-probability.
    Dropout(0.8),
    Dense(128, activation="relu", kernel_regularizer=tf.keras.regularizers.l2(l2Pen)),
    BatchNormalization(),
    Dropout(0.8),
    # One independent probability per column of yTrain.
    Dense(yTrain.shape[1], activation='sigmoid'),
])

#print(model.summary())


# Train with Adam and binary cross-entropy; no extra metrics are tracked.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    # metrics=['kullback_leibler_divergence'],
)


# Stop training once val_loss has failed to improve by at least 1e-6 for 20
# consecutive epochs. restore_best_weights=True rolls the model back to the
# epoch with the lowest val_loss; without it the model used for prediction
# keeps the weights from `patience` epochs after the best one.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    min_delta=1e-6,
    restore_best_weights=True,
)

# Multiply the learning rate by 0.2 whenever val_loss plateaus (the plateau
# patience is left at the Keras default).
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.2, verbose=1)

# validation_split=0.2 holds out 20% of the training rows for validation
# (per the Keras docs these are the last rows, taken before shuffling).
history = model.fit(
    xTrain,
    yTrain,
    epochs=250,
    batch_size=64,
    verbose=0,
    validation_split=0.2,
    callbacks=[early_stopping, reduce_lr],
)

# Per-epoch metrics recorded by Keras, one row per epoch.
df = pd.DataFrame(history.history)
#print(df.iloc[10:,:].plot(y=['loss','val_loss']))

# Report the metrics of the final epoch only.
print(df.tail(1))

# Fill the target columns of the submission template with the predictions
# for the test features and write the result to the working directory.
testPredictions = model.predict(xTest)
submission[yTrain.columns] = testPredictions
submission.to_csv('submission.csv', index=False)
